{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 500, "global_step": 112900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0044286979627989375, "grad_norm": 11.820367813110352, "learning_rate": 4.428697962798938e-08, "loss": 1.7004, "step": 5 }, { "epoch": 0.008857395925597875, "grad_norm": 12.1470308303833, "learning_rate": 8.857395925597876e-08, "loss": 1.6149, "step": 10 }, { "epoch": 0.013286093888396812, "grad_norm": 11.656866073608398, "learning_rate": 1.3286093888396814e-07, "loss": 1.6436, "step": 15 }, { "epoch": 0.01771479185119575, "grad_norm": 11.312893867492676, "learning_rate": 1.7714791851195753e-07, "loss": 1.6183, "step": 20 }, { "epoch": 0.022143489813994686, "grad_norm": 11.812414169311523, "learning_rate": 2.214348981399469e-07, "loss": 1.6503, "step": 25 }, { "epoch": 0.026572187776793623, "grad_norm": 11.052279472351074, "learning_rate": 2.657218777679363e-07, "loss": 1.655, "step": 30 }, { "epoch": 0.03100088573959256, "grad_norm": 12.188301086425781, "learning_rate": 3.1000885739592564e-07, "loss": 1.6897, "step": 35 }, { "epoch": 0.0354295837023915, "grad_norm": 12.003920555114746, "learning_rate": 3.5429583702391505e-07, "loss": 1.5922, "step": 40 }, { "epoch": 0.03985828166519043, "grad_norm": 11.673710823059082, "learning_rate": 3.9858281665190436e-07, "loss": 1.6776, "step": 45 }, { "epoch": 0.04428697962798937, "grad_norm": 10.381232261657715, "learning_rate": 4.428697962798938e-07, "loss": 1.64, "step": 50 }, { "epoch": 0.048715677590788306, "grad_norm": 10.352510452270508, "learning_rate": 4.871567759078831e-07, "loss": 1.6747, "step": 55 }, { "epoch": 0.053144375553587246, "grad_norm": 10.216808319091797, "learning_rate": 5.314437555358726e-07, "loss": 1.6381, "step": 60 }, { "epoch": 0.05757307351638618, "grad_norm": 8.932103157043457, "learning_rate": 5.757307351638619e-07, "loss": 1.597, "step": 65 }, { "epoch": 0.06200177147918512, "grad_norm": 10.895230293273926, "learning_rate": 6.200177147918513e-07, "loss": 1.6513, "step": 70 }, { "epoch": 0.06643046944198405, "grad_norm": 10.943780899047852, "learning_rate": 6.643046944198406e-07, "loss": 1.611, "step": 75 }, { "epoch": 0.070859167404783, "grad_norm": 10.078645706176758, "learning_rate": 7.085916740478301e-07, "loss": 1.6542, "step": 80 }, { "epoch": 0.07528786536758193, "grad_norm": 8.50240421295166, "learning_rate": 7.528786536758194e-07, "loss": 1.6215, "step": 85 }, { "epoch": 0.07971656333038087, "grad_norm": 7.825551986694336, "learning_rate": 7.971656333038087e-07, "loss": 1.5225, "step": 90 }, { "epoch": 0.0841452612931798, "grad_norm": 7.054741382598877, "learning_rate": 8.41452612931798e-07, "loss": 1.5448, "step": 95 }, { "epoch": 0.08857395925597875, "grad_norm": 6.6052422523498535, "learning_rate": 8.857395925597875e-07, "loss": 1.504, "step": 100 }, { "epoch": 0.09300265721877768, "grad_norm": 6.6351752281188965, "learning_rate": 9.300265721877769e-07, "loss": 1.4697, "step": 105 }, { "epoch": 0.09743135518157661, "grad_norm": 7.448113918304443, "learning_rate": 9.743135518157663e-07, "loss": 1.5385, "step": 110 }, { "epoch": 0.10186005314437556, "grad_norm": 6.1844162940979, "learning_rate": 1.0186005314437557e-06, "loss": 1.4528, "step": 115 }, { "epoch": 0.10628875110717449, "grad_norm": 5.5842604637146, "learning_rate": 1.062887511071745e-06, "loss": 1.4363, "step": 120 }, { "epoch": 0.11071744906997343, "grad_norm": 4.3081440925598145, "learning_rate": 1.1071744906997343e-06, "loss": 1.3547, "step": 125 }, { "epoch": 0.11514614703277236, "grad_norm": 4.156342506408691, "learning_rate": 1.1514614703277237e-06, "loss": 1.3596, "step": 130 }, { "epoch": 0.1195748449955713, "grad_norm": 3.787323236465454, "learning_rate": 1.1957484499557131e-06, "loss": 1.4111, "step": 135 }, { "epoch": 0.12400354295837024, "grad_norm": 3.9504499435424805, "learning_rate": 1.2400354295837025e-06, "loss": 1.3179, "step": 140 }, { "epoch": 0.12843224092116917, "grad_norm": 3.5985236167907715, "learning_rate": 1.2843224092116918e-06, "loss": 1.2544, "step": 145 }, { "epoch": 0.1328609388839681, "grad_norm": 3.0680859088897705, "learning_rate": 1.3286093888396812e-06, "loss": 1.2642, "step": 150 }, { "epoch": 0.13728963684676704, "grad_norm": 2.8051581382751465, "learning_rate": 1.3728963684676706e-06, "loss": 1.2545, "step": 155 }, { "epoch": 0.141718334809566, "grad_norm": 2.2688193321228027, "learning_rate": 1.4171833480956602e-06, "loss": 1.2377, "step": 160 }, { "epoch": 0.14614703277236493, "grad_norm": 1.8552714586257935, "learning_rate": 1.4614703277236494e-06, "loss": 1.2488, "step": 165 }, { "epoch": 0.15057573073516387, "grad_norm": 1.4935394525527954, "learning_rate": 1.5057573073516388e-06, "loss": 1.185, "step": 170 }, { "epoch": 0.1550044286979628, "grad_norm": 0.948371946811676, "learning_rate": 1.550044286979628e-06, "loss": 1.1916, "step": 175 }, { "epoch": 0.15943312666076173, "grad_norm": 0.8102244734764099, "learning_rate": 1.5943312666076174e-06, "loss": 1.1421, "step": 180 }, { "epoch": 0.16386182462356066, "grad_norm": 0.5309062004089355, "learning_rate": 1.6386182462356069e-06, "loss": 1.1567, "step": 185 }, { "epoch": 0.1682905225863596, "grad_norm": 0.5291673541069031, "learning_rate": 1.682905225863596e-06, "loss": 1.1144, "step": 190 }, { "epoch": 0.17271922054915856, "grad_norm": 0.39175063371658325, "learning_rate": 1.7271922054915857e-06, "loss": 1.1344, "step": 195 }, { "epoch": 0.1771479185119575, "grad_norm": 0.42602333426475525, "learning_rate": 1.771479185119575e-06, "loss": 1.124, "step": 200 }, { "epoch": 0.18157661647475642, "grad_norm": 0.27804046869277954, "learning_rate": 1.8157661647475643e-06, "loss": 1.1309, "step": 205 }, { "epoch": 0.18600531443755536, "grad_norm": 0.3582090735435486, "learning_rate": 1.8600531443755537e-06, "loss": 1.0846, "step": 210 }, { "epoch": 0.1904340124003543, "grad_norm": 0.33198463916778564, "learning_rate": 1.9043401240035431e-06, "loss": 1.1095, "step": 215 }, { "epoch": 0.19486271036315322, "grad_norm": 0.312926322221756, "learning_rate": 1.9486271036315325e-06, "loss": 1.0828, "step": 220 }, { "epoch": 0.19929140832595216, "grad_norm": 0.3295394480228424, "learning_rate": 1.9929140832595215e-06, "loss": 1.0841, "step": 225 }, { "epoch": 0.20372010628875112, "grad_norm": 0.3140348792076111, "learning_rate": 2.0372010628875114e-06, "loss": 1.1161, "step": 230 }, { "epoch": 0.20814880425155005, "grad_norm": 0.264995813369751, "learning_rate": 2.0814880425155008e-06, "loss": 1.1086, "step": 235 }, { "epoch": 0.21257750221434898, "grad_norm": 0.27205732464790344, "learning_rate": 2.12577502214349e-06, "loss": 1.1358, "step": 240 }, { "epoch": 0.21700620017714792, "grad_norm": 0.31799212098121643, "learning_rate": 2.170062001771479e-06, "loss": 1.0842, "step": 245 }, { "epoch": 0.22143489813994685, "grad_norm": 0.27930590510368347, "learning_rate": 2.2143489813994686e-06, "loss": 1.1427, "step": 250 }, { "epoch": 0.22586359610274578, "grad_norm": 0.43252986669540405, "learning_rate": 2.258635961027458e-06, "loss": 1.1273, "step": 255 }, { "epoch": 0.23029229406554472, "grad_norm": 0.538478672504425, "learning_rate": 2.3029229406554474e-06, "loss": 1.085, "step": 260 }, { "epoch": 0.23472099202834368, "grad_norm": 0.25883543491363525, "learning_rate": 2.347209920283437e-06, "loss": 1.0959, "step": 265 }, { "epoch": 0.2391496899911426, "grad_norm": 0.3655385971069336, "learning_rate": 2.3914968999114263e-06, "loss": 1.1034, "step": 270 }, { "epoch": 0.24357838795394154, "grad_norm": 0.28594473004341125, "learning_rate": 2.4357838795394157e-06, "loss": 1.0776, "step": 275 }, { "epoch": 0.24800708591674048, "grad_norm": 0.34155675768852234, "learning_rate": 2.480070859167405e-06, "loss": 1.0311, "step": 280 }, { "epoch": 0.2524357838795394, "grad_norm": 0.4211924970149994, "learning_rate": 2.5243578387953945e-06, "loss": 1.0631, "step": 285 }, { "epoch": 0.25686448184233834, "grad_norm": 0.36909550428390503, "learning_rate": 2.5686448184233835e-06, "loss": 1.0825, "step": 290 }, { "epoch": 0.2612931798051373, "grad_norm": 0.37971094250679016, "learning_rate": 2.612931798051373e-06, "loss": 1.147, "step": 295 }, { "epoch": 0.2657218777679362, "grad_norm": 0.46819737553596497, "learning_rate": 2.6572187776793623e-06, "loss": 1.0498, "step": 300 }, { "epoch": 0.27015057573073514, "grad_norm": 0.6770675182342529, "learning_rate": 2.7015057573073517e-06, "loss": 1.1552, "step": 305 }, { "epoch": 0.2745792736935341, "grad_norm": 0.37761443853378296, "learning_rate": 2.745792736935341e-06, "loss": 1.1078, "step": 310 }, { "epoch": 0.27900797165633306, "grad_norm": 0.40357354283332825, "learning_rate": 2.790079716563331e-06, "loss": 1.1332, "step": 315 }, { "epoch": 0.283436669619132, "grad_norm": 0.28160160779953003, "learning_rate": 2.8343666961913204e-06, "loss": 1.1283, "step": 320 }, { "epoch": 0.28786536758193093, "grad_norm": 0.37395164370536804, "learning_rate": 2.8786536758193094e-06, "loss": 1.1381, "step": 325 }, { "epoch": 0.29229406554472986, "grad_norm": 0.3414997458457947, "learning_rate": 2.922940655447299e-06, "loss": 1.0947, "step": 330 }, { "epoch": 0.2967227635075288, "grad_norm": 0.5143349766731262, "learning_rate": 2.9672276350752882e-06, "loss": 1.1016, "step": 335 }, { "epoch": 0.30115146147032773, "grad_norm": 0.37565624713897705, "learning_rate": 3.0115146147032776e-06, "loss": 1.0792, "step": 340 }, { "epoch": 0.30558015943312666, "grad_norm": 0.308960884809494, "learning_rate": 3.055801594331267e-06, "loss": 1.0969, "step": 345 }, { "epoch": 0.3100088573959256, "grad_norm": 0.4782509207725525, "learning_rate": 3.100088573959256e-06, "loss": 1.1341, "step": 350 }, { "epoch": 0.31443755535872453, "grad_norm": 0.3736422061920166, "learning_rate": 3.1443755535872455e-06, "loss": 1.1104, "step": 355 }, { "epoch": 0.31886625332152346, "grad_norm": 0.4654044508934021, "learning_rate": 3.188662533215235e-06, "loss": 1.0979, "step": 360 }, { "epoch": 0.3232949512843224, "grad_norm": 0.35784783959388733, "learning_rate": 3.2329495128432243e-06, "loss": 1.0766, "step": 365 }, { "epoch": 0.32772364924712133, "grad_norm": 0.39122867584228516, "learning_rate": 3.2772364924712137e-06, "loss": 1.1236, "step": 370 }, { "epoch": 0.33215234720992026, "grad_norm": 0.3576284945011139, "learning_rate": 3.3215234720992027e-06, "loss": 1.0313, "step": 375 }, { "epoch": 0.3365810451727192, "grad_norm": 0.4109048545360565, "learning_rate": 3.365810451727192e-06, "loss": 1.0666, "step": 380 }, { "epoch": 0.3410097431355182, "grad_norm": 0.5468763113021851, "learning_rate": 3.410097431355182e-06, "loss": 1.0576, "step": 385 }, { "epoch": 0.3454384410983171, "grad_norm": 0.39048174023628235, "learning_rate": 3.4543844109831714e-06, "loss": 1.052, "step": 390 }, { "epoch": 0.34986713906111605, "grad_norm": 0.41467055678367615, "learning_rate": 3.4986713906111608e-06, "loss": 1.092, "step": 395 }, { "epoch": 0.354295837023915, "grad_norm": 0.41701918840408325, "learning_rate": 3.54295837023915e-06, "loss": 1.0536, "step": 400 }, { "epoch": 0.3587245349867139, "grad_norm": 0.5761395692825317, "learning_rate": 3.5872453498671396e-06, "loss": 1.1105, "step": 405 }, { "epoch": 0.36315323294951285, "grad_norm": 0.4602561295032501, "learning_rate": 3.6315323294951286e-06, "loss": 1.1196, "step": 410 }, { "epoch": 0.3675819309123118, "grad_norm": 0.4647201597690582, "learning_rate": 3.675819309123118e-06, "loss": 1.1325, "step": 415 }, { "epoch": 0.3720106288751107, "grad_norm": 0.3582036793231964, "learning_rate": 3.7201062887511074e-06, "loss": 1.101, "step": 420 }, { "epoch": 0.37643932683790965, "grad_norm": 0.44388604164123535, "learning_rate": 3.764393268379097e-06, "loss": 1.0995, "step": 425 }, { "epoch": 0.3808680248007086, "grad_norm": 0.42416882514953613, "learning_rate": 3.8086802480070863e-06, "loss": 1.0735, "step": 430 }, { "epoch": 0.3852967227635075, "grad_norm": 0.3444736897945404, "learning_rate": 3.852967227635076e-06, "loss": 1.0877, "step": 435 }, { "epoch": 0.38972542072630645, "grad_norm": 0.3834967017173767, "learning_rate": 3.897254207263065e-06, "loss": 1.0893, "step": 440 }, { "epoch": 0.3941541186891054, "grad_norm": 0.6447907090187073, "learning_rate": 3.9415411868910545e-06, "loss": 1.072, "step": 445 }, { "epoch": 0.3985828166519043, "grad_norm": 0.4464726448059082, "learning_rate": 3.985828166519043e-06, "loss": 1.0568, "step": 450 }, { "epoch": 0.4030115146147033, "grad_norm": 0.4295834004878998, "learning_rate": 4.030115146147033e-06, "loss": 1.0923, "step": 455 }, { "epoch": 0.40744021257750224, "grad_norm": 0.4237326979637146, "learning_rate": 4.074402125775023e-06, "loss": 1.0702, "step": 460 }, { "epoch": 0.41186891054030117, "grad_norm": 0.4239921271800995, "learning_rate": 4.118689105403012e-06, "loss": 1.0865, "step": 465 }, { "epoch": 0.4162976085031001, "grad_norm": 0.4564349353313446, "learning_rate": 4.1629760850310016e-06, "loss": 1.1196, "step": 470 }, { "epoch": 0.42072630646589904, "grad_norm": 0.4897279143333435, "learning_rate": 4.207263064658991e-06, "loss": 1.0389, "step": 475 }, { "epoch": 0.42515500442869797, "grad_norm": 0.4564661681652069, "learning_rate": 4.25155004428698e-06, "loss": 1.0837, "step": 480 }, { "epoch": 0.4295837023914969, "grad_norm": 0.43336793780326843, "learning_rate": 4.295837023914969e-06, "loss": 1.0765, "step": 485 }, { "epoch": 0.43401240035429584, "grad_norm": 0.5645413994789124, "learning_rate": 4.340124003542958e-06, "loss": 1.0094, "step": 490 }, { "epoch": 0.43844109831709477, "grad_norm": 0.93393874168396, "learning_rate": 4.384410983170948e-06, "loss": 1.1016, "step": 495 }, { "epoch": 0.4428697962798937, "grad_norm": 0.7553842067718506, "learning_rate": 4.428697962798937e-06, "loss": 1.0389, "step": 500 }, { "epoch": 0.44729849424269263, "grad_norm": 0.41328611969947815, "learning_rate": 4.472984942426927e-06, "loss": 1.0544, "step": 505 }, { "epoch": 0.45172719220549157, "grad_norm": 0.4961669445037842, "learning_rate": 4.517271922054916e-06, "loss": 1.0901, "step": 510 }, { "epoch": 0.4561558901682905, "grad_norm": 0.37691730260849, "learning_rate": 4.5615589016829055e-06, "loss": 1.0793, "step": 515 }, { "epoch": 0.46058458813108943, "grad_norm": 0.4213143587112427, "learning_rate": 4.605845881310895e-06, "loss": 1.1046, "step": 520 }, { "epoch": 0.4650132860938884, "grad_norm": 0.4811985492706299, "learning_rate": 4.650132860938884e-06, "loss": 1.1424, "step": 525 }, { "epoch": 0.46944198405668736, "grad_norm": 0.4970588982105255, "learning_rate": 4.694419840566874e-06, "loss": 1.045, "step": 530 }, { "epoch": 0.4738706820194863, "grad_norm": 0.38179582357406616, "learning_rate": 4.738706820194863e-06, "loss": 1.0647, "step": 535 }, { "epoch": 0.4782993799822852, "grad_norm": 0.6697129011154175, "learning_rate": 4.7829937998228525e-06, "loss": 1.1035, "step": 540 }, { "epoch": 0.48272807794508416, "grad_norm": 0.3649693429470062, "learning_rate": 4.827280779450842e-06, "loss": 1.0344, "step": 545 }, { "epoch": 0.4871567759078831, "grad_norm": 0.38832440972328186, "learning_rate": 4.871567759078831e-06, "loss": 1.11, "step": 550 }, { "epoch": 0.491585473870682, "grad_norm": 0.5177776217460632, "learning_rate": 4.915854738706821e-06, "loss": 1.0588, "step": 555 }, { "epoch": 0.49601417183348095, "grad_norm": 0.3817882239818573, "learning_rate": 4.96014171833481e-06, "loss": 1.0765, "step": 560 }, { "epoch": 0.5004428697962799, "grad_norm": 0.6858991980552673, "learning_rate": 5.0044286979628e-06, "loss": 1.0933, "step": 565 }, { "epoch": 0.5048715677590788, "grad_norm": 0.4919489622116089, "learning_rate": 5.048715677590789e-06, "loss": 1.1109, "step": 570 }, { "epoch": 0.5093002657218778, "grad_norm": 0.5251255035400391, "learning_rate": 5.0930026572187784e-06, "loss": 1.1175, "step": 575 }, { "epoch": 0.5137289636846767, "grad_norm": 0.5085564851760864, "learning_rate": 5.137289636846767e-06, "loss": 1.086, "step": 580 }, { "epoch": 0.5181576616474757, "grad_norm": 0.49915391206741333, "learning_rate": 5.181576616474757e-06, "loss": 1.0986, "step": 585 }, { "epoch": 0.5225863596102746, "grad_norm": 0.6538212895393372, "learning_rate": 5.225863596102746e-06, "loss": 1.0522, "step": 590 }, { "epoch": 0.5270150575730735, "grad_norm": 0.58580482006073, "learning_rate": 5.270150575730736e-06, "loss": 1.0854, "step": 595 }, { "epoch": 0.5314437555358724, "grad_norm": 0.5502675771713257, "learning_rate": 5.314437555358725e-06, "loss": 1.0861, "step": 600 }, { "epoch": 0.5358724534986714, "grad_norm": 0.4223230183124542, "learning_rate": 5.358724534986715e-06, "loss": 1.1022, "step": 605 }, { "epoch": 0.5403011514614703, "grad_norm": 0.6294427514076233, "learning_rate": 5.4030115146147035e-06, "loss": 1.068, "step": 610 }, { "epoch": 0.5447298494242693, "grad_norm": 0.5249088406562805, "learning_rate": 5.447298494242693e-06, "loss": 1.087, "step": 615 }, { "epoch": 0.5491585473870682, "grad_norm": 0.7817137241363525, "learning_rate": 5.491585473870682e-06, "loss": 1.0777, "step": 620 }, { "epoch": 0.5535872453498671, "grad_norm": 0.5898805260658264, "learning_rate": 5.535872453498672e-06, "loss": 1.0282, "step": 625 }, { "epoch": 0.5580159433126661, "grad_norm": 0.7486714124679565, "learning_rate": 5.580159433126662e-06, "loss": 1.0189, "step": 630 }, { "epoch": 0.562444641275465, "grad_norm": 0.49449342489242554, "learning_rate": 5.6244464127546506e-06, "loss": 1.0781, "step": 635 }, { "epoch": 0.566873339238264, "grad_norm": 0.483915776014328, "learning_rate": 5.668733392382641e-06, "loss": 1.0406, "step": 640 }, { "epoch": 0.5713020372010629, "grad_norm": 0.8464071750640869, "learning_rate": 5.713020372010629e-06, "loss": 1.0296, "step": 645 }, { "epoch": 0.5757307351638619, "grad_norm": 0.657326877117157, "learning_rate": 5.757307351638619e-06, "loss": 1.0481, "step": 650 }, { "epoch": 0.5801594331266607, "grad_norm": 0.5357707738876343, "learning_rate": 5.801594331266608e-06, "loss": 1.0536, "step": 655 }, { "epoch": 0.5845881310894597, "grad_norm": 0.4976673722267151, "learning_rate": 5.845881310894598e-06, "loss": 1.0818, "step": 660 }, { "epoch": 0.5890168290522586, "grad_norm": 0.6040676236152649, "learning_rate": 5.890168290522586e-06, "loss": 1.0511, "step": 665 }, { "epoch": 0.5934455270150576, "grad_norm": 0.8693132996559143, "learning_rate": 5.9344552701505765e-06, "loss": 1.0654, "step": 670 }, { "epoch": 0.5978742249778565, "grad_norm": 0.7987152338027954, "learning_rate": 5.978742249778565e-06, "loss": 1.0634, "step": 675 }, { "epoch": 0.6023029229406555, "grad_norm": 0.8263706564903259, "learning_rate": 6.023029229406555e-06, "loss": 1.0539, "step": 680 }, { "epoch": 0.6067316209034543, "grad_norm": 0.6553357839584351, "learning_rate": 6.067316209034544e-06, "loss": 1.0636, "step": 685 }, { "epoch": 0.6111603188662533, "grad_norm": 0.680133044719696, "learning_rate": 6.111603188662534e-06, "loss": 1.0842, "step": 690 }, { "epoch": 0.6155890168290522, "grad_norm": 0.6346431970596313, "learning_rate": 6.155890168290523e-06, "loss": 1.0823, "step": 695 }, { "epoch": 0.6200177147918512, "grad_norm": 0.6364468932151794, "learning_rate": 6.200177147918512e-06, "loss": 1.0705, "step": 700 }, { "epoch": 0.6244464127546502, "grad_norm": 0.9277119040489197, "learning_rate": 6.244464127546502e-06, "loss": 1.0438, "step": 705 }, { "epoch": 0.6288751107174491, "grad_norm": 0.9401694536209106, "learning_rate": 6.288751107174491e-06, "loss": 1.0416, "step": 710 }, { "epoch": 0.633303808680248, "grad_norm": 0.534271776676178, "learning_rate": 6.333038086802481e-06, "loss": 1.0831, "step": 715 }, { "epoch": 0.6377325066430469, "grad_norm": 0.7278724908828735, "learning_rate": 6.37732506643047e-06, "loss": 1.0662, "step": 720 }, { "epoch": 0.6421612046058459, "grad_norm": 0.6743765473365784, "learning_rate": 6.42161204605846e-06, "loss": 1.1087, "step": 725 }, { "epoch": 0.6465899025686448, "grad_norm": 0.5241950750350952, "learning_rate": 6.465899025686449e-06, "loss": 1.0673, "step": 730 }, { "epoch": 0.6510186005314438, "grad_norm": 0.6579424738883972, "learning_rate": 6.510186005314438e-06, "loss": 1.0607, "step": 735 }, { "epoch": 0.6554472984942427, "grad_norm": 0.7520736455917358, "learning_rate": 6.554472984942427e-06, "loss": 1.0426, "step": 740 }, { "epoch": 0.6598759964570416, "grad_norm": 0.5593547224998474, "learning_rate": 6.598759964570417e-06, "loss": 1.0462, "step": 745 }, { "epoch": 0.6643046944198405, "grad_norm": 0.7848285436630249, "learning_rate": 6.643046944198405e-06, "loss": 1.0614, "step": 750 }, { "epoch": 0.6687333923826395, "grad_norm": 0.5062673091888428, "learning_rate": 6.687333923826396e-06, "loss": 1.0777, "step": 755 }, { "epoch": 0.6731620903454384, "grad_norm": 0.6015821695327759, "learning_rate": 6.731620903454384e-06, "loss": 1.0379, "step": 760 }, { "epoch": 0.6775907883082374, "grad_norm": 0.570980966091156, "learning_rate": 6.7759078830823745e-06, "loss": 1.0463, "step": 765 }, { "epoch": 0.6820194862710364, "grad_norm": 0.6550763845443726, "learning_rate": 6.820194862710364e-06, "loss": 0.9967, "step": 770 }, { "epoch": 0.6864481842338352, "grad_norm": 0.5866822600364685, "learning_rate": 6.864481842338353e-06, "loss": 1.0862, "step": 775 }, { "epoch": 0.6908768821966342, "grad_norm": 0.6404900550842285, "learning_rate": 6.908768821966343e-06, "loss": 1.0833, "step": 780 }, { "epoch": 0.6953055801594331, "grad_norm": 0.4883545935153961, "learning_rate": 6.953055801594331e-06, "loss": 1.0391, "step": 785 }, { "epoch": 0.6997342781222321, "grad_norm": 0.45520517230033875, "learning_rate": 6.9973427812223216e-06, "loss": 1.0488, "step": 790 }, { "epoch": 0.704162976085031, "grad_norm": 0.5145585536956787, "learning_rate": 7.04162976085031e-06, "loss": 1.0492, "step": 795 }, { "epoch": 0.70859167404783, "grad_norm": 0.4754512906074524, "learning_rate": 7.0859167404783e-06, "loss": 1.0581, "step": 800 }, { "epoch": 0.7130203720106288, "grad_norm": 0.43282949924468994, "learning_rate": 7.130203720106289e-06, "loss": 1.0167, "step": 805 }, { "epoch": 0.7174490699734278, "grad_norm": 0.42789632081985474, "learning_rate": 7.174490699734279e-06, "loss": 1.0912, "step": 810 }, { "epoch": 0.7218777679362267, "grad_norm": 0.7601043581962585, "learning_rate": 7.218777679362268e-06, "loss": 1.0695, "step": 815 }, { "epoch": 0.7263064658990257, "grad_norm": 0.5453900694847107, "learning_rate": 7.263064658990257e-06, "loss": 1.0743, "step": 820 }, { "epoch": 0.7307351638618246, "grad_norm": 0.4569368064403534, "learning_rate": 7.307351638618247e-06, "loss": 1.0835, "step": 825 }, { "epoch": 0.7351638618246236, "grad_norm": 0.5344635248184204, "learning_rate": 7.351638618246236e-06, "loss": 0.9881, "step": 830 }, { "epoch": 0.7395925597874224, "grad_norm": 0.5342046618461609, "learning_rate": 7.3959255978742254e-06, "loss": 1.0668, "step": 835 }, { "epoch": 0.7440212577502214, "grad_norm": 0.5426737070083618, "learning_rate": 7.440212577502215e-06, "loss": 0.985, "step": 840 }, { "epoch": 0.7484499557130204, "grad_norm": 0.39210185408592224, "learning_rate": 7.484499557130205e-06, "loss": 1.0626, "step": 845 }, { "epoch": 0.7528786536758193, "grad_norm": 0.6499355435371399, "learning_rate": 7.528786536758194e-06, "loss": 1.0395, "step": 850 }, { "epoch": 0.7573073516386183, "grad_norm": 0.6279856562614441, "learning_rate": 7.573073516386183e-06, "loss": 1.0534, "step": 855 }, { "epoch": 0.7617360496014172, "grad_norm": 0.6102858185768127, "learning_rate": 7.6173604960141725e-06, "loss": 1.0939, "step": 860 }, { "epoch": 0.7661647475642162, "grad_norm": 0.5285583138465881, "learning_rate": 7.661647475642163e-06, "loss": 1.0494, "step": 865 }, { "epoch": 0.770593445527015, "grad_norm": 0.6597489714622498, "learning_rate": 7.705934455270151e-06, "loss": 1.0923, "step": 870 }, { "epoch": 0.775022143489814, "grad_norm": 0.37603759765625, "learning_rate": 7.750221434898142e-06, "loss": 1.0834, "step": 875 }, { "epoch": 0.7794508414526129, "grad_norm": 0.4725598394870758, "learning_rate": 7.79450841452613e-06, "loss": 1.0845, "step": 880 }, { "epoch": 0.7838795394154119, "grad_norm": 0.4172370731830597, "learning_rate": 7.838795394154119e-06, "loss": 1.0816, "step": 885 }, { "epoch": 0.7883082373782108, "grad_norm": 0.49854663014411926, "learning_rate": 7.883082373782109e-06, "loss": 1.0549, "step": 890 }, { "epoch": 0.7927369353410098, "grad_norm": 0.36657387018203735, "learning_rate": 7.927369353410098e-06, "loss": 1.0752, "step": 895 }, { "epoch": 0.7971656333038086, "grad_norm": 0.5820833444595337, "learning_rate": 7.971656333038086e-06, "loss": 1.052, "step": 900 }, { "epoch": 0.8015943312666076, "grad_norm": 0.6577838659286499, "learning_rate": 8.015943312666076e-06, "loss": 1.0638, "step": 905 }, { "epoch": 0.8060230292294066, "grad_norm": 0.6086249947547913, "learning_rate": 8.060230292294067e-06, "loss": 1.0278, "step": 910 }, { "epoch": 0.8104517271922055, "grad_norm": 0.7938503623008728, "learning_rate": 8.104517271922055e-06, "loss": 1.0165, "step": 915 }, { "epoch": 0.8148804251550045, "grad_norm": 0.7336840629577637, "learning_rate": 8.148804251550045e-06, "loss": 1.049, "step": 920 }, { "epoch": 0.8193091231178034, "grad_norm": 0.42411020398139954, "learning_rate": 8.193091231178034e-06, "loss": 1.0462, "step": 925 }, { "epoch": 0.8237378210806023, "grad_norm": 0.4607183337211609, "learning_rate": 8.237378210806024e-06, "loss": 1.0412, "step": 930 }, { "epoch": 0.8281665190434012, "grad_norm": 0.4874175488948822, "learning_rate": 8.281665190434013e-06, "loss": 1.0541, "step": 935 }, { "epoch": 0.8325952170062002, "grad_norm": 0.4741254448890686, "learning_rate": 8.325952170062003e-06, "loss": 1.0319, "step": 940 }, { "epoch": 0.8370239149689991, "grad_norm": 0.5217910408973694, "learning_rate": 8.370239149689992e-06, "loss": 1.0361, "step": 945 }, { "epoch": 0.8414526129317981, "grad_norm": 0.5650365948677063, "learning_rate": 8.414526129317982e-06, "loss": 1.0448, "step": 950 }, { "epoch": 0.845881310894597, "grad_norm": 0.4486577808856964, "learning_rate": 8.45881310894597e-06, "loss": 1.0569, "step": 955 }, { "epoch": 0.8503100088573959, "grad_norm": 0.5307552218437195, "learning_rate": 8.50310008857396e-06, "loss": 1.0653, "step": 960 }, { "epoch": 0.8547387068201948, "grad_norm": 0.8470759987831116, "learning_rate": 8.54738706820195e-06, "loss": 1.0722, "step": 965 }, { "epoch": 0.8591674047829938, "grad_norm": 0.6499133706092834, "learning_rate": 8.591674047829938e-06, "loss": 1.0423, "step": 970 }, { "epoch": 0.8635961027457927, "grad_norm": 0.46664857864379883, "learning_rate": 8.635961027457928e-06, "loss": 1.0776, "step": 975 }, { "epoch": 0.8680248007085917, "grad_norm": 0.4962954521179199, "learning_rate": 8.680248007085917e-06, "loss": 1.0191, "step": 980 }, { "epoch": 0.8724534986713907, "grad_norm": 0.6851896643638611, "learning_rate": 8.724534986713907e-06, "loss": 1.05, "step": 985 }, { "epoch": 0.8768821966341895, "grad_norm": 0.6915547251701355, "learning_rate": 8.768821966341896e-06, "loss": 1.0195, "step": 990 }, { "epoch": 0.8813108945969885, "grad_norm": 0.5120362639427185, "learning_rate": 8.813108945969886e-06, "loss": 1.0953, "step": 995 }, { "epoch": 0.8857395925597874, "grad_norm": 0.4280599057674408, "learning_rate": 8.857395925597874e-06, "loss": 1.067, "step": 1000 }, { "epoch": 0.8901682905225864, "grad_norm": 0.6475976705551147, "learning_rate": 8.901682905225865e-06, "loss": 1.1131, "step": 1005 }, { "epoch": 0.8945969884853853, "grad_norm": 0.5279582142829895, "learning_rate": 8.945969884853853e-06, "loss": 1.0436, "step": 1010 }, { "epoch": 0.8990256864481843, "grad_norm": 0.48513951897621155, "learning_rate": 8.990256864481844e-06, "loss": 1.074, "step": 1015 }, { "epoch": 0.9034543844109831, "grad_norm": 0.5839154124259949, "learning_rate": 9.034543844109832e-06, "loss": 1.035, "step": 1020 }, { "epoch": 0.9078830823737821, "grad_norm": 0.7936385273933411, "learning_rate": 9.078830823737822e-06, "loss": 1.0731, "step": 1025 }, { "epoch": 0.912311780336581, "grad_norm": 0.8670493960380554, "learning_rate": 9.123117803365811e-06, "loss": 1.065, "step": 1030 }, { "epoch": 0.91674047829938, "grad_norm": 0.6253863573074341, "learning_rate": 9.167404782993801e-06, "loss": 1.0698, "step": 1035 }, { "epoch": 0.9211691762621789, "grad_norm": 0.6447590589523315, "learning_rate": 9.21169176262179e-06, "loss": 1.0249, "step": 1040 }, { "epoch": 0.9255978742249779, "grad_norm": 0.6368715763092041, "learning_rate": 9.25597874224978e-06, "loss": 1.0293, "step": 1045 }, { "epoch": 0.9300265721877768, "grad_norm": 0.7156099081039429, "learning_rate": 9.300265721877769e-06, "loss": 1.0461, "step": 1050 }, { "epoch": 0.9344552701505757, "grad_norm": 0.7243468165397644, "learning_rate": 9.344552701505759e-06, "loss": 1.0236, "step": 1055 }, { "epoch": 0.9388839681133747, "grad_norm": 0.5831469893455505, "learning_rate": 9.388839681133747e-06, "loss": 1.0654, "step": 1060 }, { "epoch": 0.9433126660761736, "grad_norm": 0.6030169129371643, "learning_rate": 9.433126660761736e-06, "loss": 1.0206, "step": 1065 }, { "epoch": 0.9477413640389726, "grad_norm": 0.5703352093696594, "learning_rate": 9.477413640389726e-06, "loss": 1.0819, "step": 1070 }, { "epoch": 0.9521700620017715, "grad_norm": 0.6313313245773315, "learning_rate": 9.521700620017715e-06, "loss": 1.0658, "step": 1075 }, { "epoch": 0.9565987599645704, "grad_norm": 0.5377680659294128, "learning_rate": 9.565987599645705e-06, "loss": 1.0728, "step": 1080 }, { "epoch": 0.9610274579273693, "grad_norm": 0.5323534607887268, "learning_rate": 9.610274579273694e-06, "loss": 1.0578, "step": 1085 }, { "epoch": 0.9654561558901683, "grad_norm": 0.52129065990448, "learning_rate": 9.654561558901684e-06, "loss": 1.0116, "step": 1090 }, { "epoch": 0.9698848538529672, "grad_norm": 0.5387641191482544, "learning_rate": 9.698848538529672e-06, "loss": 1.0145, "step": 1095 }, { "epoch": 0.9743135518157662, "grad_norm": 0.8255786299705505, "learning_rate": 9.743135518157663e-06, "loss": 1.0599, "step": 1100 }, { "epoch": 0.978742249778565, "grad_norm": 0.8005098700523376, "learning_rate": 9.787422497785651e-06, "loss": 1.0571, "step": 1105 }, { "epoch": 0.983170947741364, "grad_norm": 0.5431801080703735, "learning_rate": 9.831709477413642e-06, "loss": 1.024, "step": 1110 }, { "epoch": 0.9875996457041629, "grad_norm": 0.6002717018127441, "learning_rate": 9.87599645704163e-06, "loss": 1.0321, "step": 1115 }, { "epoch": 0.9920283436669619, "grad_norm": 0.435141921043396, "learning_rate": 9.92028343666962e-06, "loss": 1.0913, "step": 1120 }, { "epoch": 0.9964570416297609, "grad_norm": 0.503257691860199, "learning_rate": 9.964570416297609e-06, "loss": 1.0394, "step": 1125 }, { "epoch": 1.0008857395925599, "grad_norm": 0.4300505816936493, "learning_rate": 1e-05, "loss": 1.0071, "step": 1130 }, { "epoch": 1.0053144375553587, "grad_norm": 0.500792920589447, "learning_rate": 1e-05, "loss": 1.0992, "step": 1135 }, { "epoch": 1.0097431355181576, "grad_norm": 0.6591557264328003, "learning_rate": 1e-05, "loss": 1.0903, "step": 1140 }, { "epoch": 1.0141718334809566, "grad_norm": 0.4528239369392395, "learning_rate": 1e-05, "loss": 1.0596, "step": 1145 }, { "epoch": 1.0186005314437556, "grad_norm": 0.5716541409492493, "learning_rate": 1e-05, "loss": 0.9764, "step": 1150 }, { "epoch": 1.0230292294065544, "grad_norm": 0.5290917754173279, "learning_rate": 1e-05, "loss": 1.0068, "step": 1155 }, { "epoch": 1.0274579273693534, "grad_norm": 0.6618760824203491, "learning_rate": 1e-05, "loss": 1.0003, "step": 1160 }, { "epoch": 1.0318866253321524, "grad_norm": 0.591672956943512, "learning_rate": 1e-05, "loss": 1.0734, "step": 1165 }, { "epoch": 1.0363153232949514, "grad_norm": 0.4395208954811096, "learning_rate": 1e-05, "loss": 1.0354, "step": 1170 }, { "epoch": 1.0407440212577501, "grad_norm": 0.5045232176780701, "learning_rate": 1e-05, "loss": 1.0035, "step": 1175 }, { "epoch": 1.045172719220549, "grad_norm": 0.48325854539871216, "learning_rate": 1e-05, "loss": 1.0253, "step": 1180 }, { "epoch": 1.049601417183348, "grad_norm": 0.5216041803359985, "learning_rate": 1e-05, "loss": 1.0049, "step": 1185 }, { "epoch": 1.054030115146147, "grad_norm": 0.7825465202331543, "learning_rate": 1e-05, "loss": 1.0839, "step": 1190 }, { "epoch": 1.058458813108946, "grad_norm": 0.46927815675735474, "learning_rate": 1e-05, "loss": 1.022, "step": 1195 }, { "epoch": 1.0628875110717448, "grad_norm": 0.5872330069541931, "learning_rate": 1e-05, "loss": 1.0285, "step": 1200 }, { "epoch": 1.0673162090345438, "grad_norm": 0.5527483820915222, "learning_rate": 1e-05, "loss": 1.0337, "step": 1205 }, { "epoch": 1.0717449069973428, "grad_norm": 0.8837293386459351, "learning_rate": 1e-05, "loss": 1.0523, "step": 1210 }, { "epoch": 1.0761736049601418, "grad_norm": 0.6395681500434875, "learning_rate": 1e-05, "loss": 1.0064, "step": 1215 }, { "epoch": 1.0806023029229406, "grad_norm": 0.5509293079376221, "learning_rate": 1e-05, "loss": 1.0468, "step": 1220 }, { "epoch": 1.0850310008857396, "grad_norm": 0.593695342540741, "learning_rate": 1e-05, "loss": 1.0534, "step": 1225 }, { "epoch": 1.0894596988485385, "grad_norm": 0.5877532958984375, "learning_rate": 1e-05, "loss": 1.0454, "step": 1230 }, { "epoch": 1.0938883968113375, "grad_norm": 0.7444388270378113, "learning_rate": 1e-05, "loss": 1.0029, "step": 1235 }, { "epoch": 1.0983170947741363, "grad_norm": 0.5822707414627075, "learning_rate": 1e-05, "loss": 1.0237, "step": 1240 }, { "epoch": 1.1027457927369353, "grad_norm": 0.5833282470703125, "learning_rate": 1e-05, "loss": 1.0657, "step": 1245 }, { "epoch": 1.1071744906997343, "grad_norm": 0.6106334328651428, "learning_rate": 1e-05, "loss": 1.0895, "step": 1250 }, { "epoch": 1.1116031886625333, "grad_norm": 0.5250518321990967, "learning_rate": 1e-05, "loss": 1.04, "step": 1255 }, { "epoch": 1.1160318866253323, "grad_norm": 0.5018734335899353, "learning_rate": 1e-05, "loss": 1.0247, "step": 1260 }, { "epoch": 1.120460584588131, "grad_norm": 0.5880675911903381, "learning_rate": 1e-05, "loss": 1.0165, "step": 1265 }, { "epoch": 1.12488928255093, "grad_norm": 0.39481469988822937, "learning_rate": 1e-05, "loss": 1.0912, "step": 1270 }, { "epoch": 1.129317980513729, "grad_norm": 0.5716440677642822, "learning_rate": 1e-05, "loss": 0.9983, "step": 1275 }, { "epoch": 1.133746678476528, "grad_norm": 0.6783057451248169, "learning_rate": 1e-05, "loss": 1.0606, "step": 1280 }, { "epoch": 1.1381753764393268, "grad_norm": 0.758237361907959, "learning_rate": 1e-05, "loss": 1.0298, "step": 1285 }, { "epoch": 1.1426040744021257, "grad_norm": 0.6016380190849304, "learning_rate": 1e-05, "loss": 1.0515, "step": 1290 }, { "epoch": 1.1470327723649247, "grad_norm": 0.5406017303466797, "learning_rate": 1e-05, "loss": 1.0224, "step": 1295 }, { "epoch": 1.1514614703277237, "grad_norm": 0.6456558704376221, "learning_rate": 1e-05, "loss": 1.0803, "step": 1300 }, { "epoch": 1.1558901682905225, "grad_norm": 0.5820453763008118, "learning_rate": 1e-05, "loss": 1.0439, "step": 1305 }, { "epoch": 1.1603188662533215, "grad_norm": 0.6696756482124329, "learning_rate": 1e-05, "loss": 1.0178, "step": 1310 }, { "epoch": 1.1647475642161205, "grad_norm": 0.648473858833313, "learning_rate": 1e-05, "loss": 1.0683, "step": 1315 }, { "epoch": 1.1691762621789195, "grad_norm": 0.6276561617851257, "learning_rate": 1e-05, "loss": 1.1075, "step": 1320 }, { "epoch": 1.1736049601417182, "grad_norm": 0.6598888635635376, "learning_rate": 1e-05, "loss": 1.0439, "step": 1325 }, { "epoch": 1.1780336581045172, "grad_norm": 0.9274353384971619, "learning_rate": 1e-05, "loss": 1.0031, "step": 1330 }, { "epoch": 1.1824623560673162, "grad_norm": 0.5601986050605774, "learning_rate": 1e-05, "loss": 1.0467, "step": 1335 }, { "epoch": 1.1868910540301152, "grad_norm": 0.42352792620658875, "learning_rate": 1e-05, "loss": 1.0422, "step": 1340 }, { "epoch": 1.1913197519929142, "grad_norm": 0.6717144250869751, "learning_rate": 1e-05, "loss": 1.0522, "step": 1345 }, { "epoch": 1.195748449955713, "grad_norm": 0.6188071966171265, "learning_rate": 1e-05, "loss": 1.0732, "step": 1350 }, { "epoch": 1.200177147918512, "grad_norm": 0.5769972205162048, "learning_rate": 1e-05, "loss": 1.0529, "step": 1355 }, { "epoch": 1.204605845881311, "grad_norm": 0.5368040204048157, "learning_rate": 1e-05, "loss": 1.0637, "step": 1360 }, { "epoch": 1.20903454384411, "grad_norm": 0.4155511260032654, "learning_rate": 1e-05, "loss": 1.0876, "step": 1365 }, { "epoch": 1.2134632418069087, "grad_norm": 0.5935255885124207, "learning_rate": 1e-05, "loss": 1.0483, "step": 1370 }, { "epoch": 1.2178919397697077, "grad_norm": 0.4535612165927887, "learning_rate": 1e-05, "loss": 1.0846, "step": 1375 }, { "epoch": 1.2223206377325067, "grad_norm": 0.6879892945289612, "learning_rate": 1e-05, "loss": 1.0726, "step": 1380 }, { "epoch": 1.2267493356953056, "grad_norm": 0.380599707365036, "learning_rate": 1e-05, "loss": 1.0291, "step": 1385 }, { "epoch": 1.2311780336581046, "grad_norm": 0.4734128713607788, "learning_rate": 1e-05, "loss": 1.0602, "step": 1390 }, { "epoch": 1.2356067316209034, "grad_norm": 0.6638430953025818, "learning_rate": 1e-05, "loss": 1.0537, "step": 1395 }, { "epoch": 1.2400354295837024, "grad_norm": 0.5650917291641235, "learning_rate": 1e-05, "loss": 1.0764, "step": 1400 }, { "epoch": 1.2444641275465014, "grad_norm": 0.4994293749332428, "learning_rate": 1e-05, "loss": 1.0738, "step": 1405 }, { "epoch": 1.2488928255093001, "grad_norm": 0.7376642823219299, "learning_rate": 1e-05, "loss": 1.0753, "step": 1410 }, { "epoch": 1.2533215234720991, "grad_norm": 0.45495113730430603, "learning_rate": 1e-05, "loss": 0.9962, "step": 1415 }, { "epoch": 1.2577502214348981, "grad_norm": 0.5402055978775024, "learning_rate": 1e-05, "loss": 1.0506, "step": 1420 }, { "epoch": 1.262178919397697, "grad_norm": 0.9967095255851746, "learning_rate": 1e-05, "loss": 1.054, "step": 1425 }, { "epoch": 1.266607617360496, "grad_norm": 0.732774555683136, "learning_rate": 1e-05, "loss": 1.0701, "step": 1430 }, { "epoch": 1.271036315323295, "grad_norm": 0.7219616174697876, "learning_rate": 1e-05, "loss": 1.0511, "step": 1435 }, { "epoch": 1.2754650132860939, "grad_norm": 0.6792771220207214, "learning_rate": 1e-05, "loss": 1.0757, "step": 1440 }, { "epoch": 1.2798937112488928, "grad_norm": 0.5615405440330505, "learning_rate": 1e-05, "loss": 0.992, "step": 1445 }, { "epoch": 1.2843224092116918, "grad_norm": 0.4365237355232239, "learning_rate": 1e-05, "loss": 1.035, "step": 1450 }, { "epoch": 1.2887511071744906, "grad_norm": 0.418634831905365, "learning_rate": 1e-05, "loss": 1.033, "step": 1455 }, { "epoch": 1.2931798051372896, "grad_norm": 0.4341377913951874, "learning_rate": 1e-05, "loss": 1.0178, "step": 1460 }, { "epoch": 1.2976085031000886, "grad_norm": 0.4297645092010498, "learning_rate": 1e-05, "loss": 1.0219, "step": 1465 }, { "epoch": 1.3020372010628876, "grad_norm": 0.44168952107429504, "learning_rate": 1e-05, "loss": 1.0329, "step": 1470 }, { "epoch": 1.3064658990256866, "grad_norm": 0.565904438495636, "learning_rate": 1e-05, "loss": 1.0327, "step": 1475 }, { "epoch": 1.3108945969884853, "grad_norm": 0.566770076751709, "learning_rate": 1e-05, "loss": 1.0554, "step": 1480 }, { "epoch": 1.3153232949512843, "grad_norm": 0.4992661774158478, "learning_rate": 1e-05, "loss": 1.0202, "step": 1485 }, { "epoch": 1.3197519929140833, "grad_norm": 0.550749659538269, "learning_rate": 1e-05, "loss": 1.0008, "step": 1490 }, { "epoch": 1.324180690876882, "grad_norm": 0.5356817245483398, "learning_rate": 1e-05, "loss": 1.0366, "step": 1495 }, { "epoch": 1.328609388839681, "grad_norm": 0.565149188041687, "learning_rate": 1e-05, "loss": 1.061, "step": 1500 }, { "epoch": 1.33303808680248, "grad_norm": 0.46444451808929443, "learning_rate": 1e-05, "loss": 1.0806, "step": 1505 }, { "epoch": 1.337466784765279, "grad_norm": 0.4967142343521118, "learning_rate": 1e-05, "loss": 1.0231, "step": 1510 }, { "epoch": 1.341895482728078, "grad_norm": 0.5111200213432312, "learning_rate": 1e-05, "loss": 1.0646, "step": 1515 }, { "epoch": 1.346324180690877, "grad_norm": 0.41596952080726624, "learning_rate": 1e-05, "loss": 1.0477, "step": 1520 }, { "epoch": 1.3507528786536758, "grad_norm": 0.476852685213089, "learning_rate": 1e-05, "loss": 1.0781, "step": 1525 }, { "epoch": 1.3551815766164748, "grad_norm": 0.5790765285491943, "learning_rate": 1e-05, "loss": 1.0344, "step": 1530 }, { "epoch": 1.3596102745792737, "grad_norm": 0.5472508072853088, "learning_rate": 1e-05, "loss": 1.0824, "step": 1535 }, { "epoch": 1.3640389725420725, "grad_norm": 0.5851724743843079, "learning_rate": 1e-05, "loss": 1.0261, "step": 1540 }, { "epoch": 1.3684676705048715, "grad_norm": 0.49602991342544556, "learning_rate": 1e-05, "loss": 1.04, "step": 1545 }, { "epoch": 1.3728963684676705, "grad_norm": 0.48241379857063293, "learning_rate": 1e-05, "loss": 1.1002, "step": 1550 }, { "epoch": 1.3773250664304695, "grad_norm": 0.4322584867477417, "learning_rate": 1e-05, "loss": 1.0923, "step": 1555 }, { "epoch": 1.3817537643932685, "grad_norm": 0.3377690613269806, "learning_rate": 1e-05, "loss": 1.0696, "step": 1560 }, { "epoch": 1.3861824623560672, "grad_norm": 0.4481833875179291, "learning_rate": 1e-05, "loss": 1.0519, "step": 1565 }, { "epoch": 1.3906111603188662, "grad_norm": 0.6130571961402893, "learning_rate": 1e-05, "loss": 1.0094, "step": 1570 }, { "epoch": 1.3950398582816652, "grad_norm": 0.6391801238059998, "learning_rate": 1e-05, "loss": 1.0147, "step": 1575 }, { "epoch": 1.3994685562444642, "grad_norm": 0.8034498691558838, "learning_rate": 1e-05, "loss": 1.0651, "step": 1580 }, { "epoch": 1.403897254207263, "grad_norm": 0.5643225312232971, "learning_rate": 1e-05, "loss": 1.0126, "step": 1585 }, { "epoch": 1.408325952170062, "grad_norm": 0.6384730339050293, "learning_rate": 1e-05, "loss": 1.0725, "step": 1590 }, { "epoch": 1.412754650132861, "grad_norm": 0.8570442795753479, "learning_rate": 1e-05, "loss": 1.0485, "step": 1595 }, { "epoch": 1.41718334809566, "grad_norm": 0.4651219844818115, "learning_rate": 1e-05, "loss": 1.1197, "step": 1600 }, { "epoch": 1.421612046058459, "grad_norm": 0.6274141669273376, "learning_rate": 1e-05, "loss": 1.0223, "step": 1605 }, { "epoch": 1.4260407440212577, "grad_norm": 0.6916234493255615, "learning_rate": 1e-05, "loss": 1.0658, "step": 1610 }, { "epoch": 1.4304694419840567, "grad_norm": 0.650422215461731, "learning_rate": 1e-05, "loss": 1.0352, "step": 1615 }, { "epoch": 1.4348981399468557, "grad_norm": 0.6984378695487976, "learning_rate": 1e-05, "loss": 1.095, "step": 1620 }, { "epoch": 1.4393268379096544, "grad_norm": 0.5732066035270691, "learning_rate": 1e-05, "loss": 1.0147, "step": 1625 }, { "epoch": 1.4437555358724534, "grad_norm": 0.4563065469264984, "learning_rate": 1e-05, "loss": 1.0247, "step": 1630 }, { "epoch": 1.4481842338352524, "grad_norm": 0.4518648386001587, "learning_rate": 1e-05, "loss": 1.0484, "step": 1635 }, { "epoch": 1.4526129317980514, "grad_norm": 0.5408558249473572, "learning_rate": 1e-05, "loss": 1.0337, "step": 1640 }, { "epoch": 1.4570416297608504, "grad_norm": 0.4345446228981018, "learning_rate": 1e-05, "loss": 1.037, "step": 1645 }, { "epoch": 1.4614703277236494, "grad_norm": 0.6300520300865173, "learning_rate": 1e-05, "loss": 1.0008, "step": 1650 }, { "epoch": 1.4658990256864481, "grad_norm": 0.6697047352790833, "learning_rate": 1e-05, "loss": 1.0331, "step": 1655 }, { "epoch": 1.4703277236492471, "grad_norm": 0.5353518724441528, "learning_rate": 1e-05, "loss": 1.0606, "step": 1660 }, { "epoch": 1.4747564216120461, "grad_norm": 0.5491823554039001, "learning_rate": 1e-05, "loss": 1.032, "step": 1665 }, { "epoch": 1.4791851195748449, "grad_norm": 0.5808018445968628, "learning_rate": 1e-05, "loss": 1.0763, "step": 1670 }, { "epoch": 1.4836138175376439, "grad_norm": 0.4444665312767029, "learning_rate": 1e-05, "loss": 1.0046, "step": 1675 }, { "epoch": 1.4880425155004429, "grad_norm": 0.5143681168556213, "learning_rate": 1e-05, "loss": 1.0296, "step": 1680 }, { "epoch": 1.4924712134632419, "grad_norm": 0.45289140939712524, "learning_rate": 1e-05, "loss": 1.0721, "step": 1685 }, { "epoch": 1.4968999114260408, "grad_norm": 0.608600914478302, "learning_rate": 1e-05, "loss": 1.0351, "step": 1690 }, { "epoch": 1.5013286093888398, "grad_norm": 0.6080148816108704, "learning_rate": 1e-05, "loss": 1.0666, "step": 1695 }, { "epoch": 1.5057573073516386, "grad_norm": 0.39750945568084717, "learning_rate": 1e-05, "loss": 1.1251, "step": 1700 }, { "epoch": 1.5101860053144376, "grad_norm": 0.6348580121994019, "learning_rate": 1e-05, "loss": 1.015, "step": 1705 }, { "epoch": 1.5146147032772364, "grad_norm": 0.5678335428237915, "learning_rate": 1e-05, "loss": 1.0744, "step": 1710 }, { "epoch": 1.5190434012400353, "grad_norm": 0.5787584781646729, "learning_rate": 1e-05, "loss": 1.0653, "step": 1715 }, { "epoch": 1.5234720992028343, "grad_norm": 0.44927048683166504, "learning_rate": 1e-05, "loss": 0.9957, "step": 1720 }, { "epoch": 1.5279007971656333, "grad_norm": 0.38046860694885254, "learning_rate": 1e-05, "loss": 1.0236, "step": 1725 }, { "epoch": 1.5323294951284323, "grad_norm": 0.4957244396209717, "learning_rate": 1e-05, "loss": 1.0357, "step": 1730 }, { "epoch": 1.5367581930912313, "grad_norm": 0.6060448884963989, "learning_rate": 1e-05, "loss": 1.0363, "step": 1735 }, { "epoch": 1.54118689105403, "grad_norm": 0.6205506920814514, "learning_rate": 1e-05, "loss": 1.0141, "step": 1740 }, { "epoch": 1.545615589016829, "grad_norm": 0.4897048771381378, "learning_rate": 1e-05, "loss": 1.1262, "step": 1745 }, { "epoch": 1.550044286979628, "grad_norm": 0.5701556205749512, "learning_rate": 1e-05, "loss": 1.0459, "step": 1750 }, { "epoch": 1.5544729849424268, "grad_norm": 0.4535682797431946, "learning_rate": 1e-05, "loss": 1.0237, "step": 1755 }, { "epoch": 1.5589016829052258, "grad_norm": 0.5063267350196838, "learning_rate": 1e-05, "loss": 0.9991, "step": 1760 }, { "epoch": 1.5633303808680248, "grad_norm": 0.4346560537815094, "learning_rate": 1e-05, "loss": 0.9901, "step": 1765 }, { "epoch": 1.5677590788308238, "grad_norm": 0.7285047173500061, "learning_rate": 1e-05, "loss": 1.018, "step": 1770 }, { "epoch": 1.5721877767936228, "grad_norm": 0.7841598987579346, "learning_rate": 1e-05, "loss": 1.0508, "step": 1775 }, { "epoch": 1.5766164747564217, "grad_norm": 0.5126988887786865, "learning_rate": 1e-05, "loss": 0.9749, "step": 1780 }, { "epoch": 1.5810451727192205, "grad_norm": 0.5649213194847107, "learning_rate": 1e-05, "loss": 1.1065, "step": 1785 }, { "epoch": 1.5854738706820195, "grad_norm": 0.4846344292163849, "learning_rate": 1e-05, "loss": 1.0087, "step": 1790 }, { "epoch": 1.5899025686448183, "grad_norm": 0.4923091232776642, "learning_rate": 1e-05, "loss": 1.0173, "step": 1795 }, { "epoch": 1.5943312666076173, "grad_norm": 0.4576951563358307, "learning_rate": 1e-05, "loss": 1.0064, "step": 1800 }, { "epoch": 1.5987599645704162, "grad_norm": 0.6318348050117493, "learning_rate": 1e-05, "loss": 1.0603, "step": 1805 }, { "epoch": 1.6031886625332152, "grad_norm": 0.4736900329589844, "learning_rate": 1e-05, "loss": 0.9767, "step": 1810 }, { "epoch": 1.6076173604960142, "grad_norm": 0.7124386429786682, "learning_rate": 1e-05, "loss": 1.0254, "step": 1815 }, { "epoch": 1.6120460584588132, "grad_norm": 0.6950289607048035, "learning_rate": 1e-05, "loss": 1.0501, "step": 1820 }, { "epoch": 1.6164747564216122, "grad_norm": 0.6133359670639038, "learning_rate": 1e-05, "loss": 1.0468, "step": 1825 }, { "epoch": 1.620903454384411, "grad_norm": 0.6645231246948242, "learning_rate": 1e-05, "loss": 1.009, "step": 1830 }, { "epoch": 1.62533215234721, "grad_norm": 0.41025203466415405, "learning_rate": 1e-05, "loss": 1.0513, "step": 1835 }, { "epoch": 1.6297608503100087, "grad_norm": 0.3575768768787384, "learning_rate": 1e-05, "loss": 1.0384, "step": 1840 }, { "epoch": 1.6341895482728077, "grad_norm": 0.7201238870620728, "learning_rate": 1e-05, "loss": 1.0273, "step": 1845 }, { "epoch": 1.6386182462356067, "grad_norm": 0.4535777270793915, "learning_rate": 1e-05, "loss": 1.0226, "step": 1850 }, { "epoch": 1.6430469441984057, "grad_norm": 0.7136516571044922, "learning_rate": 1e-05, "loss": 1.0704, "step": 1855 }, { "epoch": 1.6474756421612047, "grad_norm": 0.4246818423271179, "learning_rate": 1e-05, "loss": 1.0558, "step": 1860 }, { "epoch": 1.6519043401240037, "grad_norm": 0.4250449240207672, "learning_rate": 1e-05, "loss": 1.002, "step": 1865 }, { "epoch": 1.6563330380868024, "grad_norm": 0.5725380778312683, "learning_rate": 1e-05, "loss": 1.086, "step": 1870 }, { "epoch": 1.6607617360496014, "grad_norm": 0.5512462258338928, "learning_rate": 1e-05, "loss": 1.0546, "step": 1875 }, { "epoch": 1.6651904340124002, "grad_norm": 0.4659876525402069, "learning_rate": 1e-05, "loss": 1.004, "step": 1880 }, { "epoch": 1.6696191319751992, "grad_norm": 0.47894182801246643, "learning_rate": 1e-05, "loss": 1.0323, "step": 1885 }, { "epoch": 1.6740478299379982, "grad_norm": 0.6409828066825867, "learning_rate": 1e-05, "loss": 1.064, "step": 1890 }, { "epoch": 1.6784765279007972, "grad_norm": 0.5657796859741211, "learning_rate": 1e-05, "loss": 1.0191, "step": 1895 }, { "epoch": 1.6829052258635961, "grad_norm": 0.5008763074874878, "learning_rate": 1e-05, "loss": 1.0149, "step": 1900 }, { "epoch": 1.6873339238263951, "grad_norm": 0.47699323296546936, "learning_rate": 1e-05, "loss": 1.0211, "step": 1905 }, { "epoch": 1.6917626217891941, "grad_norm": 0.5299811959266663, "learning_rate": 1e-05, "loss": 1.0474, "step": 1910 }, { "epoch": 1.6961913197519929, "grad_norm": 0.47808408737182617, "learning_rate": 1e-05, "loss": 1.0157, "step": 1915 }, { "epoch": 1.7006200177147919, "grad_norm": 0.5127821564674377, "learning_rate": 1e-05, "loss": 1.0615, "step": 1920 }, { "epoch": 1.7050487156775906, "grad_norm": 0.42068690061569214, "learning_rate": 1e-05, "loss": 1.066, "step": 1925 }, { "epoch": 1.7094774136403896, "grad_norm": 0.5648940801620483, "learning_rate": 1e-05, "loss": 1.0041, "step": 1930 }, { "epoch": 1.7139061116031886, "grad_norm": 0.4861409664154053, "learning_rate": 1e-05, "loss": 1.016, "step": 1935 }, { "epoch": 1.7183348095659876, "grad_norm": 0.5075104832649231, "learning_rate": 1e-05, "loss": 1.1242, "step": 1940 }, { "epoch": 1.7227635075287866, "grad_norm": 0.5100075602531433, "learning_rate": 1e-05, "loss": 1.0264, "step": 1945 }, { "epoch": 1.7271922054915856, "grad_norm": 0.4531674385070801, "learning_rate": 1e-05, "loss": 1.0312, "step": 1950 }, { "epoch": 1.7316209034543846, "grad_norm": 0.447305828332901, "learning_rate": 1e-05, "loss": 1.0537, "step": 1955 }, { "epoch": 1.7360496014171833, "grad_norm": 0.45211970806121826, "learning_rate": 1e-05, "loss": 1.013, "step": 1960 }, { "epoch": 1.7404782993799823, "grad_norm": 0.4799877405166626, "learning_rate": 1e-05, "loss": 1.0458, "step": 1965 }, { "epoch": 1.744906997342781, "grad_norm": 0.4379311501979828, "learning_rate": 1e-05, "loss": 1.0001, "step": 1970 }, { "epoch": 1.74933569530558, "grad_norm": 0.45466145873069763, "learning_rate": 1e-05, "loss": 1.0432, "step": 1975 }, { "epoch": 1.753764393268379, "grad_norm": 0.368390828371048, "learning_rate": 1e-05, "loss": 1.0219, "step": 1980 }, { "epoch": 1.758193091231178, "grad_norm": 0.6489937901496887, "learning_rate": 1e-05, "loss": 1.0748, "step": 1985 }, { "epoch": 1.762621789193977, "grad_norm": 0.41698235273361206, "learning_rate": 1e-05, "loss": 0.9928, "step": 1990 }, { "epoch": 1.767050487156776, "grad_norm": 0.4092991054058075, "learning_rate": 1e-05, "loss": 1.05, "step": 1995 }, { "epoch": 1.7714791851195748, "grad_norm": 0.360099732875824, "learning_rate": 1e-05, "loss": 1.0842, "step": 2000 }, { "epoch": 1.7759078830823738, "grad_norm": 0.34736168384552, "learning_rate": 1e-05, "loss": 1.0539, "step": 2005 }, { "epoch": 1.7803365810451726, "grad_norm": 0.3399425745010376, "learning_rate": 1e-05, "loss": 1.034, "step": 2010 }, { "epoch": 1.7847652790079716, "grad_norm": 0.5124260783195496, "learning_rate": 1e-05, "loss": 1.0546, "step": 2015 }, { "epoch": 1.7891939769707705, "grad_norm": 0.6677430868148804, "learning_rate": 1e-05, "loss": 1.0402, "step": 2020 }, { "epoch": 1.7936226749335695, "grad_norm": 0.5623106956481934, "learning_rate": 1e-05, "loss": 1.0666, "step": 2025 }, { "epoch": 1.7980513728963685, "grad_norm": 0.3347500264644623, "learning_rate": 1e-05, "loss": 1.0003, "step": 2030 }, { "epoch": 1.8024800708591675, "grad_norm": 0.35451000928878784, "learning_rate": 1e-05, "loss": 1.0079, "step": 2035 }, { "epoch": 1.8069087688219665, "grad_norm": 0.4026930332183838, "learning_rate": 1e-05, "loss": 0.9839, "step": 2040 }, { "epoch": 1.8113374667847653, "grad_norm": 0.6083166003227234, "learning_rate": 1e-05, "loss": 1.0409, "step": 2045 }, { "epoch": 1.8157661647475642, "grad_norm": 0.3873279094696045, "learning_rate": 1e-05, "loss": 1.0847, "step": 2050 }, { "epoch": 1.820194862710363, "grad_norm": 0.7231238484382629, "learning_rate": 1e-05, "loss": 1.0247, "step": 2055 }, { "epoch": 1.824623560673162, "grad_norm": 0.4188913106918335, "learning_rate": 1e-05, "loss": 1.051, "step": 2060 }, { "epoch": 1.829052258635961, "grad_norm": 0.4547122120857239, "learning_rate": 1e-05, "loss": 1.005, "step": 2065 }, { "epoch": 1.83348095659876, "grad_norm": 0.47021517157554626, "learning_rate": 1e-05, "loss": 1.0363, "step": 2070 }, { "epoch": 1.837909654561559, "grad_norm": 0.5474349856376648, "learning_rate": 1e-05, "loss": 0.9969, "step": 2075 }, { "epoch": 1.842338352524358, "grad_norm": 0.47033992409706116, "learning_rate": 1e-05, "loss": 1.0477, "step": 2080 }, { "epoch": 1.8467670504871567, "grad_norm": 0.6260009407997131, "learning_rate": 1e-05, "loss": 1.0587, "step": 2085 }, { "epoch": 1.8511957484499557, "grad_norm": 0.44886916875839233, "learning_rate": 1e-05, "loss": 1.0188, "step": 2090 }, { "epoch": 1.8556244464127547, "grad_norm": 0.31309664249420166, "learning_rate": 1e-05, "loss": 1.0449, "step": 2095 }, { "epoch": 1.8600531443755535, "grad_norm": 0.4695584774017334, "learning_rate": 1e-05, "loss": 1.0642, "step": 2100 }, { "epoch": 1.8644818423383525, "grad_norm": 0.5193604826927185, "learning_rate": 1e-05, "loss": 1.0631, "step": 2105 }, { "epoch": 1.8689105403011514, "grad_norm": 0.5674470663070679, "learning_rate": 1e-05, "loss": 1.0733, "step": 2110 }, { "epoch": 1.8733392382639504, "grad_norm": 0.5005362033843994, "learning_rate": 1e-05, "loss": 1.0192, "step": 2115 }, { "epoch": 1.8777679362267494, "grad_norm": 0.4786299467086792, "learning_rate": 1e-05, "loss": 1.0587, "step": 2120 }, { "epoch": 1.8821966341895484, "grad_norm": 0.31839287281036377, "learning_rate": 1e-05, "loss": 1.0096, "step": 2125 }, { "epoch": 1.8866253321523472, "grad_norm": 0.6383420825004578, "learning_rate": 1e-05, "loss": 1.0354, "step": 2130 }, { "epoch": 1.8910540301151462, "grad_norm": 0.4370393753051758, "learning_rate": 1e-05, "loss": 1.0526, "step": 2135 }, { "epoch": 1.895482728077945, "grad_norm": 0.4346504211425781, "learning_rate": 1e-05, "loss": 1.0233, "step": 2140 }, { "epoch": 1.899911426040744, "grad_norm": 0.4525904059410095, "learning_rate": 1e-05, "loss": 1.0617, "step": 2145 }, { "epoch": 1.904340124003543, "grad_norm": 0.4848250448703766, "learning_rate": 1e-05, "loss": 0.9862, "step": 2150 }, { "epoch": 1.908768821966342, "grad_norm": 0.4482121765613556, "learning_rate": 1e-05, "loss": 1.0218, "step": 2155 }, { "epoch": 1.9131975199291409, "grad_norm": 0.5617077946662903, "learning_rate": 1e-05, "loss": 1.0317, "step": 2160 }, { "epoch": 1.9176262178919399, "grad_norm": 0.4350849390029907, "learning_rate": 1e-05, "loss": 1.0902, "step": 2165 }, { "epoch": 1.9220549158547389, "grad_norm": 0.3970003128051758, "learning_rate": 1e-05, "loss": 1.0279, "step": 2170 }, { "epoch": 1.9264836138175376, "grad_norm": 0.4444156587123871, "learning_rate": 1e-05, "loss": 1.0388, "step": 2175 }, { "epoch": 1.9309123117803366, "grad_norm": 0.6810417175292969, "learning_rate": 1e-05, "loss": 1.0351, "step": 2180 }, { "epoch": 1.9353410097431354, "grad_norm": 0.5398831367492676, "learning_rate": 1e-05, "loss": 1.0344, "step": 2185 }, { "epoch": 1.9397697077059344, "grad_norm": 0.39851683378219604, "learning_rate": 1e-05, "loss": 1.0039, "step": 2190 }, { "epoch": 1.9441984056687334, "grad_norm": 0.47903212904930115, "learning_rate": 1e-05, "loss": 1.0439, "step": 2195 }, { "epoch": 1.9486271036315324, "grad_norm": 0.468469500541687, "learning_rate": 1e-05, "loss": 1.0225, "step": 2200 }, { "epoch": 1.9530558015943313, "grad_norm": 0.46060481667518616, "learning_rate": 1e-05, "loss": 1.0477, "step": 2205 }, { "epoch": 1.9574844995571303, "grad_norm": 0.45541658997535706, "learning_rate": 1e-05, "loss": 1.0601, "step": 2210 }, { "epoch": 1.961913197519929, "grad_norm": 0.42831093072891235, "learning_rate": 1e-05, "loss": 1.0615, "step": 2215 }, { "epoch": 1.966341895482728, "grad_norm": 0.283743679523468, "learning_rate": 1e-05, "loss": 0.9972, "step": 2220 }, { "epoch": 1.9707705934455269, "grad_norm": 0.44202861189842224, "learning_rate": 1e-05, "loss": 1.0832, "step": 2225 }, { "epoch": 1.9751992914083258, "grad_norm": 0.4106939733028412, "learning_rate": 1e-05, "loss": 1.0506, "step": 2230 }, { "epoch": 1.9796279893711248, "grad_norm": 0.34922152757644653, "learning_rate": 1e-05, "loss": 0.9847, "step": 2235 }, { "epoch": 1.9840566873339238, "grad_norm": 0.5265505909919739, "learning_rate": 1e-05, "loss": 1.0309, "step": 2240 }, { "epoch": 1.9884853852967228, "grad_norm": 0.5609035491943359, "learning_rate": 1e-05, "loss": 1.0049, "step": 2245 }, { "epoch": 1.9929140832595218, "grad_norm": 0.5041390657424927, "learning_rate": 1e-05, "loss": 1.03, "step": 2250 }, { "epoch": 1.9973427812223208, "grad_norm": 0.6953744292259216, "learning_rate": 1e-05, "loss": 1.0603, "step": 2255 }, { "epoch": 2.0017714791851198, "grad_norm": 0.3584790527820587, "learning_rate": 1e-05, "loss": 1.0633, "step": 2260 }, { "epoch": 2.0062001771479183, "grad_norm": 0.4668038785457611, "learning_rate": 1e-05, "loss": 0.9682, "step": 2265 }, { "epoch": 2.0106288751107173, "grad_norm": 0.44779878854751587, "learning_rate": 1e-05, "loss": 1.0322, "step": 2270 }, { "epoch": 2.0150575730735163, "grad_norm": 0.5731096267700195, "learning_rate": 1e-05, "loss": 1.0594, "step": 2275 }, { "epoch": 2.0194862710363153, "grad_norm": 0.5145208239555359, "learning_rate": 1e-05, "loss": 1.0314, "step": 2280 }, { "epoch": 2.0239149689991143, "grad_norm": 0.6408817768096924, "learning_rate": 1e-05, "loss": 1.0167, "step": 2285 }, { "epoch": 2.0283436669619133, "grad_norm": 0.6031640768051147, "learning_rate": 1e-05, "loss": 1.0694, "step": 2290 }, { "epoch": 2.0327723649247122, "grad_norm": 0.6322607398033142, "learning_rate": 1e-05, "loss": 1.032, "step": 2295 }, { "epoch": 2.0372010628875112, "grad_norm": 0.6165448427200317, "learning_rate": 1e-05, "loss": 1.0205, "step": 2300 }, { "epoch": 2.0416297608503102, "grad_norm": 0.34421566128730774, "learning_rate": 1e-05, "loss": 1.0773, "step": 2305 }, { "epoch": 2.0460584588131088, "grad_norm": 0.5919576287269592, "learning_rate": 1e-05, "loss": 1.0579, "step": 2310 }, { "epoch": 2.0504871567759078, "grad_norm": 0.773813009262085, "learning_rate": 1e-05, "loss": 0.9834, "step": 2315 }, { "epoch": 2.0549158547387067, "grad_norm": 0.43329134583473206, "learning_rate": 1e-05, "loss": 1.057, "step": 2320 }, { "epoch": 2.0593445527015057, "grad_norm": 0.5484357476234436, "learning_rate": 1e-05, "loss": 1.0441, "step": 2325 }, { "epoch": 2.0637732506643047, "grad_norm": 0.4514033794403076, "learning_rate": 1e-05, "loss": 1.0415, "step": 2330 }, { "epoch": 2.0682019486271037, "grad_norm": 0.5863833427429199, "learning_rate": 1e-05, "loss": 1.0145, "step": 2335 }, { "epoch": 2.0726306465899027, "grad_norm": 0.4827769696712494, "learning_rate": 1e-05, "loss": 1.0384, "step": 2340 }, { "epoch": 2.0770593445527017, "grad_norm": 0.4218099117279053, "learning_rate": 1e-05, "loss": 1.0158, "step": 2345 }, { "epoch": 2.0814880425155002, "grad_norm": 0.49901875853538513, "learning_rate": 1e-05, "loss": 1.0729, "step": 2350 }, { "epoch": 2.0859167404782992, "grad_norm": 0.4048095643520355, "learning_rate": 1e-05, "loss": 1.0413, "step": 2355 }, { "epoch": 2.090345438441098, "grad_norm": 0.5221948027610779, "learning_rate": 1e-05, "loss": 1.0444, "step": 2360 }, { "epoch": 2.094774136403897, "grad_norm": 0.4813966155052185, "learning_rate": 1e-05, "loss": 1.0545, "step": 2365 }, { "epoch": 2.099202834366696, "grad_norm": 0.4535312056541443, "learning_rate": 1e-05, "loss": 1.0314, "step": 2370 }, { "epoch": 2.103631532329495, "grad_norm": 0.5918130874633789, "learning_rate": 1e-05, "loss": 1.0561, "step": 2375 }, { "epoch": 2.108060230292294, "grad_norm": 0.4560534656047821, "learning_rate": 1e-05, "loss": 1.0732, "step": 2380 }, { "epoch": 2.112488928255093, "grad_norm": 0.5319624543190002, "learning_rate": 1e-05, "loss": 1.0379, "step": 2385 }, { "epoch": 2.116917626217892, "grad_norm": 0.6658979654312134, "learning_rate": 1e-05, "loss": 1.018, "step": 2390 }, { "epoch": 2.1213463241806907, "grad_norm": 0.4870125651359558, "learning_rate": 1e-05, "loss": 0.9994, "step": 2395 }, { "epoch": 2.1257750221434897, "grad_norm": 0.3799990117549896, "learning_rate": 1e-05, "loss": 1.0476, "step": 2400 }, { "epoch": 2.1302037201062887, "grad_norm": 0.42643675208091736, "learning_rate": 1e-05, "loss": 1.0433, "step": 2405 }, { "epoch": 2.1346324180690877, "grad_norm": 0.5843912959098816, "learning_rate": 1e-05, "loss": 1.003, "step": 2410 }, { "epoch": 2.1390611160318866, "grad_norm": 0.40163424611091614, "learning_rate": 1e-05, "loss": 1.0077, "step": 2415 }, { "epoch": 2.1434898139946856, "grad_norm": 0.6528959274291992, "learning_rate": 1e-05, "loss": 1.038, "step": 2420 }, { "epoch": 2.1479185119574846, "grad_norm": 0.48753559589385986, "learning_rate": 1e-05, "loss": 1.0305, "step": 2425 }, { "epoch": 2.1523472099202836, "grad_norm": 0.4759777784347534, "learning_rate": 1e-05, "loss": 1.0776, "step": 2430 }, { "epoch": 2.156775907883082, "grad_norm": 0.4306032061576843, "learning_rate": 1e-05, "loss": 0.9828, "step": 2435 }, { "epoch": 2.161204605845881, "grad_norm": 0.4647599458694458, "learning_rate": 1e-05, "loss": 1.0478, "step": 2440 }, { "epoch": 2.16563330380868, "grad_norm": 0.4583887755870819, "learning_rate": 1e-05, "loss": 1.0759, "step": 2445 }, { "epoch": 2.170062001771479, "grad_norm": 0.39155516028404236, "learning_rate": 1e-05, "loss": 1.0184, "step": 2450 }, { "epoch": 2.174490699734278, "grad_norm": 0.5356603264808655, "learning_rate": 1e-05, "loss": 1.0384, "step": 2455 }, { "epoch": 2.178919397697077, "grad_norm": 0.48529770970344543, "learning_rate": 1e-05, "loss": 1.0464, "step": 2460 }, { "epoch": 2.183348095659876, "grad_norm": 0.4828105568885803, "learning_rate": 1e-05, "loss": 1.0219, "step": 2465 }, { "epoch": 2.187776793622675, "grad_norm": 0.46481409668922424, "learning_rate": 1e-05, "loss": 1.0551, "step": 2470 }, { "epoch": 2.192205491585474, "grad_norm": 0.5605440139770508, "learning_rate": 1e-05, "loss": 1.0225, "step": 2475 }, { "epoch": 2.1966341895482726, "grad_norm": 0.5987666249275208, "learning_rate": 1e-05, "loss": 0.989, "step": 2480 }, { "epoch": 2.2010628875110716, "grad_norm": 0.33373814821243286, "learning_rate": 1e-05, "loss": 1.015, "step": 2485 }, { "epoch": 2.2054915854738706, "grad_norm": 0.5175365209579468, "learning_rate": 1e-05, "loss": 1.062, "step": 2490 }, { "epoch": 2.2099202834366696, "grad_norm": 0.45836082100868225, "learning_rate": 1e-05, "loss": 1.0729, "step": 2495 }, { "epoch": 2.2143489813994686, "grad_norm": 0.38386160135269165, "learning_rate": 1e-05, "loss": 1.0725, "step": 2500 }, { "epoch": 2.2187776793622676, "grad_norm": 0.44808462262153625, "learning_rate": 1e-05, "loss": 1.0641, "step": 2505 }, { "epoch": 2.2232063773250665, "grad_norm": 0.3938499391078949, "learning_rate": 1e-05, "loss": 1.0104, "step": 2510 }, { "epoch": 2.2276350752878655, "grad_norm": 0.4187353253364563, "learning_rate": 1e-05, "loss": 1.093, "step": 2515 }, { "epoch": 2.2320637732506645, "grad_norm": 0.4338715672492981, "learning_rate": 1e-05, "loss": 0.9909, "step": 2520 }, { "epoch": 2.236492471213463, "grad_norm": 0.8376438617706299, "learning_rate": 1e-05, "loss": 1.0319, "step": 2525 }, { "epoch": 2.240921169176262, "grad_norm": 0.5062357187271118, "learning_rate": 1e-05, "loss": 1.0377, "step": 2530 }, { "epoch": 2.245349867139061, "grad_norm": 0.4610828757286072, "learning_rate": 1e-05, "loss": 1.0726, "step": 2535 }, { "epoch": 2.24977856510186, "grad_norm": 0.47126075625419617, "learning_rate": 1e-05, "loss": 1.0618, "step": 2540 }, { "epoch": 2.254207263064659, "grad_norm": 0.4205315411090851, "learning_rate": 1e-05, "loss": 1.025, "step": 2545 }, { "epoch": 2.258635961027458, "grad_norm": 0.40285390615463257, "learning_rate": 1e-05, "loss": 1.0446, "step": 2550 }, { "epoch": 2.263064658990257, "grad_norm": 0.37645837664604187, "learning_rate": 1e-05, "loss": 1.0637, "step": 2555 }, { "epoch": 2.267493356953056, "grad_norm": 0.7137925624847412, "learning_rate": 1e-05, "loss": 0.9601, "step": 2560 }, { "epoch": 2.271922054915855, "grad_norm": 0.558600664138794, "learning_rate": 1e-05, "loss": 1.0151, "step": 2565 }, { "epoch": 2.2763507528786535, "grad_norm": 0.7317633032798767, "learning_rate": 1e-05, "loss": 1.0795, "step": 2570 }, { "epoch": 2.2807794508414525, "grad_norm": 0.4917280077934265, "learning_rate": 1e-05, "loss": 1.0471, "step": 2575 }, { "epoch": 2.2852081488042515, "grad_norm": 0.40624135732650757, "learning_rate": 1e-05, "loss": 1.0626, "step": 2580 }, { "epoch": 2.2896368467670505, "grad_norm": 0.5770871639251709, "learning_rate": 1e-05, "loss": 1.0705, "step": 2585 }, { "epoch": 2.2940655447298495, "grad_norm": 0.3286740779876709, "learning_rate": 1e-05, "loss": 1.0578, "step": 2590 }, { "epoch": 2.2984942426926485, "grad_norm": 0.6179378628730774, "learning_rate": 1e-05, "loss": 1.0379, "step": 2595 }, { "epoch": 2.3029229406554474, "grad_norm": 0.3655185103416443, "learning_rate": 1e-05, "loss": 1.0122, "step": 2600 }, { "epoch": 2.307351638618246, "grad_norm": 0.45200085639953613, "learning_rate": 1e-05, "loss": 1.059, "step": 2605 }, { "epoch": 2.311780336581045, "grad_norm": 0.3856170177459717, "learning_rate": 1e-05, "loss": 1.0273, "step": 2610 }, { "epoch": 2.316209034543844, "grad_norm": 0.3704911470413208, "learning_rate": 1e-05, "loss": 1.0056, "step": 2615 }, { "epoch": 2.320637732506643, "grad_norm": 0.5131767988204956, "learning_rate": 1e-05, "loss": 0.9905, "step": 2620 }, { "epoch": 2.325066430469442, "grad_norm": 0.4233784079551697, "learning_rate": 1e-05, "loss": 0.9846, "step": 2625 }, { "epoch": 2.329495128432241, "grad_norm": 0.524312436580658, "learning_rate": 1e-05, "loss": 1.0243, "step": 2630 }, { "epoch": 2.33392382639504, "grad_norm": 0.5103597044944763, "learning_rate": 1e-05, "loss": 1.093, "step": 2635 }, { "epoch": 2.338352524357839, "grad_norm": 0.5735695958137512, "learning_rate": 1e-05, "loss": 1.0279, "step": 2640 }, { "epoch": 2.342781222320638, "grad_norm": 0.5327978134155273, "learning_rate": 1e-05, "loss": 1.0083, "step": 2645 }, { "epoch": 2.3472099202834364, "grad_norm": 0.4777471721172333, "learning_rate": 1e-05, "loss": 0.9991, "step": 2650 }, { "epoch": 2.3516386182462354, "grad_norm": 0.3995833694934845, "learning_rate": 1e-05, "loss": 1.031, "step": 2655 }, { "epoch": 2.3560673162090344, "grad_norm": 0.5510890483856201, "learning_rate": 1e-05, "loss": 1.0196, "step": 2660 }, { "epoch": 2.3604960141718334, "grad_norm": 0.4626731872558594, "learning_rate": 1e-05, "loss": 1.0977, "step": 2665 }, { "epoch": 2.3649247121346324, "grad_norm": 0.36934515833854675, "learning_rate": 1e-05, "loss": 1.0728, "step": 2670 }, { "epoch": 2.3693534100974314, "grad_norm": 0.5003888607025146, "learning_rate": 1e-05, "loss": 0.9761, "step": 2675 }, { "epoch": 2.3737821080602304, "grad_norm": 0.4252766966819763, "learning_rate": 1e-05, "loss": 1.0376, "step": 2680 }, { "epoch": 2.3782108060230294, "grad_norm": 0.376730352640152, "learning_rate": 1e-05, "loss": 1.0314, "step": 2685 }, { "epoch": 2.3826395039858284, "grad_norm": 0.43287017941474915, "learning_rate": 1e-05, "loss": 1.0538, "step": 2690 }, { "epoch": 2.387068201948627, "grad_norm": 0.4191151261329651, "learning_rate": 1e-05, "loss": 1.073, "step": 2695 }, { "epoch": 2.391496899911426, "grad_norm": 0.3899643123149872, "learning_rate": 1e-05, "loss": 1.0117, "step": 2700 }, { "epoch": 2.395925597874225, "grad_norm": 0.49577733874320984, "learning_rate": 1e-05, "loss": 1.0127, "step": 2705 }, { "epoch": 2.400354295837024, "grad_norm": 0.5975940227508545, "learning_rate": 1e-05, "loss": 1.015, "step": 2710 }, { "epoch": 2.404782993799823, "grad_norm": 0.4224880635738373, "learning_rate": 1e-05, "loss": 1.0275, "step": 2715 }, { "epoch": 2.409211691762622, "grad_norm": 0.5402734279632568, "learning_rate": 1e-05, "loss": 1.0528, "step": 2720 }, { "epoch": 2.413640389725421, "grad_norm": 0.31992432475090027, "learning_rate": 1e-05, "loss": 0.9535, "step": 2725 }, { "epoch": 2.41806908768822, "grad_norm": 0.32328641414642334, "learning_rate": 1e-05, "loss": 1.0146, "step": 2730 }, { "epoch": 2.422497785651019, "grad_norm": 0.3336014151573181, "learning_rate": 1e-05, "loss": 0.9849, "step": 2735 }, { "epoch": 2.4269264836138174, "grad_norm": 0.5074469447135925, "learning_rate": 1e-05, "loss": 1.0007, "step": 2740 }, { "epoch": 2.4313551815766163, "grad_norm": 0.5504379272460938, "learning_rate": 1e-05, "loss": 1.0048, "step": 2745 }, { "epoch": 2.4357838795394153, "grad_norm": 0.4262321889400482, "learning_rate": 1e-05, "loss": 1.0344, "step": 2750 }, { "epoch": 2.4402125775022143, "grad_norm": 0.5119176506996155, "learning_rate": 1e-05, "loss": 1.0201, "step": 2755 }, { "epoch": 2.4446412754650133, "grad_norm": 0.3485449552536011, "learning_rate": 1e-05, "loss": 1.0961, "step": 2760 }, { "epoch": 2.4490699734278123, "grad_norm": 0.29425960779190063, "learning_rate": 1e-05, "loss": 1.0491, "step": 2765 }, { "epoch": 2.4534986713906113, "grad_norm": 0.3820127844810486, "learning_rate": 1e-05, "loss": 1.0248, "step": 2770 }, { "epoch": 2.4579273693534103, "grad_norm": 0.3655147850513458, "learning_rate": 1e-05, "loss": 1.008, "step": 2775 }, { "epoch": 2.4623560673162093, "grad_norm": 0.4104725420475006, "learning_rate": 1e-05, "loss": 1.0458, "step": 2780 }, { "epoch": 2.466784765279008, "grad_norm": 0.595163106918335, "learning_rate": 1e-05, "loss": 1.0521, "step": 2785 }, { "epoch": 2.471213463241807, "grad_norm": 0.6888088583946228, "learning_rate": 1e-05, "loss": 0.9832, "step": 2790 }, { "epoch": 2.475642161204606, "grad_norm": 0.46057769656181335, "learning_rate": 1e-05, "loss": 1.0485, "step": 2795 }, { "epoch": 2.4800708591674048, "grad_norm": 0.5151911973953247, "learning_rate": 1e-05, "loss": 1.0137, "step": 2800 }, { "epoch": 2.4844995571302038, "grad_norm": 0.5031762719154358, "learning_rate": 1e-05, "loss": 1.0411, "step": 2805 }, { "epoch": 2.4889282550930028, "grad_norm": 0.4458197355270386, "learning_rate": 1e-05, "loss": 1.0447, "step": 2810 }, { "epoch": 2.4933569530558017, "grad_norm": 0.4074757397174835, "learning_rate": 1e-05, "loss": 1.0566, "step": 2815 }, { "epoch": 2.4977856510186003, "grad_norm": 0.5228142738342285, "learning_rate": 1e-05, "loss": 1.0889, "step": 2820 }, { "epoch": 2.5022143489813997, "grad_norm": 0.47482267022132874, "learning_rate": 1e-05, "loss": 1.0413, "step": 2825 }, { "epoch": 2.5066430469441983, "grad_norm": 0.6530681252479553, "learning_rate": 1e-05, "loss": 1.0112, "step": 2830 }, { "epoch": 2.5110717449069972, "grad_norm": 0.5544772148132324, "learning_rate": 1e-05, "loss": 1.0779, "step": 2835 }, { "epoch": 2.5155004428697962, "grad_norm": 0.5468206405639648, "learning_rate": 1e-05, "loss": 1.0152, "step": 2840 }, { "epoch": 2.5199291408325952, "grad_norm": 0.42827850580215454, "learning_rate": 1e-05, "loss": 0.9996, "step": 2845 }, { "epoch": 2.524357838795394, "grad_norm": 0.4947317838668823, "learning_rate": 1e-05, "loss": 1.0598, "step": 2850 }, { "epoch": 2.528786536758193, "grad_norm": 0.4925984740257263, "learning_rate": 1e-05, "loss": 1.0212, "step": 2855 }, { "epoch": 2.533215234720992, "grad_norm": 0.3933313190937042, "learning_rate": 1e-05, "loss": 0.9885, "step": 2860 }, { "epoch": 2.5376439326837907, "grad_norm": 0.4389501214027405, "learning_rate": 1e-05, "loss": 0.9617, "step": 2865 }, { "epoch": 2.54207263064659, "grad_norm": 0.40963491797447205, "learning_rate": 1e-05, "loss": 0.9791, "step": 2870 }, { "epoch": 2.5465013286093887, "grad_norm": 0.37005141377449036, "learning_rate": 1e-05, "loss": 1.0302, "step": 2875 }, { "epoch": 2.5509300265721877, "grad_norm": 0.5001286864280701, "learning_rate": 1e-05, "loss": 1.0083, "step": 2880 }, { "epoch": 2.5553587245349867, "grad_norm": 0.38077735900878906, "learning_rate": 1e-05, "loss": 1.0412, "step": 2885 }, { "epoch": 2.5597874224977857, "grad_norm": 0.5739526748657227, "learning_rate": 1e-05, "loss": 1.0109, "step": 2890 }, { "epoch": 2.5642161204605847, "grad_norm": 0.5770562291145325, "learning_rate": 1e-05, "loss": 1.0938, "step": 2895 }, { "epoch": 2.5686448184233837, "grad_norm": 0.40662881731987, "learning_rate": 1e-05, "loss": 1.0354, "step": 2900 }, { "epoch": 2.5730735163861826, "grad_norm": 0.39924120903015137, "learning_rate": 1e-05, "loss": 1.0447, "step": 2905 }, { "epoch": 2.577502214348981, "grad_norm": 0.4533263146877289, "learning_rate": 1e-05, "loss": 1.0146, "step": 2910 }, { "epoch": 2.58193091231178, "grad_norm": 0.5112301111221313, "learning_rate": 1e-05, "loss": 1.0464, "step": 2915 }, { "epoch": 2.586359610274579, "grad_norm": 0.4359137713909149, "learning_rate": 1e-05, "loss": 1.0375, "step": 2920 }, { "epoch": 2.590788308237378, "grad_norm": 0.4898315370082855, "learning_rate": 1e-05, "loss": 1.0335, "step": 2925 }, { "epoch": 2.595217006200177, "grad_norm": 0.43125033378601074, "learning_rate": 1e-05, "loss": 1.0173, "step": 2930 }, { "epoch": 2.599645704162976, "grad_norm": 0.5227671265602112, "learning_rate": 1e-05, "loss": 1.0986, "step": 2935 }, { "epoch": 2.604074402125775, "grad_norm": 0.4317053556442261, "learning_rate": 1e-05, "loss": 0.9966, "step": 2940 }, { "epoch": 2.608503100088574, "grad_norm": 0.425248384475708, "learning_rate": 1e-05, "loss": 1.0142, "step": 2945 }, { "epoch": 2.612931798051373, "grad_norm": 0.5229920744895935, "learning_rate": 1e-05, "loss": 1.0115, "step": 2950 }, { "epoch": 2.6173604960141716, "grad_norm": 0.3360884189605713, "learning_rate": 1e-05, "loss": 0.9524, "step": 2955 }, { "epoch": 2.6217891939769706, "grad_norm": 0.3374822437763214, "learning_rate": 1e-05, "loss": 1.0125, "step": 2960 }, { "epoch": 2.6262178919397696, "grad_norm": 0.4743426740169525, "learning_rate": 1e-05, "loss": 1.0885, "step": 2965 }, { "epoch": 2.6306465899025686, "grad_norm": 0.43363791704177856, "learning_rate": 1e-05, "loss": 0.9334, "step": 2970 }, { "epoch": 2.6350752878653676, "grad_norm": 0.3984227776527405, "learning_rate": 1e-05, "loss": 1.039, "step": 2975 }, { "epoch": 2.6395039858281666, "grad_norm": 0.5370942950248718, "learning_rate": 1e-05, "loss": 1.0036, "step": 2980 }, { "epoch": 2.6439326837909656, "grad_norm": 0.39962244033813477, "learning_rate": 1e-05, "loss": 1.004, "step": 2985 }, { "epoch": 2.648361381753764, "grad_norm": 0.40735554695129395, "learning_rate": 1e-05, "loss": 1.0281, "step": 2990 }, { "epoch": 2.6527900797165636, "grad_norm": 0.46149295568466187, "learning_rate": 1e-05, "loss": 1.036, "step": 2995 }, { "epoch": 2.657218777679362, "grad_norm": 0.4877465069293976, "learning_rate": 1e-05, "loss": 1.0401, "step": 3000 }, { "epoch": 2.661647475642161, "grad_norm": 0.5125572085380554, "learning_rate": 1e-05, "loss": 1.0087, "step": 3005 }, { "epoch": 2.66607617360496, "grad_norm": 0.5010902881622314, "learning_rate": 1e-05, "loss": 1.0656, "step": 3010 }, { "epoch": 2.670504871567759, "grad_norm": 0.507574200630188, "learning_rate": 1e-05, "loss": 1.015, "step": 3015 }, { "epoch": 2.674933569530558, "grad_norm": 0.44138211011886597, "learning_rate": 1e-05, "loss": 1.0065, "step": 3020 }, { "epoch": 2.679362267493357, "grad_norm": 0.3149421215057373, "learning_rate": 1e-05, "loss": 1.1063, "step": 3025 }, { "epoch": 2.683790965456156, "grad_norm": 0.4658683240413666, "learning_rate": 1e-05, "loss": 1.0262, "step": 3030 }, { "epoch": 2.6882196634189546, "grad_norm": 0.373843252658844, "learning_rate": 1e-05, "loss": 1.0337, "step": 3035 }, { "epoch": 2.692648361381754, "grad_norm": 0.3963046371936798, "learning_rate": 1e-05, "loss": 1.025, "step": 3040 }, { "epoch": 2.6970770593445526, "grad_norm": 0.46280255913734436, "learning_rate": 1e-05, "loss": 1.0499, "step": 3045 }, { "epoch": 2.7015057573073515, "grad_norm": 0.412231981754303, "learning_rate": 1e-05, "loss": 1.0135, "step": 3050 }, { "epoch": 2.7059344552701505, "grad_norm": 0.3762901723384857, "learning_rate": 1e-05, "loss": 1.0127, "step": 3055 }, { "epoch": 2.7103631532329495, "grad_norm": 0.4995385408401489, "learning_rate": 1e-05, "loss": 1.0288, "step": 3060 }, { "epoch": 2.7147918511957485, "grad_norm": 0.38599368929862976, "learning_rate": 1e-05, "loss": 1.018, "step": 3065 }, { "epoch": 2.7192205491585475, "grad_norm": 0.31549742817878723, "learning_rate": 1e-05, "loss": 1.0068, "step": 3070 }, { "epoch": 2.7236492471213465, "grad_norm": 0.3889061510562897, "learning_rate": 1e-05, "loss": 1.0463, "step": 3075 }, { "epoch": 2.728077945084145, "grad_norm": 0.6376797556877136, "learning_rate": 1e-05, "loss": 0.9665, "step": 3080 }, { "epoch": 2.7325066430469445, "grad_norm": 0.549766480922699, "learning_rate": 1e-05, "loss": 1.0545, "step": 3085 }, { "epoch": 2.736935341009743, "grad_norm": 0.3776531517505646, "learning_rate": 1e-05, "loss": 1.0798, "step": 3090 }, { "epoch": 2.741364038972542, "grad_norm": 0.40356865525245667, "learning_rate": 1e-05, "loss": 1.034, "step": 3095 }, { "epoch": 2.745792736935341, "grad_norm": 0.41116103529930115, "learning_rate": 1e-05, "loss": 1.0188, "step": 3100 }, { "epoch": 2.75022143489814, "grad_norm": 0.41147708892822266, "learning_rate": 1e-05, "loss": 1.0362, "step": 3105 }, { "epoch": 2.754650132860939, "grad_norm": 0.43723002076148987, "learning_rate": 1e-05, "loss": 1.0685, "step": 3110 }, { "epoch": 2.759078830823738, "grad_norm": 0.5626050233840942, "learning_rate": 1e-05, "loss": 1.0119, "step": 3115 }, { "epoch": 2.763507528786537, "grad_norm": 0.43236517906188965, "learning_rate": 1e-05, "loss": 1.0105, "step": 3120 }, { "epoch": 2.7679362267493355, "grad_norm": 0.3476856052875519, "learning_rate": 1e-05, "loss": 1.0311, "step": 3125 }, { "epoch": 2.7723649247121345, "grad_norm": 0.47323328256607056, "learning_rate": 1e-05, "loss": 1.0661, "step": 3130 }, { "epoch": 2.7767936226749335, "grad_norm": 0.6006180047988892, "learning_rate": 1e-05, "loss": 1.0616, "step": 3135 }, { "epoch": 2.7812223206377324, "grad_norm": 0.6216714382171631, "learning_rate": 1e-05, "loss": 0.9522, "step": 3140 }, { "epoch": 2.7856510186005314, "grad_norm": 0.31843629479408264, "learning_rate": 1e-05, "loss": 1.0526, "step": 3145 }, { "epoch": 2.7900797165633304, "grad_norm": 0.32713088393211365, "learning_rate": 1e-05, "loss": 1.0475, "step": 3150 }, { "epoch": 2.7945084145261294, "grad_norm": 0.49055880308151245, "learning_rate": 1e-05, "loss": 1.069, "step": 3155 }, { "epoch": 2.7989371124889284, "grad_norm": 0.33268678188323975, "learning_rate": 1e-05, "loss": 0.9459, "step": 3160 }, { "epoch": 2.8033658104517274, "grad_norm": 0.34377628564834595, "learning_rate": 1e-05, "loss": 1.0493, "step": 3165 }, { "epoch": 2.807794508414526, "grad_norm": 0.43863555788993835, "learning_rate": 1e-05, "loss": 1.0266, "step": 3170 }, { "epoch": 2.812223206377325, "grad_norm": 0.4929737448692322, "learning_rate": 1e-05, "loss": 0.9972, "step": 3175 }, { "epoch": 2.816651904340124, "grad_norm": 0.3163629174232483, "learning_rate": 1e-05, "loss": 0.996, "step": 3180 }, { "epoch": 2.821080602302923, "grad_norm": 0.4430500864982605, "learning_rate": 1e-05, "loss": 1.0753, "step": 3185 }, { "epoch": 2.825509300265722, "grad_norm": 0.41517603397369385, "learning_rate": 1e-05, "loss": 1.0346, "step": 3190 }, { "epoch": 2.829937998228521, "grad_norm": 0.39873167872428894, "learning_rate": 1e-05, "loss": 1.0043, "step": 3195 }, { "epoch": 2.83436669619132, "grad_norm": 0.35759586095809937, "learning_rate": 1e-05, "loss": 0.9379, "step": 3200 }, { "epoch": 2.8387953941541184, "grad_norm": 0.48786240816116333, "learning_rate": 1e-05, "loss": 1.0202, "step": 3205 }, { "epoch": 2.843224092116918, "grad_norm": 0.4037776291370392, "learning_rate": 1e-05, "loss": 1.067, "step": 3210 }, { "epoch": 2.8476527900797164, "grad_norm": 0.4887538254261017, "learning_rate": 1e-05, "loss": 1.083, "step": 3215 }, { "epoch": 2.8520814880425154, "grad_norm": 0.39434826374053955, "learning_rate": 1e-05, "loss": 1.0528, "step": 3220 }, { "epoch": 2.8565101860053144, "grad_norm": 0.5967410206794739, "learning_rate": 1e-05, "loss": 1.0284, "step": 3225 }, { "epoch": 2.8609388839681134, "grad_norm": 0.4126602113246918, "learning_rate": 1e-05, "loss": 1.0614, "step": 3230 }, { "epoch": 2.8653675819309123, "grad_norm": 0.48035362362861633, "learning_rate": 1e-05, "loss": 1.0772, "step": 3235 }, { "epoch": 2.8697962798937113, "grad_norm": 0.5496764183044434, "learning_rate": 1e-05, "loss": 1.0454, "step": 3240 }, { "epoch": 2.8742249778565103, "grad_norm": 0.3879026770591736, "learning_rate": 1e-05, "loss": 1.0418, "step": 3245 }, { "epoch": 2.878653675819309, "grad_norm": 0.37894827127456665, "learning_rate": 1e-05, "loss": 1.0413, "step": 3250 }, { "epoch": 2.8830823737821083, "grad_norm": 0.3943926990032196, "learning_rate": 1e-05, "loss": 1.0005, "step": 3255 }, { "epoch": 2.887511071744907, "grad_norm": 0.5473713874816895, "learning_rate": 1e-05, "loss": 1.026, "step": 3260 }, { "epoch": 2.891939769707706, "grad_norm": 0.3977048397064209, "learning_rate": 1e-05, "loss": 1.0898, "step": 3265 }, { "epoch": 2.896368467670505, "grad_norm": 0.5678221583366394, "learning_rate": 1e-05, "loss": 0.9884, "step": 3270 }, { "epoch": 2.900797165633304, "grad_norm": 0.4889892041683197, "learning_rate": 1e-05, "loss": 0.94, "step": 3275 }, { "epoch": 2.905225863596103, "grad_norm": 0.5965889096260071, "learning_rate": 1e-05, "loss": 0.9834, "step": 3280 }, { "epoch": 2.909654561558902, "grad_norm": 0.5212200880050659, "learning_rate": 1e-05, "loss": 1.0005, "step": 3285 }, { "epoch": 2.9140832595217008, "grad_norm": 0.5184657573699951, "learning_rate": 1e-05, "loss": 0.9981, "step": 3290 }, { "epoch": 2.9185119574844993, "grad_norm": 0.4270380139350891, "learning_rate": 1e-05, "loss": 1.0375, "step": 3295 }, { "epoch": 2.9229406554472988, "grad_norm": 0.4573812782764435, "learning_rate": 1e-05, "loss": 1.0676, "step": 3300 }, { "epoch": 2.9273693534100973, "grad_norm": 0.5198473930358887, "learning_rate": 1e-05, "loss": 1.0479, "step": 3305 }, { "epoch": 2.9317980513728963, "grad_norm": 0.41153794527053833, "learning_rate": 1e-05, "loss": 1.0194, "step": 3310 }, { "epoch": 2.9362267493356953, "grad_norm": 0.4575025737285614, "learning_rate": 1e-05, "loss": 1.0378, "step": 3315 }, { "epoch": 2.9406554472984943, "grad_norm": 0.6119422316551208, "learning_rate": 1e-05, "loss": 1.0033, "step": 3320 }, { "epoch": 2.9450841452612933, "grad_norm": 0.31815919280052185, "learning_rate": 1e-05, "loss": 1.0036, "step": 3325 }, { "epoch": 2.9495128432240922, "grad_norm": 0.6472691297531128, "learning_rate": 1e-05, "loss": 1.0137, "step": 3330 }, { "epoch": 2.9539415411868912, "grad_norm": 0.39716836810112, "learning_rate": 1e-05, "loss": 0.9911, "step": 3335 }, { "epoch": 2.9583702391496898, "grad_norm": 0.4567423462867737, "learning_rate": 1e-05, "loss": 1.0525, "step": 3340 }, { "epoch": 2.9627989371124888, "grad_norm": 0.4389401376247406, "learning_rate": 1e-05, "loss": 1.0581, "step": 3345 }, { "epoch": 2.9672276350752878, "grad_norm": 0.42704617977142334, "learning_rate": 1e-05, "loss": 0.9885, "step": 3350 }, { "epoch": 2.9716563330380867, "grad_norm": 0.5419511795043945, "learning_rate": 1e-05, "loss": 1.0305, "step": 3355 }, { "epoch": 2.9760850310008857, "grad_norm": 0.33577871322631836, "learning_rate": 1e-05, "loss": 0.9875, "step": 3360 }, { "epoch": 2.9805137289636847, "grad_norm": 0.45892855525016785, "learning_rate": 1e-05, "loss": 1.0168, "step": 3365 }, { "epoch": 2.9849424269264837, "grad_norm": 0.391928106546402, "learning_rate": 1e-05, "loss": 1.0537, "step": 3370 }, { "epoch": 2.9893711248892827, "grad_norm": 0.5902290940284729, "learning_rate": 1e-05, "loss": 1.0863, "step": 3375 }, { "epoch": 2.9937998228520817, "grad_norm": 0.4113743305206299, "learning_rate": 1e-05, "loss": 1.0401, "step": 3380 }, { "epoch": 2.9982285208148802, "grad_norm": 0.3413982391357422, "learning_rate": 1e-05, "loss": 1.0306, "step": 3385 }, { "epoch": 3.002657218777679, "grad_norm": 0.41479942202568054, "learning_rate": 1e-05, "loss": 1.046, "step": 3390 }, { "epoch": 3.007085916740478, "grad_norm": 0.5093465447425842, "learning_rate": 1e-05, "loss": 1.0409, "step": 3395 }, { "epoch": 3.011514614703277, "grad_norm": 0.42168518900871277, "learning_rate": 1e-05, "loss": 1.0084, "step": 3400 }, { "epoch": 3.015943312666076, "grad_norm": 0.38336870074272156, "learning_rate": 1e-05, "loss": 0.9813, "step": 3405 }, { "epoch": 3.020372010628875, "grad_norm": 0.3866322636604309, "learning_rate": 1e-05, "loss": 1.0428, "step": 3410 }, { "epoch": 3.024800708591674, "grad_norm": 0.5771451592445374, "learning_rate": 1e-05, "loss": 1.053, "step": 3415 }, { "epoch": 3.029229406554473, "grad_norm": 0.4466056525707245, "learning_rate": 1e-05, "loss": 0.9913, "step": 3420 }, { "epoch": 3.033658104517272, "grad_norm": 0.44519755244255066, "learning_rate": 1e-05, "loss": 1.005, "step": 3425 }, { "epoch": 3.0380868024800707, "grad_norm": 0.3985057771205902, "learning_rate": 1e-05, "loss": 1.0415, "step": 3430 }, { "epoch": 3.0425155004428697, "grad_norm": 0.3949190378189087, "learning_rate": 1e-05, "loss": 0.9824, "step": 3435 }, { "epoch": 3.0469441984056687, "grad_norm": 0.39209190011024475, "learning_rate": 1e-05, "loss": 1.0192, "step": 3440 }, { "epoch": 3.0513728963684676, "grad_norm": 0.3770267367362976, "learning_rate": 1e-05, "loss": 1.0223, "step": 3445 }, { "epoch": 3.0558015943312666, "grad_norm": 0.47111639380455017, "learning_rate": 1e-05, "loss": 1.0541, "step": 3450 }, { "epoch": 3.0602302922940656, "grad_norm": 0.4482271373271942, "learning_rate": 1e-05, "loss": 1.0278, "step": 3455 }, { "epoch": 3.0646589902568646, "grad_norm": 0.377704918384552, "learning_rate": 1e-05, "loss": 1.0235, "step": 3460 }, { "epoch": 3.0690876882196636, "grad_norm": 0.4829293191432953, "learning_rate": 1e-05, "loss": 1.0646, "step": 3465 }, { "epoch": 3.073516386182462, "grad_norm": 0.30433109402656555, "learning_rate": 1e-05, "loss": 1.037, "step": 3470 }, { "epoch": 3.077945084145261, "grad_norm": 0.4092828631401062, "learning_rate": 1e-05, "loss": 0.9808, "step": 3475 }, { "epoch": 3.08237378210806, "grad_norm": 0.37381264567375183, "learning_rate": 1e-05, "loss": 0.9555, "step": 3480 }, { "epoch": 3.086802480070859, "grad_norm": 0.5895156860351562, "learning_rate": 1e-05, "loss": 1.038, "step": 3485 }, { "epoch": 3.091231178033658, "grad_norm": 0.4740341305732727, "learning_rate": 1e-05, "loss": 1.0104, "step": 3490 }, { "epoch": 3.095659875996457, "grad_norm": 0.4457544982433319, "learning_rate": 1e-05, "loss": 0.9814, "step": 3495 }, { "epoch": 3.100088573959256, "grad_norm": 0.42571043968200684, "learning_rate": 1e-05, "loss": 1.1006, "step": 3500 }, { "epoch": 3.104517271922055, "grad_norm": 0.4492760896682739, "learning_rate": 1e-05, "loss": 1.0413, "step": 3505 }, { "epoch": 3.108945969884854, "grad_norm": 0.4131598174571991, "learning_rate": 1e-05, "loss": 1.0047, "step": 3510 }, { "epoch": 3.1133746678476526, "grad_norm": 0.44474515318870544, "learning_rate": 1e-05, "loss": 1.0001, "step": 3515 }, { "epoch": 3.1178033658104516, "grad_norm": 0.4319981634616852, "learning_rate": 1e-05, "loss": 1.0146, "step": 3520 }, { "epoch": 3.1222320637732506, "grad_norm": 0.3335142433643341, "learning_rate": 1e-05, "loss": 1.0304, "step": 3525 }, { "epoch": 3.1266607617360496, "grad_norm": 0.3271971642971039, "learning_rate": 1e-05, "loss": 1.0057, "step": 3530 }, { "epoch": 3.1310894596988486, "grad_norm": 0.30540257692337036, "learning_rate": 1e-05, "loss": 1.0638, "step": 3535 }, { "epoch": 3.1355181576616475, "grad_norm": 0.4695254862308502, "learning_rate": 1e-05, "loss": 1.0119, "step": 3540 }, { "epoch": 3.1399468556244465, "grad_norm": 0.42064210772514343, "learning_rate": 1e-05, "loss": 1.0076, "step": 3545 }, { "epoch": 3.1443755535872455, "grad_norm": 0.5295586585998535, "learning_rate": 1e-05, "loss": 1.0165, "step": 3550 }, { "epoch": 3.148804251550044, "grad_norm": 0.4814288318157196, "learning_rate": 1e-05, "loss": 0.9937, "step": 3555 }, { "epoch": 3.153232949512843, "grad_norm": 0.4444942772388458, "learning_rate": 1e-05, "loss": 1.0122, "step": 3560 }, { "epoch": 3.157661647475642, "grad_norm": 0.3699358403682709, "learning_rate": 1e-05, "loss": 1.0632, "step": 3565 }, { "epoch": 3.162090345438441, "grad_norm": 0.5086784958839417, "learning_rate": 1e-05, "loss": 1.0139, "step": 3570 }, { "epoch": 3.16651904340124, "grad_norm": 0.3851536810398102, "learning_rate": 1e-05, "loss": 1.0256, "step": 3575 }, { "epoch": 3.170947741364039, "grad_norm": 0.37175634503364563, "learning_rate": 1e-05, "loss": 1.0707, "step": 3580 }, { "epoch": 3.175376439326838, "grad_norm": 0.4465455710887909, "learning_rate": 1e-05, "loss": 1.0456, "step": 3585 }, { "epoch": 3.179805137289637, "grad_norm": 0.37006422877311707, "learning_rate": 1e-05, "loss": 1.0197, "step": 3590 }, { "epoch": 3.184233835252436, "grad_norm": 0.4222365617752075, "learning_rate": 1e-05, "loss": 1.0334, "step": 3595 }, { "epoch": 3.1886625332152345, "grad_norm": 0.3667023181915283, "learning_rate": 1e-05, "loss": 1.0511, "step": 3600 }, { "epoch": 3.1930912311780335, "grad_norm": 0.4231782555580139, "learning_rate": 1e-05, "loss": 0.999, "step": 3605 }, { "epoch": 3.1975199291408325, "grad_norm": 0.3751493990421295, "learning_rate": 1e-05, "loss": 0.9979, "step": 3610 }, { "epoch": 3.2019486271036315, "grad_norm": 0.47144243121147156, "learning_rate": 1e-05, "loss": 1.0123, "step": 3615 }, { "epoch": 3.2063773250664305, "grad_norm": 0.4281963109970093, "learning_rate": 1e-05, "loss": 1.0153, "step": 3620 }, { "epoch": 3.2108060230292295, "grad_norm": 0.6021721363067627, "learning_rate": 1e-05, "loss": 1.0396, "step": 3625 }, { "epoch": 3.2152347209920284, "grad_norm": 0.39457863569259644, "learning_rate": 1e-05, "loss": 0.9852, "step": 3630 }, { "epoch": 3.2196634189548274, "grad_norm": 0.3613373935222626, "learning_rate": 1e-05, "loss": 1.0109, "step": 3635 }, { "epoch": 3.2240921169176264, "grad_norm": 0.4260545074939728, "learning_rate": 1e-05, "loss": 1.0288, "step": 3640 }, { "epoch": 3.228520814880425, "grad_norm": 0.5399686694145203, "learning_rate": 1e-05, "loss": 1.0817, "step": 3645 }, { "epoch": 3.232949512843224, "grad_norm": 0.42582985758781433, "learning_rate": 1e-05, "loss": 1.0244, "step": 3650 }, { "epoch": 3.237378210806023, "grad_norm": 0.313127338886261, "learning_rate": 1e-05, "loss": 1.0512, "step": 3655 }, { "epoch": 3.241806908768822, "grad_norm": 0.4694111943244934, "learning_rate": 1e-05, "loss": 1.0592, "step": 3660 }, { "epoch": 3.246235606731621, "grad_norm": 0.6192038655281067, "learning_rate": 1e-05, "loss": 0.9691, "step": 3665 }, { "epoch": 3.25066430469442, "grad_norm": 0.47336092591285706, "learning_rate": 1e-05, "loss": 1.012, "step": 3670 }, { "epoch": 3.255093002657219, "grad_norm": 0.34903597831726074, "learning_rate": 1e-05, "loss": 1.0314, "step": 3675 }, { "epoch": 3.259521700620018, "grad_norm": 0.4449535310268402, "learning_rate": 1e-05, "loss": 1.0467, "step": 3680 }, { "epoch": 3.263950398582817, "grad_norm": 0.5163513422012329, "learning_rate": 1e-05, "loss": 0.985, "step": 3685 }, { "epoch": 3.2683790965456154, "grad_norm": 0.36432549357414246, "learning_rate": 1e-05, "loss": 1.001, "step": 3690 }, { "epoch": 3.2728077945084144, "grad_norm": 0.4025719165802002, "learning_rate": 1e-05, "loss": 1.0195, "step": 3695 }, { "epoch": 3.2772364924712134, "grad_norm": 0.6147280335426331, "learning_rate": 1e-05, "loss": 1.014, "step": 3700 }, { "epoch": 3.2816651904340124, "grad_norm": 0.4857144355773926, "learning_rate": 1e-05, "loss": 1.0309, "step": 3705 }, { "epoch": 3.2860938883968114, "grad_norm": 0.5235254168510437, "learning_rate": 1e-05, "loss": 1.0957, "step": 3710 }, { "epoch": 3.2905225863596104, "grad_norm": 0.5222679376602173, "learning_rate": 1e-05, "loss": 1.0711, "step": 3715 }, { "epoch": 3.2949512843224094, "grad_norm": 0.2747630476951599, "learning_rate": 1e-05, "loss": 1.0445, "step": 3720 }, { "epoch": 3.299379982285208, "grad_norm": 0.36898380517959595, "learning_rate": 1e-05, "loss": 0.9747, "step": 3725 }, { "epoch": 3.3038086802480073, "grad_norm": 0.4518287777900696, "learning_rate": 1e-05, "loss": 1.0301, "step": 3730 }, { "epoch": 3.308237378210806, "grad_norm": 0.37403711676597595, "learning_rate": 1e-05, "loss": 1.0613, "step": 3735 }, { "epoch": 3.312666076173605, "grad_norm": 0.3976152241230011, "learning_rate": 1e-05, "loss": 0.9814, "step": 3740 }, { "epoch": 3.317094774136404, "grad_norm": 0.3707811236381531, "learning_rate": 1e-05, "loss": 1.0511, "step": 3745 }, { "epoch": 3.321523472099203, "grad_norm": 0.490507036447525, "learning_rate": 1e-05, "loss": 1.0209, "step": 3750 }, { "epoch": 3.325952170062002, "grad_norm": 0.3149888515472412, "learning_rate": 1e-05, "loss": 1.0207, "step": 3755 }, { "epoch": 3.330380868024801, "grad_norm": 0.4916192591190338, "learning_rate": 1e-05, "loss": 0.9943, "step": 3760 }, { "epoch": 3.3348095659876, "grad_norm": 0.4619700312614441, "learning_rate": 1e-05, "loss": 0.9951, "step": 3765 }, { "epoch": 3.3392382639503984, "grad_norm": 0.4629688858985901, "learning_rate": 1e-05, "loss": 1.0251, "step": 3770 }, { "epoch": 3.3436669619131973, "grad_norm": 0.4954208731651306, "learning_rate": 1e-05, "loss": 1.0067, "step": 3775 }, { "epoch": 3.3480956598759963, "grad_norm": 0.5487666726112366, "learning_rate": 1e-05, "loss": 1.0343, "step": 3780 }, { "epoch": 3.3525243578387953, "grad_norm": 0.36240583658218384, "learning_rate": 1e-05, "loss": 1.0301, "step": 3785 }, { "epoch": 3.3569530558015943, "grad_norm": 0.38365602493286133, "learning_rate": 1e-05, "loss": 1.0415, "step": 3790 }, { "epoch": 3.3613817537643933, "grad_norm": 0.32367461919784546, "learning_rate": 1e-05, "loss": 1.0428, "step": 3795 }, { "epoch": 3.3658104517271923, "grad_norm": 0.38669154047966003, "learning_rate": 1e-05, "loss": 1.0663, "step": 3800 }, { "epoch": 3.3702391496899913, "grad_norm": 0.38724860548973083, "learning_rate": 1e-05, "loss": 0.9896, "step": 3805 }, { "epoch": 3.3746678476527903, "grad_norm": 0.37929293513298035, "learning_rate": 1e-05, "loss": 1.0438, "step": 3810 }, { "epoch": 3.379096545615589, "grad_norm": 0.40706324577331543, "learning_rate": 1e-05, "loss": 1.0703, "step": 3815 }, { "epoch": 3.383525243578388, "grad_norm": 0.47311824560165405, "learning_rate": 1e-05, "loss": 0.9937, "step": 3820 }, { "epoch": 3.387953941541187, "grad_norm": 0.3395290672779083, "learning_rate": 1e-05, "loss": 1.0565, "step": 3825 }, { "epoch": 3.3923826395039858, "grad_norm": 0.4534793198108673, "learning_rate": 1e-05, "loss": 1.0226, "step": 3830 }, { "epoch": 3.3968113374667848, "grad_norm": 0.33159688115119934, "learning_rate": 1e-05, "loss": 1.0085, "step": 3835 }, { "epoch": 3.4012400354295838, "grad_norm": 0.4188603460788727, "learning_rate": 1e-05, "loss": 1.0387, "step": 3840 }, { "epoch": 3.4056687333923827, "grad_norm": 0.5187346935272217, "learning_rate": 1e-05, "loss": 1.0389, "step": 3845 }, { "epoch": 3.4100974313551817, "grad_norm": 0.4985080063343048, "learning_rate": 1e-05, "loss": 1.0507, "step": 3850 }, { "epoch": 3.4145261293179807, "grad_norm": 0.38136884570121765, "learning_rate": 1e-05, "loss": 0.9879, "step": 3855 }, { "epoch": 3.4189548272807793, "grad_norm": 0.31632256507873535, "learning_rate": 1e-05, "loss": 1.0318, "step": 3860 }, { "epoch": 3.4233835252435783, "grad_norm": 0.386005163192749, "learning_rate": 1e-05, "loss": 1.0435, "step": 3865 }, { "epoch": 3.4278122232063772, "grad_norm": 0.36566078662872314, "learning_rate": 1e-05, "loss": 0.9579, "step": 3870 }, { "epoch": 3.4322409211691762, "grad_norm": 0.389634370803833, "learning_rate": 1e-05, "loss": 1.0668, "step": 3875 }, { "epoch": 3.436669619131975, "grad_norm": 0.31114742159843445, "learning_rate": 1e-05, "loss": 0.9847, "step": 3880 }, { "epoch": 3.441098317094774, "grad_norm": 0.4838503301143646, "learning_rate": 1e-05, "loss": 0.9801, "step": 3885 }, { "epoch": 3.445527015057573, "grad_norm": 0.3525892198085785, "learning_rate": 1e-05, "loss": 0.9745, "step": 3890 }, { "epoch": 3.449955713020372, "grad_norm": 0.4195665717124939, "learning_rate": 1e-05, "loss": 1.0034, "step": 3895 }, { "epoch": 3.454384410983171, "grad_norm": 0.5166270732879639, "learning_rate": 1e-05, "loss": 0.9847, "step": 3900 }, { "epoch": 3.4588131089459697, "grad_norm": 0.3707009255886078, "learning_rate": 1e-05, "loss": 1.0629, "step": 3905 }, { "epoch": 3.4632418069087687, "grad_norm": 0.5166239738464355, "learning_rate": 1e-05, "loss": 0.9989, "step": 3910 }, { "epoch": 3.4676705048715677, "grad_norm": 0.38136765360832214, "learning_rate": 1e-05, "loss": 1.0026, "step": 3915 }, { "epoch": 3.4720992028343667, "grad_norm": 0.38072213530540466, "learning_rate": 1e-05, "loss": 1.0893, "step": 3920 }, { "epoch": 3.4765279007971657, "grad_norm": 0.3407192528247833, "learning_rate": 1e-05, "loss": 0.9904, "step": 3925 }, { "epoch": 3.4809565987599647, "grad_norm": 0.4799145758152008, "learning_rate": 1e-05, "loss": 0.9873, "step": 3930 }, { "epoch": 3.4853852967227636, "grad_norm": 0.520026445388794, "learning_rate": 1e-05, "loss": 1.0846, "step": 3935 }, { "epoch": 3.4898139946855626, "grad_norm": 0.5156550407409668, "learning_rate": 1e-05, "loss": 1.0096, "step": 3940 }, { "epoch": 3.4942426926483616, "grad_norm": 0.5291915535926819, "learning_rate": 1e-05, "loss": 1.021, "step": 3945 }, { "epoch": 3.49867139061116, "grad_norm": 0.3861340880393982, "learning_rate": 1e-05, "loss": 1.0674, "step": 3950 }, { "epoch": 3.503100088573959, "grad_norm": 0.44092243909835815, "learning_rate": 1e-05, "loss": 0.9911, "step": 3955 }, { "epoch": 3.507528786536758, "grad_norm": 0.4885048270225525, "learning_rate": 1e-05, "loss": 1.03, "step": 3960 }, { "epoch": 3.511957484499557, "grad_norm": 0.4576728940010071, "learning_rate": 1e-05, "loss": 1.0867, "step": 3965 }, { "epoch": 3.516386182462356, "grad_norm": 0.380595326423645, "learning_rate": 1e-05, "loss": 1.004, "step": 3970 }, { "epoch": 3.520814880425155, "grad_norm": 0.38781315088272095, "learning_rate": 1e-05, "loss": 1.0121, "step": 3975 }, { "epoch": 3.525243578387954, "grad_norm": 0.33252057433128357, "learning_rate": 1e-05, "loss": 0.9967, "step": 3980 }, { "epoch": 3.5296722763507526, "grad_norm": 0.3965751528739929, "learning_rate": 1e-05, "loss": 1.02, "step": 3985 }, { "epoch": 3.534100974313552, "grad_norm": 0.324137806892395, "learning_rate": 1e-05, "loss": 1.0325, "step": 3990 }, { "epoch": 3.5385296722763506, "grad_norm": 0.35043829679489136, "learning_rate": 1e-05, "loss": 1.0312, "step": 3995 }, { "epoch": 3.5429583702391496, "grad_norm": 0.2821948230266571, "learning_rate": 1e-05, "loss": 1.0524, "step": 4000 }, { "epoch": 3.5473870682019486, "grad_norm": 0.3412671685218811, "learning_rate": 1e-05, "loss": 1.0251, "step": 4005 }, { "epoch": 3.5518157661647476, "grad_norm": 0.47750186920166016, "learning_rate": 1e-05, "loss": 1.0512, "step": 4010 }, { "epoch": 3.5562444641275466, "grad_norm": 0.4440419375896454, "learning_rate": 1e-05, "loss": 1.043, "step": 4015 }, { "epoch": 3.5606731620903456, "grad_norm": 0.3451400101184845, "learning_rate": 1e-05, "loss": 1.0281, "step": 4020 }, { "epoch": 3.5651018600531446, "grad_norm": 0.4672628939151764, "learning_rate": 1e-05, "loss": 1.0341, "step": 4025 }, { "epoch": 3.569530558015943, "grad_norm": 0.4284445643424988, "learning_rate": 1e-05, "loss": 0.9683, "step": 4030 }, { "epoch": 3.573959255978742, "grad_norm": 0.45521488785743713, "learning_rate": 1e-05, "loss": 1.0337, "step": 4035 }, { "epoch": 3.578387953941541, "grad_norm": 0.35136884450912476, "learning_rate": 1e-05, "loss": 1.0349, "step": 4040 }, { "epoch": 3.58281665190434, "grad_norm": 0.37805497646331787, "learning_rate": 1e-05, "loss": 0.9973, "step": 4045 }, { "epoch": 3.587245349867139, "grad_norm": 0.5786856412887573, "learning_rate": 1e-05, "loss": 1.0367, "step": 4050 }, { "epoch": 3.591674047829938, "grad_norm": 0.38704222440719604, "learning_rate": 1e-05, "loss": 1.0292, "step": 4055 }, { "epoch": 3.596102745792737, "grad_norm": 0.41924527287483215, "learning_rate": 1e-05, "loss": 1.0571, "step": 4060 }, { "epoch": 3.600531443755536, "grad_norm": 0.4547056257724762, "learning_rate": 1e-05, "loss": 1.0481, "step": 4065 }, { "epoch": 3.604960141718335, "grad_norm": 0.3528039753437042, "learning_rate": 1e-05, "loss": 0.9891, "step": 4070 }, { "epoch": 3.6093888396811336, "grad_norm": 0.3567149043083191, "learning_rate": 1e-05, "loss": 1.0298, "step": 4075 }, { "epoch": 3.6138175376439325, "grad_norm": 0.3547581434249878, "learning_rate": 1e-05, "loss": 0.9992, "step": 4080 }, { "epoch": 3.6182462356067315, "grad_norm": 0.40108251571655273, "learning_rate": 1e-05, "loss": 1.0377, "step": 4085 }, { "epoch": 3.6226749335695305, "grad_norm": 0.3745425045490265, "learning_rate": 1e-05, "loss": 0.9933, "step": 4090 }, { "epoch": 3.6271036315323295, "grad_norm": 0.38116466999053955, "learning_rate": 1e-05, "loss": 1.0318, "step": 4095 }, { "epoch": 3.6315323294951285, "grad_norm": 0.34331002831459045, "learning_rate": 1e-05, "loss": 1.0311, "step": 4100 }, { "epoch": 3.6359610274579275, "grad_norm": 0.3361397087574005, "learning_rate": 1e-05, "loss": 1.1007, "step": 4105 }, { "epoch": 3.640389725420726, "grad_norm": 0.3507891297340393, "learning_rate": 1e-05, "loss": 1.0188, "step": 4110 }, { "epoch": 3.6448184233835255, "grad_norm": 0.32863402366638184, "learning_rate": 1e-05, "loss": 1.0293, "step": 4115 }, { "epoch": 3.649247121346324, "grad_norm": 0.34813687205314636, "learning_rate": 1e-05, "loss": 1.0022, "step": 4120 }, { "epoch": 3.653675819309123, "grad_norm": 0.28008124232292175, "learning_rate": 1e-05, "loss": 0.97, "step": 4125 }, { "epoch": 3.658104517271922, "grad_norm": 0.33971190452575684, "learning_rate": 1e-05, "loss": 1.0368, "step": 4130 }, { "epoch": 3.662533215234721, "grad_norm": 0.32608115673065186, "learning_rate": 1e-05, "loss": 1.0921, "step": 4135 }, { "epoch": 3.66696191319752, "grad_norm": 0.34819838404655457, "learning_rate": 1e-05, "loss": 1.0333, "step": 4140 }, { "epoch": 3.671390611160319, "grad_norm": 0.46355897188186646, "learning_rate": 1e-05, "loss": 1.0235, "step": 4145 }, { "epoch": 3.675819309123118, "grad_norm": 0.5182165503501892, "learning_rate": 1e-05, "loss": 1.0276, "step": 4150 }, { "epoch": 3.6802480070859165, "grad_norm": 0.34073352813720703, "learning_rate": 1e-05, "loss": 1.0493, "step": 4155 }, { "epoch": 3.684676705048716, "grad_norm": 0.460151344537735, "learning_rate": 1e-05, "loss": 1.0313, "step": 4160 }, { "epoch": 3.6891054030115145, "grad_norm": 0.45734596252441406, "learning_rate": 1e-05, "loss": 1.0062, "step": 4165 }, { "epoch": 3.6935341009743134, "grad_norm": 0.3882424831390381, "learning_rate": 1e-05, "loss": 0.9735, "step": 4170 }, { "epoch": 3.6979627989371124, "grad_norm": 0.42943891882896423, "learning_rate": 1e-05, "loss": 1.0192, "step": 4175 }, { "epoch": 3.7023914968999114, "grad_norm": 0.390083372592926, "learning_rate": 1e-05, "loss": 1.0188, "step": 4180 }, { "epoch": 3.7068201948627104, "grad_norm": 0.42090412974357605, "learning_rate": 1e-05, "loss": 1.037, "step": 4185 }, { "epoch": 3.7112488928255094, "grad_norm": 0.4169060289859772, "learning_rate": 1e-05, "loss": 1.0417, "step": 4190 }, { "epoch": 3.7156775907883084, "grad_norm": 0.32734933495521545, "learning_rate": 1e-05, "loss": 1.0452, "step": 4195 }, { "epoch": 3.720106288751107, "grad_norm": 0.29035845398902893, "learning_rate": 1e-05, "loss": 1.0485, "step": 4200 }, { "epoch": 3.7245349867139064, "grad_norm": 0.3693220317363739, "learning_rate": 1e-05, "loss": 1.0334, "step": 4205 }, { "epoch": 3.728963684676705, "grad_norm": 0.39305785298347473, "learning_rate": 1e-05, "loss": 1.0245, "step": 4210 }, { "epoch": 3.733392382639504, "grad_norm": 0.335788369178772, "learning_rate": 1e-05, "loss": 0.9679, "step": 4215 }, { "epoch": 3.737821080602303, "grad_norm": 0.41085320711135864, "learning_rate": 1e-05, "loss": 1.0559, "step": 4220 }, { "epoch": 3.742249778565102, "grad_norm": 0.3124822974205017, "learning_rate": 1e-05, "loss": 1.0083, "step": 4225 }, { "epoch": 3.746678476527901, "grad_norm": 0.5323129296302795, "learning_rate": 1e-05, "loss": 1.0152, "step": 4230 }, { "epoch": 3.7511071744907, "grad_norm": 0.40696221590042114, "learning_rate": 1e-05, "loss": 1.0174, "step": 4235 }, { "epoch": 3.755535872453499, "grad_norm": 0.4836999475955963, "learning_rate": 1e-05, "loss": 1.04, "step": 4240 }, { "epoch": 3.7599645704162974, "grad_norm": 0.33299151062965393, "learning_rate": 1e-05, "loss": 1.0555, "step": 4245 }, { "epoch": 3.7643932683790964, "grad_norm": 0.2661699950695038, "learning_rate": 1e-05, "loss": 1.0672, "step": 4250 }, { "epoch": 3.7688219663418954, "grad_norm": 0.43583810329437256, "learning_rate": 1e-05, "loss": 1.0139, "step": 4255 }, { "epoch": 3.7732506643046944, "grad_norm": 0.5268107056617737, "learning_rate": 1e-05, "loss": 1.0249, "step": 4260 }, { "epoch": 3.7776793622674933, "grad_norm": 0.4620998799800873, "learning_rate": 1e-05, "loss": 1.0392, "step": 4265 }, { "epoch": 3.7821080602302923, "grad_norm": 0.5220122337341309, "learning_rate": 1e-05, "loss": 1.0096, "step": 4270 }, { "epoch": 3.7865367581930913, "grad_norm": 0.3385135233402252, "learning_rate": 1e-05, "loss": 1.0208, "step": 4275 }, { "epoch": 3.7909654561558903, "grad_norm": 0.39374107122421265, "learning_rate": 1e-05, "loss": 0.9957, "step": 4280 }, { "epoch": 3.7953941541186893, "grad_norm": 0.29298868775367737, "learning_rate": 1e-05, "loss": 1.0903, "step": 4285 }, { "epoch": 3.799822852081488, "grad_norm": 0.3600406348705292, "learning_rate": 1e-05, "loss": 0.9961, "step": 4290 }, { "epoch": 3.804251550044287, "grad_norm": 0.374218225479126, "learning_rate": 1e-05, "loss": 1.0081, "step": 4295 }, { "epoch": 3.808680248007086, "grad_norm": 0.4733864665031433, "learning_rate": 1e-05, "loss": 1.0359, "step": 4300 }, { "epoch": 3.813108945969885, "grad_norm": 0.36350077390670776, "learning_rate": 1e-05, "loss": 1.0391, "step": 4305 }, { "epoch": 3.817537643932684, "grad_norm": 0.5348228812217712, "learning_rate": 1e-05, "loss": 1.0807, "step": 4310 }, { "epoch": 3.821966341895483, "grad_norm": 0.40654081106185913, "learning_rate": 1e-05, "loss": 1.0395, "step": 4315 }, { "epoch": 3.8263950398582818, "grad_norm": 0.4531782567501068, "learning_rate": 1e-05, "loss": 1.0087, "step": 4320 }, { "epoch": 3.8308237378210808, "grad_norm": 0.3837543725967407, "learning_rate": 1e-05, "loss": 1.0258, "step": 4325 }, { "epoch": 3.8352524357838798, "grad_norm": 0.43151649832725525, "learning_rate": 1e-05, "loss": 1.021, "step": 4330 }, { "epoch": 3.8396811337466783, "grad_norm": 0.2781372368335724, "learning_rate": 1e-05, "loss": 1.0303, "step": 4335 }, { "epoch": 3.8441098317094773, "grad_norm": 0.3800680637359619, "learning_rate": 1e-05, "loss": 1.0343, "step": 4340 }, { "epoch": 3.8485385296722763, "grad_norm": 0.38863804936408997, "learning_rate": 1e-05, "loss": 1.0154, "step": 4345 }, { "epoch": 3.8529672276350753, "grad_norm": 0.42367759346961975, "learning_rate": 1e-05, "loss": 1.0513, "step": 4350 }, { "epoch": 3.8573959255978743, "grad_norm": 0.31877681612968445, "learning_rate": 1e-05, "loss": 1.0538, "step": 4355 }, { "epoch": 3.8618246235606732, "grad_norm": 0.2576321065425873, "learning_rate": 1e-05, "loss": 1.0323, "step": 4360 }, { "epoch": 3.8662533215234722, "grad_norm": 0.39924493432044983, "learning_rate": 1e-05, "loss": 1.041, "step": 4365 }, { "epoch": 3.8706820194862708, "grad_norm": 0.31105536222457886, "learning_rate": 1e-05, "loss": 0.9939, "step": 4370 }, { "epoch": 3.87511071744907, "grad_norm": 0.37262478470802307, "learning_rate": 1e-05, "loss": 1.0558, "step": 4375 }, { "epoch": 3.8795394154118688, "grad_norm": 0.5016164183616638, "learning_rate": 1e-05, "loss": 0.9926, "step": 4380 }, { "epoch": 3.8839681133746677, "grad_norm": 0.43220239877700806, "learning_rate": 1e-05, "loss": 1.0039, "step": 4385 }, { "epoch": 3.8883968113374667, "grad_norm": 0.3780975639820099, "learning_rate": 1e-05, "loss": 1.0124, "step": 4390 }, { "epoch": 3.8928255093002657, "grad_norm": 0.3637257516384125, "learning_rate": 1e-05, "loss": 0.9572, "step": 4395 }, { "epoch": 3.8972542072630647, "grad_norm": 0.49972036480903625, "learning_rate": 1e-05, "loss": 1.0102, "step": 4400 }, { "epoch": 3.9016829052258637, "grad_norm": 0.4010217487812042, "learning_rate": 1e-05, "loss": 1.0325, "step": 4405 }, { "epoch": 3.9061116031886627, "grad_norm": 0.44208645820617676, "learning_rate": 1e-05, "loss": 1.0249, "step": 4410 }, { "epoch": 3.9105403011514612, "grad_norm": 0.41336748003959656, "learning_rate": 1e-05, "loss": 1.0216, "step": 4415 }, { "epoch": 3.9149689991142607, "grad_norm": 0.3971027135848999, "learning_rate": 1e-05, "loss": 1.0143, "step": 4420 }, { "epoch": 3.919397697077059, "grad_norm": 0.406230628490448, "learning_rate": 1e-05, "loss": 1.0288, "step": 4425 }, { "epoch": 3.923826395039858, "grad_norm": 0.3609033524990082, "learning_rate": 1e-05, "loss": 0.9994, "step": 4430 }, { "epoch": 3.928255093002657, "grad_norm": 0.4624992907047272, "learning_rate": 1e-05, "loss": 1.0066, "step": 4435 }, { "epoch": 3.932683790965456, "grad_norm": 0.4618331491947174, "learning_rate": 1e-05, "loss": 1.0447, "step": 4440 }, { "epoch": 3.937112488928255, "grad_norm": 0.3795495629310608, "learning_rate": 1e-05, "loss": 1.0274, "step": 4445 }, { "epoch": 3.941541186891054, "grad_norm": 0.3128076195716858, "learning_rate": 1e-05, "loss": 0.9971, "step": 4450 }, { "epoch": 3.945969884853853, "grad_norm": 0.3996869623661041, "learning_rate": 1e-05, "loss": 1.0264, "step": 4455 }, { "epoch": 3.9503985828166517, "grad_norm": 0.39952194690704346, "learning_rate": 1e-05, "loss": 1.035, "step": 4460 }, { "epoch": 3.954827280779451, "grad_norm": 0.3561505973339081, "learning_rate": 1e-05, "loss": 1.0083, "step": 4465 }, { "epoch": 3.9592559787422497, "grad_norm": 0.32471564412117004, "learning_rate": 1e-05, "loss": 0.9992, "step": 4470 }, { "epoch": 3.9636846767050486, "grad_norm": 0.38149505853652954, "learning_rate": 1e-05, "loss": 1.002, "step": 4475 }, { "epoch": 3.9681133746678476, "grad_norm": 0.3936023414134979, "learning_rate": 1e-05, "loss": 1.026, "step": 4480 }, { "epoch": 3.9725420726306466, "grad_norm": 0.48117342591285706, "learning_rate": 1e-05, "loss": 1.0255, "step": 4485 }, { "epoch": 3.9769707705934456, "grad_norm": 0.4666324257850647, "learning_rate": 1e-05, "loss": 0.97, "step": 4490 }, { "epoch": 3.9813994685562446, "grad_norm": 0.39509865641593933, "learning_rate": 1e-05, "loss": 0.9928, "step": 4495 }, { "epoch": 3.9858281665190436, "grad_norm": 0.4648655354976654, "learning_rate": 1e-05, "loss": 0.9579, "step": 4500 }, { "epoch": 3.990256864481842, "grad_norm": 0.3578872084617615, "learning_rate": 1e-05, "loss": 1.0051, "step": 4505 }, { "epoch": 3.994685562444641, "grad_norm": 0.30674901604652405, "learning_rate": 1e-05, "loss": 1.0559, "step": 4510 }, { "epoch": 3.99911426040744, "grad_norm": 0.3139508068561554, "learning_rate": 1e-05, "loss": 1.0363, "step": 4515 }, { "epoch": 4.0035429583702395, "grad_norm": 0.3991316854953766, "learning_rate": 1e-05, "loss": 1.0293, "step": 4520 }, { "epoch": 4.007971656333038, "grad_norm": 0.4668462574481964, "learning_rate": 1e-05, "loss": 0.9964, "step": 4525 }, { "epoch": 4.012400354295837, "grad_norm": 0.32502973079681396, "learning_rate": 1e-05, "loss": 1.0315, "step": 4530 }, { "epoch": 4.016829052258636, "grad_norm": 0.40989378094673157, "learning_rate": 1e-05, "loss": 1.0713, "step": 4535 }, { "epoch": 4.021257750221435, "grad_norm": 0.3096681535243988, "learning_rate": 1e-05, "loss": 0.9595, "step": 4540 }, { "epoch": 4.025686448184234, "grad_norm": 0.32582685351371765, "learning_rate": 1e-05, "loss": 1.0243, "step": 4545 }, { "epoch": 4.030115146147033, "grad_norm": 0.40166813135147095, "learning_rate": 1e-05, "loss": 0.985, "step": 4550 }, { "epoch": 4.034543844109832, "grad_norm": 0.4353068470954895, "learning_rate": 1e-05, "loss": 1.0168, "step": 4555 }, { "epoch": 4.038972542072631, "grad_norm": 0.32297283411026, "learning_rate": 1e-05, "loss": 0.9852, "step": 4560 }, { "epoch": 4.04340124003543, "grad_norm": 0.4393858313560486, "learning_rate": 1e-05, "loss": 1.0391, "step": 4565 }, { "epoch": 4.0478299379982285, "grad_norm": 0.319776713848114, "learning_rate": 1e-05, "loss": 1.018, "step": 4570 }, { "epoch": 4.052258635961027, "grad_norm": 0.3933486044406891, "learning_rate": 1e-05, "loss": 0.9935, "step": 4575 }, { "epoch": 4.0566873339238265, "grad_norm": 0.3859753906726837, "learning_rate": 1e-05, "loss": 1.0596, "step": 4580 }, { "epoch": 4.061116031886625, "grad_norm": 0.30829575657844543, "learning_rate": 1e-05, "loss": 1.0623, "step": 4585 }, { "epoch": 4.0655447298494245, "grad_norm": 0.42734357714653015, "learning_rate": 1e-05, "loss": 1.0327, "step": 4590 }, { "epoch": 4.069973427812223, "grad_norm": 0.3717409372329712, "learning_rate": 1e-05, "loss": 0.9994, "step": 4595 }, { "epoch": 4.0744021257750225, "grad_norm": 0.3354444205760956, "learning_rate": 1e-05, "loss": 1.0411, "step": 4600 }, { "epoch": 4.078830823737821, "grad_norm": 0.39263391494750977, "learning_rate": 1e-05, "loss": 0.9954, "step": 4605 }, { "epoch": 4.0832595217006205, "grad_norm": 0.3263624608516693, "learning_rate": 1e-05, "loss": 1.039, "step": 4610 }, { "epoch": 4.087688219663419, "grad_norm": 0.499742716550827, "learning_rate": 1e-05, "loss": 1.0247, "step": 4615 }, { "epoch": 4.0921169176262175, "grad_norm": 0.3455238342285156, "learning_rate": 1e-05, "loss": 1.029, "step": 4620 }, { "epoch": 4.096545615589017, "grad_norm": 0.4965772032737732, "learning_rate": 1e-05, "loss": 1.0289, "step": 4625 }, { "epoch": 4.1009743135518155, "grad_norm": 0.5790495276451111, "learning_rate": 1e-05, "loss": 1.0334, "step": 4630 }, { "epoch": 4.105403011514615, "grad_norm": 0.44276583194732666, "learning_rate": 1e-05, "loss": 1.0035, "step": 4635 }, { "epoch": 4.1098317094774135, "grad_norm": 0.5250453948974609, "learning_rate": 1e-05, "loss": 0.9564, "step": 4640 }, { "epoch": 4.114260407440213, "grad_norm": 0.42392274737358093, "learning_rate": 1e-05, "loss": 1.006, "step": 4645 }, { "epoch": 4.1186891054030115, "grad_norm": 0.39138004183769226, "learning_rate": 1e-05, "loss": 1.0554, "step": 4650 }, { "epoch": 4.12311780336581, "grad_norm": 0.4503367841243744, "learning_rate": 1e-05, "loss": 0.9579, "step": 4655 }, { "epoch": 4.1275465013286095, "grad_norm": 0.38771429657936096, "learning_rate": 1e-05, "loss": 0.9966, "step": 4660 }, { "epoch": 4.131975199291408, "grad_norm": 0.44300052523612976, "learning_rate": 1e-05, "loss": 0.9776, "step": 4665 }, { "epoch": 4.136403897254207, "grad_norm": 0.4752773940563202, "learning_rate": 1e-05, "loss": 1.0295, "step": 4670 }, { "epoch": 4.140832595217006, "grad_norm": 0.36452868580818176, "learning_rate": 1e-05, "loss": 1.0217, "step": 4675 }, { "epoch": 4.145261293179805, "grad_norm": 0.4705621600151062, "learning_rate": 1e-05, "loss": 1.0451, "step": 4680 }, { "epoch": 4.149689991142604, "grad_norm": 0.5243046283721924, "learning_rate": 1e-05, "loss": 1.0117, "step": 4685 }, { "epoch": 4.154118689105403, "grad_norm": 0.44808846712112427, "learning_rate": 1e-05, "loss": 1.0095, "step": 4690 }, { "epoch": 4.158547387068202, "grad_norm": 0.503298819065094, "learning_rate": 1e-05, "loss": 0.9807, "step": 4695 }, { "epoch": 4.1629760850310005, "grad_norm": 0.3674895763397217, "learning_rate": 1e-05, "loss": 1.0545, "step": 4700 }, { "epoch": 4.1674047829938, "grad_norm": 0.47434374690055847, "learning_rate": 1e-05, "loss": 0.9485, "step": 4705 }, { "epoch": 4.1718334809565985, "grad_norm": 0.36587774753570557, "learning_rate": 1e-05, "loss": 1.0298, "step": 4710 }, { "epoch": 4.176262178919398, "grad_norm": 0.30373701453208923, "learning_rate": 1e-05, "loss": 1.0342, "step": 4715 }, { "epoch": 4.180690876882196, "grad_norm": 0.3133320212364197, "learning_rate": 1e-05, "loss": 1.0308, "step": 4720 }, { "epoch": 4.185119574844996, "grad_norm": 0.412952184677124, "learning_rate": 1e-05, "loss": 1.0523, "step": 4725 }, { "epoch": 4.189548272807794, "grad_norm": 0.40483686327934265, "learning_rate": 1e-05, "loss": 1.0157, "step": 4730 }, { "epoch": 4.193976970770594, "grad_norm": 0.3221893608570099, "learning_rate": 1e-05, "loss": 0.9931, "step": 4735 }, { "epoch": 4.198405668733392, "grad_norm": 0.40332549810409546, "learning_rate": 1e-05, "loss": 1.0535, "step": 4740 }, { "epoch": 4.202834366696191, "grad_norm": 0.41692882776260376, "learning_rate": 1e-05, "loss": 1.0522, "step": 4745 }, { "epoch": 4.20726306465899, "grad_norm": 0.3916093409061432, "learning_rate": 1e-05, "loss": 1.0682, "step": 4750 }, { "epoch": 4.211691762621789, "grad_norm": 0.48341992497444153, "learning_rate": 1e-05, "loss": 1.0653, "step": 4755 }, { "epoch": 4.216120460584588, "grad_norm": 0.4701288044452667, "learning_rate": 1e-05, "loss": 0.9875, "step": 4760 }, { "epoch": 4.220549158547387, "grad_norm": 0.3812442123889923, "learning_rate": 1e-05, "loss": 1.0362, "step": 4765 }, { "epoch": 4.224977856510186, "grad_norm": 0.4342661499977112, "learning_rate": 1e-05, "loss": 1.0458, "step": 4770 }, { "epoch": 4.229406554472985, "grad_norm": 0.516766369342804, "learning_rate": 1e-05, "loss": 1.0587, "step": 4775 }, { "epoch": 4.233835252435784, "grad_norm": 0.33460134267807007, "learning_rate": 1e-05, "loss": 1.0267, "step": 4780 }, { "epoch": 4.238263950398583, "grad_norm": 0.36863911151885986, "learning_rate": 1e-05, "loss": 1.004, "step": 4785 }, { "epoch": 4.242692648361381, "grad_norm": 0.4102412760257721, "learning_rate": 1e-05, "loss": 0.9861, "step": 4790 }, { "epoch": 4.247121346324181, "grad_norm": 0.3994167447090149, "learning_rate": 1e-05, "loss": 1.0491, "step": 4795 }, { "epoch": 4.251550044286979, "grad_norm": 0.26637136936187744, "learning_rate": 1e-05, "loss": 1.0732, "step": 4800 }, { "epoch": 4.255978742249779, "grad_norm": 0.27351829409599304, "learning_rate": 1e-05, "loss": 0.9597, "step": 4805 }, { "epoch": 4.260407440212577, "grad_norm": 0.3481573164463043, "learning_rate": 1e-05, "loss": 1.0641, "step": 4810 }, { "epoch": 4.264836138175377, "grad_norm": 0.27660074830055237, "learning_rate": 1e-05, "loss": 1.006, "step": 4815 }, { "epoch": 4.269264836138175, "grad_norm": 0.3420484662055969, "learning_rate": 1e-05, "loss": 1.0894, "step": 4820 }, { "epoch": 4.273693534100975, "grad_norm": 0.4297480583190918, "learning_rate": 1e-05, "loss": 0.9823, "step": 4825 }, { "epoch": 4.278122232063773, "grad_norm": 0.3223203420639038, "learning_rate": 1e-05, "loss": 1.0168, "step": 4830 }, { "epoch": 4.282550930026572, "grad_norm": 0.45501500368118286, "learning_rate": 1e-05, "loss": 1.0137, "step": 4835 }, { "epoch": 4.286979627989371, "grad_norm": 0.37511059641838074, "learning_rate": 1e-05, "loss": 0.9942, "step": 4840 }, { "epoch": 4.29140832595217, "grad_norm": 0.29621511697769165, "learning_rate": 1e-05, "loss": 1.0036, "step": 4845 }, { "epoch": 4.295837023914969, "grad_norm": 0.36821597814559937, "learning_rate": 1e-05, "loss": 1.0176, "step": 4850 }, { "epoch": 4.300265721877768, "grad_norm": 0.29799211025238037, "learning_rate": 1e-05, "loss": 0.9897, "step": 4855 }, { "epoch": 4.304694419840567, "grad_norm": 0.31813478469848633, "learning_rate": 1e-05, "loss": 1.0205, "step": 4860 }, { "epoch": 4.309123117803366, "grad_norm": 0.3424695134162903, "learning_rate": 1e-05, "loss": 1.0118, "step": 4865 }, { "epoch": 4.313551815766164, "grad_norm": 0.35493436455726624, "learning_rate": 1e-05, "loss": 1.014, "step": 4870 }, { "epoch": 4.317980513728964, "grad_norm": 0.3901638984680176, "learning_rate": 1e-05, "loss": 1.0704, "step": 4875 }, { "epoch": 4.322409211691762, "grad_norm": 0.3743135631084442, "learning_rate": 1e-05, "loss": 1.078, "step": 4880 }, { "epoch": 4.326837909654562, "grad_norm": 0.4754677414894104, "learning_rate": 1e-05, "loss": 1.0855, "step": 4885 }, { "epoch": 4.33126660761736, "grad_norm": 0.38147851824760437, "learning_rate": 1e-05, "loss": 1.0449, "step": 4890 }, { "epoch": 4.33569530558016, "grad_norm": 0.4686789810657501, "learning_rate": 1e-05, "loss": 1.0007, "step": 4895 }, { "epoch": 4.340124003542958, "grad_norm": 0.4210522770881653, "learning_rate": 1e-05, "loss": 1.0142, "step": 4900 }, { "epoch": 4.344552701505758, "grad_norm": 0.4124254286289215, "learning_rate": 1e-05, "loss": 1.0282, "step": 4905 }, { "epoch": 4.348981399468556, "grad_norm": 0.39260315895080566, "learning_rate": 1e-05, "loss": 1.037, "step": 4910 }, { "epoch": 4.353410097431356, "grad_norm": 0.3679765462875366, "learning_rate": 1e-05, "loss": 0.9806, "step": 4915 }, { "epoch": 4.357838795394154, "grad_norm": 0.3751234710216522, "learning_rate": 1e-05, "loss": 1.0035, "step": 4920 }, { "epoch": 4.362267493356953, "grad_norm": 0.3851834237575531, "learning_rate": 1e-05, "loss": 1.0381, "step": 4925 }, { "epoch": 4.366696191319752, "grad_norm": 0.33739200234413147, "learning_rate": 1e-05, "loss": 1.0989, "step": 4930 }, { "epoch": 4.371124889282551, "grad_norm": 0.30182185769081116, "learning_rate": 1e-05, "loss": 1.0277, "step": 4935 }, { "epoch": 4.37555358724535, "grad_norm": 0.26282113790512085, "learning_rate": 1e-05, "loss": 1.0479, "step": 4940 }, { "epoch": 4.379982285208149, "grad_norm": 0.37779316306114197, "learning_rate": 1e-05, "loss": 1.0029, "step": 4945 }, { "epoch": 4.384410983170948, "grad_norm": 0.3736492693424225, "learning_rate": 1e-05, "loss": 1.0037, "step": 4950 }, { "epoch": 4.388839681133747, "grad_norm": 0.4341072142124176, "learning_rate": 1e-05, "loss": 1.0483, "step": 4955 }, { "epoch": 4.393268379096545, "grad_norm": 0.34500136971473694, "learning_rate": 1e-05, "loss": 0.9791, "step": 4960 }, { "epoch": 4.397697077059345, "grad_norm": 0.29406043887138367, "learning_rate": 1e-05, "loss": 1.0256, "step": 4965 }, { "epoch": 4.402125775022143, "grad_norm": 0.3825207054615021, "learning_rate": 1e-05, "loss": 1.05, "step": 4970 }, { "epoch": 4.406554472984943, "grad_norm": 0.34601831436157227, "learning_rate": 1e-05, "loss": 1.0179, "step": 4975 }, { "epoch": 4.410983170947741, "grad_norm": 0.30903878808021545, "learning_rate": 1e-05, "loss": 0.9996, "step": 4980 }, { "epoch": 4.415411868910541, "grad_norm": 0.35714828968048096, "learning_rate": 1e-05, "loss": 0.9774, "step": 4985 }, { "epoch": 4.419840566873339, "grad_norm": 0.47219887375831604, "learning_rate": 1e-05, "loss": 0.9538, "step": 4990 }, { "epoch": 4.424269264836139, "grad_norm": 0.37371787428855896, "learning_rate": 1e-05, "loss": 1.0335, "step": 4995 }, { "epoch": 4.428697962798937, "grad_norm": 0.3369366228580475, "learning_rate": 1e-05, "loss": 0.9735, "step": 5000 }, { "epoch": 4.433126660761736, "grad_norm": 0.2944571375846863, "learning_rate": 1e-05, "loss": 1.0388, "step": 5005 }, { "epoch": 4.437555358724535, "grad_norm": 0.3116125762462616, "learning_rate": 1e-05, "loss": 1.0226, "step": 5010 }, { "epoch": 4.441984056687334, "grad_norm": 0.31112319231033325, "learning_rate": 1e-05, "loss": 0.9839, "step": 5015 }, { "epoch": 4.446412754650133, "grad_norm": 0.3763453960418701, "learning_rate": 1e-05, "loss": 1.0616, "step": 5020 }, { "epoch": 4.450841452612932, "grad_norm": 0.3956683278083801, "learning_rate": 1e-05, "loss": 1.0688, "step": 5025 }, { "epoch": 4.455270150575731, "grad_norm": 0.47690436244010925, "learning_rate": 1e-05, "loss": 0.9946, "step": 5030 }, { "epoch": 4.45969884853853, "grad_norm": 0.41445139050483704, "learning_rate": 1e-05, "loss": 1.013, "step": 5035 }, { "epoch": 4.464127546501329, "grad_norm": 0.45534929633140564, "learning_rate": 1e-05, "loss": 1.0363, "step": 5040 }, { "epoch": 4.468556244464128, "grad_norm": 0.3589898347854614, "learning_rate": 1e-05, "loss": 1.0277, "step": 5045 }, { "epoch": 4.472984942426926, "grad_norm": 0.38962066173553467, "learning_rate": 1e-05, "loss": 1.0148, "step": 5050 }, { "epoch": 4.477413640389726, "grad_norm": 0.5035517811775208, "learning_rate": 1e-05, "loss": 0.928, "step": 5055 }, { "epoch": 4.481842338352524, "grad_norm": 0.4337952733039856, "learning_rate": 1e-05, "loss": 1.0112, "step": 5060 }, { "epoch": 4.4862710363153235, "grad_norm": 0.4255777597427368, "learning_rate": 1e-05, "loss": 0.9907, "step": 5065 }, { "epoch": 4.490699734278122, "grad_norm": 0.3382212519645691, "learning_rate": 1e-05, "loss": 0.9655, "step": 5070 }, { "epoch": 4.4951284322409215, "grad_norm": 0.3560464382171631, "learning_rate": 1e-05, "loss": 0.9885, "step": 5075 }, { "epoch": 4.49955713020372, "grad_norm": 0.3801257610321045, "learning_rate": 1e-05, "loss": 1.0553, "step": 5080 }, { "epoch": 4.503985828166519, "grad_norm": 0.4087178707122803, "learning_rate": 1e-05, "loss": 1.0257, "step": 5085 }, { "epoch": 4.508414526129318, "grad_norm": 0.32567787170410156, "learning_rate": 1e-05, "loss": 0.9672, "step": 5090 }, { "epoch": 4.512843224092117, "grad_norm": 0.2973158657550812, "learning_rate": 1e-05, "loss": 0.9897, "step": 5095 }, { "epoch": 4.517271922054916, "grad_norm": 0.40281176567077637, "learning_rate": 1e-05, "loss": 1.0071, "step": 5100 }, { "epoch": 4.5217006200177146, "grad_norm": 0.3604399263858795, "learning_rate": 1e-05, "loss": 1.1224, "step": 5105 }, { "epoch": 4.526129317980514, "grad_norm": 0.3796055018901825, "learning_rate": 1e-05, "loss": 0.9879, "step": 5110 }, { "epoch": 4.5305580159433125, "grad_norm": 0.35533571243286133, "learning_rate": 1e-05, "loss": 1.0238, "step": 5115 }, { "epoch": 4.534986713906112, "grad_norm": 0.40700632333755493, "learning_rate": 1e-05, "loss": 1.0199, "step": 5120 }, { "epoch": 4.5394154118689105, "grad_norm": 0.39343124628067017, "learning_rate": 1e-05, "loss": 1.0286, "step": 5125 }, { "epoch": 4.54384410983171, "grad_norm": 0.40103879570961, "learning_rate": 1e-05, "loss": 0.9887, "step": 5130 }, { "epoch": 4.5482728077945085, "grad_norm": 0.4494595527648926, "learning_rate": 1e-05, "loss": 1.005, "step": 5135 }, { "epoch": 4.552701505757307, "grad_norm": 0.35697704553604126, "learning_rate": 1e-05, "loss": 1.0345, "step": 5140 }, { "epoch": 4.5571302037201065, "grad_norm": 0.3446771800518036, "learning_rate": 1e-05, "loss": 1.0439, "step": 5145 }, { "epoch": 4.561558901682905, "grad_norm": 0.3937239646911621, "learning_rate": 1e-05, "loss": 1.0465, "step": 5150 }, { "epoch": 4.565987599645704, "grad_norm": 0.3969931900501251, "learning_rate": 1e-05, "loss": 0.9821, "step": 5155 }, { "epoch": 4.570416297608503, "grad_norm": 0.31436094641685486, "learning_rate": 1e-05, "loss": 1.0385, "step": 5160 }, { "epoch": 4.574844995571302, "grad_norm": 0.29186370968818665, "learning_rate": 1e-05, "loss": 0.9972, "step": 5165 }, { "epoch": 4.579273693534101, "grad_norm": 0.37925904989242554, "learning_rate": 1e-05, "loss": 1.0557, "step": 5170 }, { "epoch": 4.5837023914968995, "grad_norm": 0.342354953289032, "learning_rate": 1e-05, "loss": 1.0477, "step": 5175 }, { "epoch": 4.588131089459699, "grad_norm": 0.3777170777320862, "learning_rate": 1e-05, "loss": 1.0643, "step": 5180 }, { "epoch": 4.5925597874224975, "grad_norm": 0.28083372116088867, "learning_rate": 1e-05, "loss": 1.0472, "step": 5185 }, { "epoch": 4.596988485385297, "grad_norm": 0.4039880931377411, "learning_rate": 1e-05, "loss": 1.0236, "step": 5190 }, { "epoch": 4.6014171833480955, "grad_norm": 0.41336092352867126, "learning_rate": 1e-05, "loss": 1.0065, "step": 5195 }, { "epoch": 4.605845881310895, "grad_norm": 0.30506569147109985, "learning_rate": 1e-05, "loss": 1.0062, "step": 5200 }, { "epoch": 4.610274579273693, "grad_norm": 0.33881711959838867, "learning_rate": 1e-05, "loss": 1.0301, "step": 5205 }, { "epoch": 4.614703277236492, "grad_norm": 0.30085551738739014, "learning_rate": 1e-05, "loss": 1.0335, "step": 5210 }, { "epoch": 4.619131975199291, "grad_norm": 0.28912144899368286, "learning_rate": 1e-05, "loss": 1.0556, "step": 5215 }, { "epoch": 4.62356067316209, "grad_norm": 0.3428463935852051, "learning_rate": 1e-05, "loss": 1.0124, "step": 5220 }, { "epoch": 4.627989371124889, "grad_norm": 0.3984996974468231, "learning_rate": 1e-05, "loss": 1.0655, "step": 5225 }, { "epoch": 4.632418069087688, "grad_norm": 0.39433929324150085, "learning_rate": 1e-05, "loss": 1.0554, "step": 5230 }, { "epoch": 4.636846767050487, "grad_norm": 0.3705815374851227, "learning_rate": 1e-05, "loss": 1.0477, "step": 5235 }, { "epoch": 4.641275465013286, "grad_norm": 0.366614431142807, "learning_rate": 1e-05, "loss": 0.9706, "step": 5240 }, { "epoch": 4.645704162976085, "grad_norm": 0.45391011238098145, "learning_rate": 1e-05, "loss": 1.0658, "step": 5245 }, { "epoch": 4.650132860938884, "grad_norm": 0.32938307523727417, "learning_rate": 1e-05, "loss": 0.9645, "step": 5250 }, { "epoch": 4.654561558901683, "grad_norm": 0.36539316177368164, "learning_rate": 1e-05, "loss": 1.0035, "step": 5255 }, { "epoch": 4.658990256864482, "grad_norm": 0.3959774076938629, "learning_rate": 1e-05, "loss": 1.0238, "step": 5260 }, { "epoch": 4.66341895482728, "grad_norm": 0.3841852843761444, "learning_rate": 1e-05, "loss": 1.0748, "step": 5265 }, { "epoch": 4.66784765279008, "grad_norm": 0.3290482461452484, "learning_rate": 1e-05, "loss": 1.0626, "step": 5270 }, { "epoch": 4.672276350752878, "grad_norm": 0.47915369272232056, "learning_rate": 1e-05, "loss": 1.0348, "step": 5275 }, { "epoch": 4.676705048715678, "grad_norm": 0.565383791923523, "learning_rate": 1e-05, "loss": 0.9787, "step": 5280 }, { "epoch": 4.681133746678476, "grad_norm": 0.3323745131492615, "learning_rate": 1e-05, "loss": 1.0176, "step": 5285 }, { "epoch": 4.685562444641276, "grad_norm": 0.37112122774124146, "learning_rate": 1e-05, "loss": 0.9539, "step": 5290 }, { "epoch": 4.689991142604074, "grad_norm": 0.38679757714271545, "learning_rate": 1e-05, "loss": 1.0413, "step": 5295 }, { "epoch": 4.694419840566873, "grad_norm": 0.3508121371269226, "learning_rate": 1e-05, "loss": 1.0077, "step": 5300 }, { "epoch": 4.698848538529672, "grad_norm": 0.4351249039173126, "learning_rate": 1e-05, "loss": 0.9674, "step": 5305 }, { "epoch": 4.703277236492471, "grad_norm": 0.42216530442237854, "learning_rate": 1e-05, "loss": 1.0333, "step": 5310 }, { "epoch": 4.70770593445527, "grad_norm": 0.3658766448497772, "learning_rate": 1e-05, "loss": 0.9804, "step": 5315 }, { "epoch": 4.712134632418069, "grad_norm": 0.3895967900753021, "learning_rate": 1e-05, "loss": 1.0463, "step": 5320 }, { "epoch": 4.716563330380868, "grad_norm": 0.2990342080593109, "learning_rate": 1e-05, "loss": 1.0478, "step": 5325 }, { "epoch": 4.720992028343667, "grad_norm": 0.3200947940349579, "learning_rate": 1e-05, "loss": 0.9807, "step": 5330 }, { "epoch": 4.725420726306466, "grad_norm": 0.3705001175403595, "learning_rate": 1e-05, "loss": 1.0254, "step": 5335 }, { "epoch": 4.729849424269265, "grad_norm": 0.29840797185897827, "learning_rate": 1e-05, "loss": 1.0335, "step": 5340 }, { "epoch": 4.734278122232064, "grad_norm": 0.2868799865245819, "learning_rate": 1e-05, "loss": 1.0111, "step": 5345 }, { "epoch": 4.738706820194863, "grad_norm": 0.3441306948661804, "learning_rate": 1e-05, "loss": 1.0187, "step": 5350 }, { "epoch": 4.743135518157661, "grad_norm": 0.3503575026988983, "learning_rate": 1e-05, "loss": 0.9918, "step": 5355 }, { "epoch": 4.747564216120461, "grad_norm": 0.36174270510673523, "learning_rate": 1e-05, "loss": 1.0814, "step": 5360 }, { "epoch": 4.751992914083259, "grad_norm": 0.37903475761413574, "learning_rate": 1e-05, "loss": 1.0349, "step": 5365 }, { "epoch": 4.756421612046059, "grad_norm": 0.300153523683548, "learning_rate": 1e-05, "loss": 1.0129, "step": 5370 }, { "epoch": 4.760850310008857, "grad_norm": 0.45964720845222473, "learning_rate": 1e-05, "loss": 1.0144, "step": 5375 }, { "epoch": 4.765279007971657, "grad_norm": 0.33609259128570557, "learning_rate": 1e-05, "loss": 1.0273, "step": 5380 }, { "epoch": 4.769707705934455, "grad_norm": 0.389454185962677, "learning_rate": 1e-05, "loss": 1.0324, "step": 5385 }, { "epoch": 4.774136403897254, "grad_norm": 0.3468781113624573, "learning_rate": 1e-05, "loss": 1.049, "step": 5390 }, { "epoch": 4.778565101860053, "grad_norm": 0.3363134562969208, "learning_rate": 1e-05, "loss": 0.9939, "step": 5395 }, { "epoch": 4.782993799822852, "grad_norm": 0.43030187487602234, "learning_rate": 1e-05, "loss": 1.0199, "step": 5400 }, { "epoch": 4.787422497785651, "grad_norm": 0.30518239736557007, "learning_rate": 1e-05, "loss": 1.0708, "step": 5405 }, { "epoch": 4.79185119574845, "grad_norm": 0.3768819570541382, "learning_rate": 1e-05, "loss": 1.0709, "step": 5410 }, { "epoch": 4.796279893711249, "grad_norm": 0.26274043321609497, "learning_rate": 1e-05, "loss": 0.9877, "step": 5415 }, { "epoch": 4.800708591674048, "grad_norm": 0.28756001591682434, "learning_rate": 1e-05, "loss": 0.9762, "step": 5420 }, { "epoch": 4.805137289636846, "grad_norm": 0.3681223392486572, "learning_rate": 1e-05, "loss": 0.974, "step": 5425 }, { "epoch": 4.809565987599646, "grad_norm": 0.3625973165035248, "learning_rate": 1e-05, "loss": 0.9991, "step": 5430 }, { "epoch": 4.813994685562444, "grad_norm": 0.34455597400665283, "learning_rate": 1e-05, "loss": 1.0193, "step": 5435 }, { "epoch": 4.818423383525244, "grad_norm": 0.4125065207481384, "learning_rate": 1e-05, "loss": 1.0597, "step": 5440 }, { "epoch": 4.822852081488042, "grad_norm": 0.5526455640792847, "learning_rate": 1e-05, "loss": 0.9891, "step": 5445 }, { "epoch": 4.827280779450842, "grad_norm": 0.35034605860710144, "learning_rate": 1e-05, "loss": 1.0274, "step": 5450 }, { "epoch": 4.83170947741364, "grad_norm": 0.3632095754146576, "learning_rate": 1e-05, "loss": 1.031, "step": 5455 }, { "epoch": 4.83613817537644, "grad_norm": 0.4786195158958435, "learning_rate": 1e-05, "loss": 1.0573, "step": 5460 }, { "epoch": 4.840566873339238, "grad_norm": 0.328865647315979, "learning_rate": 1e-05, "loss": 1.0393, "step": 5465 }, { "epoch": 4.844995571302038, "grad_norm": 0.3569698929786682, "learning_rate": 1e-05, "loss": 1.052, "step": 5470 }, { "epoch": 4.849424269264836, "grad_norm": 0.3329259753227234, "learning_rate": 1e-05, "loss": 0.9511, "step": 5475 }, { "epoch": 4.853852967227635, "grad_norm": 0.4242902994155884, "learning_rate": 1e-05, "loss": 0.9911, "step": 5480 }, { "epoch": 4.858281665190434, "grad_norm": 0.3652363419532776, "learning_rate": 1e-05, "loss": 1.0326, "step": 5485 }, { "epoch": 4.862710363153233, "grad_norm": 0.35778918862342834, "learning_rate": 1e-05, "loss": 0.9759, "step": 5490 }, { "epoch": 4.867139061116032, "grad_norm": 0.3289540410041809, "learning_rate": 1e-05, "loss": 1.0494, "step": 5495 }, { "epoch": 4.871567759078831, "grad_norm": 0.34093955159187317, "learning_rate": 1e-05, "loss": 0.9683, "step": 5500 }, { "epoch": 4.87599645704163, "grad_norm": 0.46902990341186523, "learning_rate": 1e-05, "loss": 1.0458, "step": 5505 }, { "epoch": 4.880425155004429, "grad_norm": 0.3542724847793579, "learning_rate": 1e-05, "loss": 1.0049, "step": 5510 }, { "epoch": 4.884853852967227, "grad_norm": 0.2924833297729492, "learning_rate": 1e-05, "loss": 1.0801, "step": 5515 }, { "epoch": 4.889282550930027, "grad_norm": 0.28937458992004395, "learning_rate": 1e-05, "loss": 0.9733, "step": 5520 }, { "epoch": 4.893711248892825, "grad_norm": 0.3082599937915802, "learning_rate": 1e-05, "loss": 1.0456, "step": 5525 }, { "epoch": 4.898139946855625, "grad_norm": 0.34604334831237793, "learning_rate": 1e-05, "loss": 0.9681, "step": 5530 }, { "epoch": 4.902568644818423, "grad_norm": 0.3090795576572418, "learning_rate": 1e-05, "loss": 1.0032, "step": 5535 }, { "epoch": 4.906997342781223, "grad_norm": 0.41077151894569397, "learning_rate": 1e-05, "loss": 1.0241, "step": 5540 }, { "epoch": 4.911426040744021, "grad_norm": 0.4031723141670227, "learning_rate": 1e-05, "loss": 1.0385, "step": 5545 }, { "epoch": 4.9158547387068205, "grad_norm": 0.4754953980445862, "learning_rate": 1e-05, "loss": 0.9347, "step": 5550 }, { "epoch": 4.920283436669619, "grad_norm": 0.43536168336868286, "learning_rate": 1e-05, "loss": 1.0214, "step": 5555 }, { "epoch": 4.9247121346324185, "grad_norm": 0.49753400683403015, "learning_rate": 1e-05, "loss": 1.0023, "step": 5560 }, { "epoch": 4.929140832595217, "grad_norm": 0.43414339423179626, "learning_rate": 1e-05, "loss": 1.0569, "step": 5565 }, { "epoch": 4.933569530558016, "grad_norm": 0.4094363749027252, "learning_rate": 1e-05, "loss": 1.0604, "step": 5570 }, { "epoch": 4.937998228520815, "grad_norm": 0.41692155599594116, "learning_rate": 1e-05, "loss": 1.018, "step": 5575 }, { "epoch": 4.942426926483614, "grad_norm": 0.38908472657203674, "learning_rate": 1e-05, "loss": 1.0606, "step": 5580 }, { "epoch": 4.946855624446413, "grad_norm": 0.3818272650241852, "learning_rate": 1e-05, "loss": 1.0042, "step": 5585 }, { "epoch": 4.951284322409212, "grad_norm": 0.3907962441444397, "learning_rate": 1e-05, "loss": 0.9875, "step": 5590 }, { "epoch": 4.955713020372011, "grad_norm": 0.33932164311408997, "learning_rate": 1e-05, "loss": 0.9696, "step": 5595 }, { "epoch": 4.9601417183348095, "grad_norm": 0.31713759899139404, "learning_rate": 1e-05, "loss": 1.0512, "step": 5600 }, { "epoch": 4.964570416297608, "grad_norm": 0.31604087352752686, "learning_rate": 1e-05, "loss": 0.959, "step": 5605 }, { "epoch": 4.9689991142604075, "grad_norm": 0.38807857036590576, "learning_rate": 1e-05, "loss": 1.0524, "step": 5610 }, { "epoch": 4.973427812223206, "grad_norm": 0.3664707839488983, "learning_rate": 1e-05, "loss": 1.0314, "step": 5615 }, { "epoch": 4.9778565101860055, "grad_norm": 0.40883156657218933, "learning_rate": 1e-05, "loss": 1.0402, "step": 5620 }, { "epoch": 4.982285208148804, "grad_norm": 0.40304985642433167, "learning_rate": 1e-05, "loss": 1.014, "step": 5625 }, { "epoch": 4.9867139061116035, "grad_norm": 0.37292009592056274, "learning_rate": 1e-05, "loss": 0.9857, "step": 5630 }, { "epoch": 4.991142604074402, "grad_norm": 0.46004778146743774, "learning_rate": 1e-05, "loss": 0.9865, "step": 5635 }, { "epoch": 4.995571302037201, "grad_norm": 0.3352963328361511, "learning_rate": 1e-05, "loss": 0.9941, "step": 5640 }, { "epoch": 5.0, "grad_norm": 0.28555047512054443, "learning_rate": 1e-05, "loss": 0.9875, "step": 5645 }, { "epoch": 5.0044286979627985, "grad_norm": 0.35492247343063354, "learning_rate": 1e-05, "loss": 0.9689, "step": 5650 }, { "epoch": 5.008857395925598, "grad_norm": 0.41121748089790344, "learning_rate": 1e-05, "loss": 1.0374, "step": 5655 }, { "epoch": 5.0132860938883965, "grad_norm": 0.3643803298473358, "learning_rate": 1e-05, "loss": 0.9788, "step": 5660 }, { "epoch": 5.017714791851196, "grad_norm": 0.373393714427948, "learning_rate": 1e-05, "loss": 1.0188, "step": 5665 }, { "epoch": 5.0221434898139945, "grad_norm": 0.44527116417884827, "learning_rate": 1e-05, "loss": 0.9633, "step": 5670 }, { "epoch": 5.026572187776794, "grad_norm": 0.3466438353061676, "learning_rate": 1e-05, "loss": 1.043, "step": 5675 }, { "epoch": 5.0310008857395925, "grad_norm": 0.36244356632232666, "learning_rate": 1e-05, "loss": 1.0249, "step": 5680 }, { "epoch": 5.035429583702392, "grad_norm": 0.39741718769073486, "learning_rate": 1e-05, "loss": 1.036, "step": 5685 }, { "epoch": 5.0398582816651905, "grad_norm": 0.28051066398620605, "learning_rate": 1e-05, "loss": 1.0045, "step": 5690 }, { "epoch": 5.044286979627989, "grad_norm": 0.3079621195793152, "learning_rate": 1e-05, "loss": 1.0129, "step": 5695 }, { "epoch": 5.048715677590788, "grad_norm": 0.3123464584350586, "learning_rate": 1e-05, "loss": 1.0388, "step": 5700 }, { "epoch": 5.053144375553587, "grad_norm": 0.40218472480773926, "learning_rate": 1e-05, "loss": 1.0483, "step": 5705 }, { "epoch": 5.057573073516386, "grad_norm": 0.3287288248538971, "learning_rate": 1e-05, "loss": 1.0642, "step": 5710 }, { "epoch": 5.062001771479185, "grad_norm": 0.30074939131736755, "learning_rate": 1e-05, "loss": 1.0579, "step": 5715 }, { "epoch": 5.066430469441984, "grad_norm": 0.37240633368492126, "learning_rate": 1e-05, "loss": 1.0123, "step": 5720 }, { "epoch": 5.070859167404783, "grad_norm": 0.3434273600578308, "learning_rate": 1e-05, "loss": 0.9954, "step": 5725 }, { "epoch": 5.075287865367582, "grad_norm": 0.44496390223503113, "learning_rate": 1e-05, "loss": 0.9956, "step": 5730 }, { "epoch": 5.079716563330381, "grad_norm": 0.3801027834415436, "learning_rate": 1e-05, "loss": 1.0091, "step": 5735 }, { "epoch": 5.0841452612931795, "grad_norm": 0.3687537908554077, "learning_rate": 1e-05, "loss": 0.9908, "step": 5740 }, { "epoch": 5.088573959255979, "grad_norm": 0.3070183992385864, "learning_rate": 1e-05, "loss": 0.9813, "step": 5745 }, { "epoch": 5.093002657218777, "grad_norm": 0.2721776068210602, "learning_rate": 1e-05, "loss": 1.0342, "step": 5750 }, { "epoch": 5.097431355181577, "grad_norm": 0.43468162417411804, "learning_rate": 1e-05, "loss": 1.0439, "step": 5755 }, { "epoch": 5.101860053144375, "grad_norm": 0.4132360816001892, "learning_rate": 1e-05, "loss": 1.0348, "step": 5760 }, { "epoch": 5.106288751107175, "grad_norm": 0.34460538625717163, "learning_rate": 1e-05, "loss": 0.9895, "step": 5765 }, { "epoch": 5.110717449069973, "grad_norm": 0.31522879004478455, "learning_rate": 1e-05, "loss": 1.0915, "step": 5770 }, { "epoch": 5.115146147032772, "grad_norm": 0.40316328406333923, "learning_rate": 1e-05, "loss": 1.0537, "step": 5775 }, { "epoch": 5.119574844995571, "grad_norm": 0.3921210765838623, "learning_rate": 1e-05, "loss": 1.0297, "step": 5780 }, { "epoch": 5.12400354295837, "grad_norm": 0.3546719253063202, "learning_rate": 1e-05, "loss": 0.9982, "step": 5785 }, { "epoch": 5.128432240921169, "grad_norm": 0.29872897267341614, "learning_rate": 1e-05, "loss": 1.0053, "step": 5790 }, { "epoch": 5.132860938883968, "grad_norm": 0.316112220287323, "learning_rate": 1e-05, "loss": 1.016, "step": 5795 }, { "epoch": 5.137289636846767, "grad_norm": 0.4852432310581207, "learning_rate": 1e-05, "loss": 0.9966, "step": 5800 }, { "epoch": 5.141718334809566, "grad_norm": 0.46665048599243164, "learning_rate": 1e-05, "loss": 1.023, "step": 5805 }, { "epoch": 5.146147032772365, "grad_norm": 0.30212077498435974, "learning_rate": 1e-05, "loss": 0.9988, "step": 5810 }, { "epoch": 5.150575730735164, "grad_norm": 0.37133854627609253, "learning_rate": 1e-05, "loss": 1.0369, "step": 5815 }, { "epoch": 5.155004428697962, "grad_norm": 0.3628295958042145, "learning_rate": 1e-05, "loss": 1.0044, "step": 5820 }, { "epoch": 5.159433126660762, "grad_norm": 0.5432576537132263, "learning_rate": 1e-05, "loss": 1.002, "step": 5825 }, { "epoch": 5.16386182462356, "grad_norm": 0.36904704570770264, "learning_rate": 1e-05, "loss": 0.9977, "step": 5830 }, { "epoch": 5.16829052258636, "grad_norm": 0.2848658859729767, "learning_rate": 1e-05, "loss": 0.9523, "step": 5835 }, { "epoch": 5.172719220549158, "grad_norm": 0.3406495451927185, "learning_rate": 1e-05, "loss": 0.9749, "step": 5840 }, { "epoch": 5.177147918511958, "grad_norm": 0.32348519563674927, "learning_rate": 1e-05, "loss": 0.9716, "step": 5845 }, { "epoch": 5.181576616474756, "grad_norm": 0.3358650803565979, "learning_rate": 1e-05, "loss": 1.0514, "step": 5850 }, { "epoch": 5.186005314437556, "grad_norm": 0.32775822281837463, "learning_rate": 1e-05, "loss": 1.0133, "step": 5855 }, { "epoch": 5.190434012400354, "grad_norm": 0.42163965106010437, "learning_rate": 1e-05, "loss": 1.0257, "step": 5860 }, { "epoch": 5.194862710363153, "grad_norm": 0.30723923444747925, "learning_rate": 1e-05, "loss": 1.0191, "step": 5865 }, { "epoch": 5.199291408325952, "grad_norm": 0.44126787781715393, "learning_rate": 1e-05, "loss": 1.0324, "step": 5870 }, { "epoch": 5.203720106288751, "grad_norm": 0.4131248891353607, "learning_rate": 1e-05, "loss": 0.9997, "step": 5875 }, { "epoch": 5.20814880425155, "grad_norm": 0.29048019647598267, "learning_rate": 1e-05, "loss": 1.0201, "step": 5880 }, { "epoch": 5.212577502214349, "grad_norm": 0.3277253806591034, "learning_rate": 1e-05, "loss": 1.0173, "step": 5885 }, { "epoch": 5.217006200177148, "grad_norm": 0.2832876145839691, "learning_rate": 1e-05, "loss": 1.0165, "step": 5890 }, { "epoch": 5.221434898139947, "grad_norm": 0.4724528193473816, "learning_rate": 1e-05, "loss": 1.0942, "step": 5895 }, { "epoch": 5.225863596102746, "grad_norm": 0.4407491087913513, "learning_rate": 1e-05, "loss": 1.0035, "step": 5900 }, { "epoch": 5.230292294065545, "grad_norm": 0.3977293074131012, "learning_rate": 1e-05, "loss": 1.0298, "step": 5905 }, { "epoch": 5.234720992028343, "grad_norm": 0.35610103607177734, "learning_rate": 1e-05, "loss": 1.0098, "step": 5910 }, { "epoch": 5.239149689991143, "grad_norm": 0.3498757779598236, "learning_rate": 1e-05, "loss": 1.0089, "step": 5915 }, { "epoch": 5.243578387953941, "grad_norm": 0.35207265615463257, "learning_rate": 1e-05, "loss": 0.998, "step": 5920 }, { "epoch": 5.248007085916741, "grad_norm": 0.287803590297699, "learning_rate": 1e-05, "loss": 1.0526, "step": 5925 }, { "epoch": 5.252435783879539, "grad_norm": 0.30702874064445496, "learning_rate": 1e-05, "loss": 1.0434, "step": 5930 }, { "epoch": 5.256864481842339, "grad_norm": 0.3165454566478729, "learning_rate": 1e-05, "loss": 1.0441, "step": 5935 }, { "epoch": 5.261293179805137, "grad_norm": 0.3398440182209015, "learning_rate": 1e-05, "loss": 1.0405, "step": 5940 }, { "epoch": 5.265721877767936, "grad_norm": 0.29266223311424255, "learning_rate": 1e-05, "loss": 1.0724, "step": 5945 }, { "epoch": 5.270150575730735, "grad_norm": 0.46261587738990784, "learning_rate": 1e-05, "loss": 1.0193, "step": 5950 }, { "epoch": 5.274579273693534, "grad_norm": 0.3286849856376648, "learning_rate": 1e-05, "loss": 0.9876, "step": 5955 }, { "epoch": 5.279007971656333, "grad_norm": 0.3285291790962219, "learning_rate": 1e-05, "loss": 0.945, "step": 5960 }, { "epoch": 5.283436669619132, "grad_norm": 0.34139150381088257, "learning_rate": 1e-05, "loss": 0.9544, "step": 5965 }, { "epoch": 5.287865367581931, "grad_norm": 0.41966500878334045, "learning_rate": 1e-05, "loss": 0.9946, "step": 5970 }, { "epoch": 5.29229406554473, "grad_norm": 0.47533154487609863, "learning_rate": 1e-05, "loss": 1.0204, "step": 5975 }, { "epoch": 5.296722763507529, "grad_norm": 0.34932243824005127, "learning_rate": 1e-05, "loss": 1.0951, "step": 5980 }, { "epoch": 5.301151461470328, "grad_norm": 0.3282047510147095, "learning_rate": 1e-05, "loss": 0.998, "step": 5985 }, { "epoch": 5.305580159433127, "grad_norm": 0.382445752620697, "learning_rate": 1e-05, "loss": 0.9552, "step": 5990 }, { "epoch": 5.310008857395926, "grad_norm": 0.39635875821113586, "learning_rate": 1e-05, "loss": 1.0158, "step": 5995 }, { "epoch": 5.314437555358724, "grad_norm": 0.5262877941131592, "learning_rate": 1e-05, "loss": 1.0692, "step": 6000 }, { "epoch": 5.318866253321524, "grad_norm": 0.4436338245868683, "learning_rate": 1e-05, "loss": 0.9783, "step": 6005 }, { "epoch": 5.323294951284322, "grad_norm": 0.26665282249450684, "learning_rate": 1e-05, "loss": 1.0315, "step": 6010 }, { "epoch": 5.327723649247122, "grad_norm": 0.27709686756134033, "learning_rate": 1e-05, "loss": 1.0408, "step": 6015 }, { "epoch": 5.33215234720992, "grad_norm": 0.25978901982307434, "learning_rate": 1e-05, "loss": 0.9783, "step": 6020 }, { "epoch": 5.33658104517272, "grad_norm": 0.4198380708694458, "learning_rate": 1e-05, "loss": 1.0478, "step": 6025 }, { "epoch": 5.341009743135518, "grad_norm": 0.35256171226501465, "learning_rate": 1e-05, "loss": 1.0658, "step": 6030 }, { "epoch": 5.345438441098317, "grad_norm": 0.3989357054233551, "learning_rate": 1e-05, "loss": 1.0334, "step": 6035 }, { "epoch": 5.349867139061116, "grad_norm": 0.3216528296470642, "learning_rate": 1e-05, "loss": 1.0454, "step": 6040 }, { "epoch": 5.354295837023915, "grad_norm": 0.3307344913482666, "learning_rate": 1e-05, "loss": 1.0137, "step": 6045 }, { "epoch": 5.358724534986714, "grad_norm": 0.3836325407028198, "learning_rate": 1e-05, "loss": 0.9952, "step": 6050 }, { "epoch": 5.363153232949513, "grad_norm": 0.29982152581214905, "learning_rate": 1e-05, "loss": 0.9876, "step": 6055 }, { "epoch": 5.367581930912312, "grad_norm": 0.3325580060482025, "learning_rate": 1e-05, "loss": 1.0861, "step": 6060 }, { "epoch": 5.372010628875111, "grad_norm": 0.3494728207588196, "learning_rate": 1e-05, "loss": 1.0044, "step": 6065 }, { "epoch": 5.37643932683791, "grad_norm": 0.3120420575141907, "learning_rate": 1e-05, "loss": 0.9633, "step": 6070 }, { "epoch": 5.380868024800709, "grad_norm": 0.2987075448036194, "learning_rate": 1e-05, "loss": 1.0438, "step": 6075 }, { "epoch": 5.385296722763507, "grad_norm": 0.28469255566596985, "learning_rate": 1e-05, "loss": 1.0209, "step": 6080 }, { "epoch": 5.389725420726307, "grad_norm": 0.333445280790329, "learning_rate": 1e-05, "loss": 1.0366, "step": 6085 }, { "epoch": 5.394154118689105, "grad_norm": 0.3890826404094696, "learning_rate": 1e-05, "loss": 0.99, "step": 6090 }, { "epoch": 5.3985828166519045, "grad_norm": 0.26968681812286377, "learning_rate": 1e-05, "loss": 0.9702, "step": 6095 }, { "epoch": 5.403011514614703, "grad_norm": 0.30162420868873596, "learning_rate": 1e-05, "loss": 1.0351, "step": 6100 }, { "epoch": 5.4074402125775025, "grad_norm": 0.3432179391384125, "learning_rate": 1e-05, "loss": 0.9585, "step": 6105 }, { "epoch": 5.411868910540301, "grad_norm": 0.3125027120113373, "learning_rate": 1e-05, "loss": 0.9931, "step": 6110 }, { "epoch": 5.4162976085031005, "grad_norm": 0.3560992479324341, "learning_rate": 1e-05, "loss": 0.9678, "step": 6115 }, { "epoch": 5.420726306465899, "grad_norm": 0.33001136779785156, "learning_rate": 1e-05, "loss": 0.9965, "step": 6120 }, { "epoch": 5.425155004428698, "grad_norm": 0.32153934240341187, "learning_rate": 1e-05, "loss": 0.9859, "step": 6125 }, { "epoch": 5.429583702391497, "grad_norm": 0.3118373453617096, "learning_rate": 1e-05, "loss": 0.9631, "step": 6130 }, { "epoch": 5.434012400354296, "grad_norm": 0.2876591980457306, "learning_rate": 1e-05, "loss": 0.986, "step": 6135 }, { "epoch": 5.438441098317095, "grad_norm": 0.29356780648231506, "learning_rate": 1e-05, "loss": 1.0502, "step": 6140 }, { "epoch": 5.4428697962798935, "grad_norm": 0.350515753030777, "learning_rate": 1e-05, "loss": 1.0198, "step": 6145 }, { "epoch": 5.447298494242693, "grad_norm": 0.388534814119339, "learning_rate": 1e-05, "loss": 0.9944, "step": 6150 }, { "epoch": 5.4517271922054915, "grad_norm": 0.40516066551208496, "learning_rate": 1e-05, "loss": 1.0262, "step": 6155 }, { "epoch": 5.45615589016829, "grad_norm": 0.4120570421218872, "learning_rate": 1e-05, "loss": 0.9993, "step": 6160 }, { "epoch": 5.4605845881310895, "grad_norm": 0.5171772241592407, "learning_rate": 1e-05, "loss": 0.967, "step": 6165 }, { "epoch": 5.465013286093888, "grad_norm": 0.28847429156303406, "learning_rate": 1e-05, "loss": 1.0448, "step": 6170 }, { "epoch": 5.4694419840566875, "grad_norm": 0.43600407242774963, "learning_rate": 1e-05, "loss": 0.9472, "step": 6175 }, { "epoch": 5.473870682019486, "grad_norm": 0.5186824202537537, "learning_rate": 1e-05, "loss": 0.9959, "step": 6180 }, { "epoch": 5.4782993799822854, "grad_norm": 0.3558749854564667, "learning_rate": 1e-05, "loss": 1.0511, "step": 6185 }, { "epoch": 5.482728077945084, "grad_norm": 0.34864541888237, "learning_rate": 1e-05, "loss": 1.0156, "step": 6190 }, { "epoch": 5.487156775907883, "grad_norm": 0.3080834448337555, "learning_rate": 1e-05, "loss": 1.0381, "step": 6195 }, { "epoch": 5.491585473870682, "grad_norm": 0.3084205985069275, "learning_rate": 1e-05, "loss": 1.0515, "step": 6200 }, { "epoch": 5.496014171833481, "grad_norm": 0.33310824632644653, "learning_rate": 1e-05, "loss": 0.9777, "step": 6205 }, { "epoch": 5.50044286979628, "grad_norm": 0.28664156794548035, "learning_rate": 1e-05, "loss": 1.0429, "step": 6210 }, { "epoch": 5.5048715677590785, "grad_norm": 0.43311434984207153, "learning_rate": 1e-05, "loss": 1.0374, "step": 6215 }, { "epoch": 5.509300265721878, "grad_norm": 0.3207617998123169, "learning_rate": 1e-05, "loss": 1.0134, "step": 6220 }, { "epoch": 5.5137289636846765, "grad_norm": 0.3104935884475708, "learning_rate": 1e-05, "loss": 1.0223, "step": 6225 }, { "epoch": 5.518157661647476, "grad_norm": 0.3671448528766632, "learning_rate": 1e-05, "loss": 0.9916, "step": 6230 }, { "epoch": 5.522586359610274, "grad_norm": 0.2939125895500183, "learning_rate": 1e-05, "loss": 1.0523, "step": 6235 }, { "epoch": 5.527015057573074, "grad_norm": 0.2815743088722229, "learning_rate": 1e-05, "loss": 0.9998, "step": 6240 }, { "epoch": 5.531443755535872, "grad_norm": 0.34176525473594666, "learning_rate": 1e-05, "loss": 1.0075, "step": 6245 }, { "epoch": 5.535872453498671, "grad_norm": 0.3273089528083801, "learning_rate": 1e-05, "loss": 1.0284, "step": 6250 }, { "epoch": 5.54030115146147, "grad_norm": 0.2730557918548584, "learning_rate": 1e-05, "loss": 0.9745, "step": 6255 }, { "epoch": 5.544729849424269, "grad_norm": 0.3903951048851013, "learning_rate": 1e-05, "loss": 1.0465, "step": 6260 }, { "epoch": 5.549158547387068, "grad_norm": 0.3390137553215027, "learning_rate": 1e-05, "loss": 1.0048, "step": 6265 }, { "epoch": 5.553587245349867, "grad_norm": 0.46740761399269104, "learning_rate": 1e-05, "loss": 1.0443, "step": 6270 }, { "epoch": 5.558015943312666, "grad_norm": 0.4413903057575226, "learning_rate": 1e-05, "loss": 0.994, "step": 6275 }, { "epoch": 5.562444641275465, "grad_norm": 0.3875576853752136, "learning_rate": 1e-05, "loss": 1.0033, "step": 6280 }, { "epoch": 5.566873339238264, "grad_norm": 0.28578558564186096, "learning_rate": 1e-05, "loss": 1.0295, "step": 6285 }, { "epoch": 5.571302037201063, "grad_norm": 0.3622230291366577, "learning_rate": 1e-05, "loss": 1.0238, "step": 6290 }, { "epoch": 5.575730735163862, "grad_norm": 0.33592015504837036, "learning_rate": 1e-05, "loss": 1.0237, "step": 6295 }, { "epoch": 5.580159433126661, "grad_norm": 0.3847084045410156, "learning_rate": 1e-05, "loss": 1.0386, "step": 6300 }, { "epoch": 5.584588131089459, "grad_norm": 0.4086059629917145, "learning_rate": 1e-05, "loss": 1.0343, "step": 6305 }, { "epoch": 5.589016829052259, "grad_norm": 0.3374808430671692, "learning_rate": 1e-05, "loss": 1.0388, "step": 6310 }, { "epoch": 5.593445527015057, "grad_norm": 0.30118468403816223, "learning_rate": 1e-05, "loss": 0.9999, "step": 6315 }, { "epoch": 5.597874224977857, "grad_norm": 0.37272441387176514, "learning_rate": 1e-05, "loss": 0.9453, "step": 6320 }, { "epoch": 5.602302922940655, "grad_norm": 0.44374728202819824, "learning_rate": 1e-05, "loss": 1.0131, "step": 6325 }, { "epoch": 5.606731620903455, "grad_norm": 0.3207448422908783, "learning_rate": 1e-05, "loss": 0.9805, "step": 6330 }, { "epoch": 5.611160318866253, "grad_norm": 0.34067943692207336, "learning_rate": 1e-05, "loss": 1.0052, "step": 6335 }, { "epoch": 5.615589016829052, "grad_norm": 0.3046121299266815, "learning_rate": 1e-05, "loss": 1.0542, "step": 6340 }, { "epoch": 5.620017714791851, "grad_norm": 0.3301253318786621, "learning_rate": 1e-05, "loss": 1.0189, "step": 6345 }, { "epoch": 5.62444641275465, "grad_norm": 0.3467637002468109, "learning_rate": 1e-05, "loss": 1.0373, "step": 6350 }, { "epoch": 5.628875110717449, "grad_norm": 0.35957252979278564, "learning_rate": 1e-05, "loss": 1.0298, "step": 6355 }, { "epoch": 5.633303808680248, "grad_norm": 0.3335168957710266, "learning_rate": 1e-05, "loss": 0.9895, "step": 6360 }, { "epoch": 5.637732506643047, "grad_norm": 0.3452107012271881, "learning_rate": 1e-05, "loss": 1.0225, "step": 6365 }, { "epoch": 5.642161204605846, "grad_norm": 0.3340684771537781, "learning_rate": 1e-05, "loss": 0.9938, "step": 6370 }, { "epoch": 5.646589902568644, "grad_norm": 0.3786199688911438, "learning_rate": 1e-05, "loss": 1.0824, "step": 6375 }, { "epoch": 5.651018600531444, "grad_norm": 0.36782315373420715, "learning_rate": 1e-05, "loss": 0.9999, "step": 6380 }, { "epoch": 5.655447298494242, "grad_norm": 0.378689706325531, "learning_rate": 1e-05, "loss": 1.0468, "step": 6385 }, { "epoch": 5.659875996457042, "grad_norm": 0.320144385099411, "learning_rate": 1e-05, "loss": 1.0104, "step": 6390 }, { "epoch": 5.66430469441984, "grad_norm": 0.3400636911392212, "learning_rate": 1e-05, "loss": 0.9859, "step": 6395 }, { "epoch": 5.66873339238264, "grad_norm": 0.3092327117919922, "learning_rate": 1e-05, "loss": 1.0057, "step": 6400 }, { "epoch": 5.673162090345438, "grad_norm": 0.30968233942985535, "learning_rate": 1e-05, "loss": 1.0092, "step": 6405 }, { "epoch": 5.677590788308238, "grad_norm": 0.3212548792362213, "learning_rate": 1e-05, "loss": 1.0531, "step": 6410 }, { "epoch": 5.682019486271036, "grad_norm": 0.4057113826274872, "learning_rate": 1e-05, "loss": 1.0164, "step": 6415 }, { "epoch": 5.686448184233836, "grad_norm": 0.3626675009727478, "learning_rate": 1e-05, "loss": 1.0395, "step": 6420 }, { "epoch": 5.690876882196634, "grad_norm": 0.3725353479385376, "learning_rate": 1e-05, "loss": 1.0125, "step": 6425 }, { "epoch": 5.695305580159433, "grad_norm": 0.3042195737361908, "learning_rate": 1e-05, "loss": 1.0061, "step": 6430 }, { "epoch": 5.699734278122232, "grad_norm": 0.321402370929718, "learning_rate": 1e-05, "loss": 1.0, "step": 6435 }, { "epoch": 5.704162976085031, "grad_norm": 0.4093253016471863, "learning_rate": 1e-05, "loss": 0.9897, "step": 6440 }, { "epoch": 5.70859167404783, "grad_norm": 0.29677829146385193, "learning_rate": 1e-05, "loss": 1.0031, "step": 6445 }, { "epoch": 5.713020372010629, "grad_norm": 0.25508174300193787, "learning_rate": 1e-05, "loss": 1.0238, "step": 6450 }, { "epoch": 5.717449069973428, "grad_norm": 0.350716233253479, "learning_rate": 1e-05, "loss": 1.045, "step": 6455 }, { "epoch": 5.721877767936227, "grad_norm": 0.4051620662212372, "learning_rate": 1e-05, "loss": 1.0576, "step": 6460 }, { "epoch": 5.726306465899025, "grad_norm": 0.3167639970779419, "learning_rate": 1e-05, "loss": 0.9678, "step": 6465 }, { "epoch": 5.730735163861825, "grad_norm": 0.39503225684165955, "learning_rate": 1e-05, "loss": 1.0222, "step": 6470 }, { "epoch": 5.735163861824623, "grad_norm": 0.4134061336517334, "learning_rate": 1e-05, "loss": 1.0518, "step": 6475 }, { "epoch": 5.739592559787423, "grad_norm": 0.3831997811794281, "learning_rate": 1e-05, "loss": 1.0047, "step": 6480 }, { "epoch": 5.744021257750221, "grad_norm": 0.4127909541130066, "learning_rate": 1e-05, "loss": 0.9556, "step": 6485 }, { "epoch": 5.748449955713021, "grad_norm": 0.37857189774513245, "learning_rate": 1e-05, "loss": 1.0099, "step": 6490 }, { "epoch": 5.752878653675819, "grad_norm": 0.47841256856918335, "learning_rate": 1e-05, "loss": 0.9687, "step": 6495 }, { "epoch": 5.757307351638619, "grad_norm": 0.4187529981136322, "learning_rate": 1e-05, "loss": 1.028, "step": 6500 }, { "epoch": 5.761736049601417, "grad_norm": 0.36477819085121155, "learning_rate": 1e-05, "loss": 0.9833, "step": 6505 }, { "epoch": 5.766164747564217, "grad_norm": 0.3008206784725189, "learning_rate": 1e-05, "loss": 1.0461, "step": 6510 }, { "epoch": 5.770593445527015, "grad_norm": 0.2751103937625885, "learning_rate": 1e-05, "loss": 1.0708, "step": 6515 }, { "epoch": 5.775022143489814, "grad_norm": 0.28461137413978577, "learning_rate": 1e-05, "loss": 1.0415, "step": 6520 }, { "epoch": 5.779450841452613, "grad_norm": 0.4289863109588623, "learning_rate": 1e-05, "loss": 1.0139, "step": 6525 }, { "epoch": 5.783879539415412, "grad_norm": 0.3430834412574768, "learning_rate": 1e-05, "loss": 1.0371, "step": 6530 }, { "epoch": 5.788308237378211, "grad_norm": 0.30186137557029724, "learning_rate": 1e-05, "loss": 1.0019, "step": 6535 }, { "epoch": 5.79273693534101, "grad_norm": 0.29980048537254333, "learning_rate": 1e-05, "loss": 1.0322, "step": 6540 }, { "epoch": 5.797165633303809, "grad_norm": 0.3881379961967468, "learning_rate": 1e-05, "loss": 0.996, "step": 6545 }, { "epoch": 5.801594331266608, "grad_norm": 0.3307119905948639, "learning_rate": 1e-05, "loss": 0.9988, "step": 6550 }, { "epoch": 5.806023029229406, "grad_norm": 0.34576040506362915, "learning_rate": 1e-05, "loss": 1.0481, "step": 6555 }, { "epoch": 5.810451727192206, "grad_norm": 0.34578219056129456, "learning_rate": 1e-05, "loss": 1.0477, "step": 6560 }, { "epoch": 5.814880425155004, "grad_norm": 0.3267001807689667, "learning_rate": 1e-05, "loss": 0.9762, "step": 6565 }, { "epoch": 5.819309123117804, "grad_norm": 0.4689924418926239, "learning_rate": 1e-05, "loss": 1.0475, "step": 6570 }, { "epoch": 5.823737821080602, "grad_norm": 0.3635270595550537, "learning_rate": 1e-05, "loss": 1.091, "step": 6575 }, { "epoch": 5.8281665190434015, "grad_norm": 0.35070130228996277, "learning_rate": 1e-05, "loss": 1.0062, "step": 6580 }, { "epoch": 5.8325952170062, "grad_norm": 0.3956962525844574, "learning_rate": 1e-05, "loss": 1.0038, "step": 6585 }, { "epoch": 5.837023914968999, "grad_norm": 0.3035469353199005, "learning_rate": 1e-05, "loss": 0.9981, "step": 6590 }, { "epoch": 5.841452612931798, "grad_norm": 0.33993586897850037, "learning_rate": 1e-05, "loss": 1.0385, "step": 6595 }, { "epoch": 5.845881310894597, "grad_norm": 0.38742855191230774, "learning_rate": 1e-05, "loss": 1.0265, "step": 6600 }, { "epoch": 5.850310008857396, "grad_norm": 0.23851977288722992, "learning_rate": 1e-05, "loss": 1.0263, "step": 6605 }, { "epoch": 5.854738706820195, "grad_norm": 0.3214780390262604, "learning_rate": 1e-05, "loss": 1.0386, "step": 6610 }, { "epoch": 5.859167404782994, "grad_norm": 0.3350125253200531, "learning_rate": 1e-05, "loss": 1.0447, "step": 6615 }, { "epoch": 5.863596102745793, "grad_norm": 0.2939947545528412, "learning_rate": 1e-05, "loss": 0.995, "step": 6620 }, { "epoch": 5.868024800708592, "grad_norm": 0.31134822964668274, "learning_rate": 1e-05, "loss": 0.9708, "step": 6625 }, { "epoch": 5.8724534986713905, "grad_norm": 0.4234241843223572, "learning_rate": 1e-05, "loss": 1.0297, "step": 6630 }, { "epoch": 5.87688219663419, "grad_norm": 0.37806475162506104, "learning_rate": 1e-05, "loss": 1.0333, "step": 6635 }, { "epoch": 5.8813108945969885, "grad_norm": 0.2995615005493164, "learning_rate": 1e-05, "loss": 1.0038, "step": 6640 }, { "epoch": 5.885739592559787, "grad_norm": 0.3165876865386963, "learning_rate": 1e-05, "loss": 1.0297, "step": 6645 }, { "epoch": 5.8901682905225865, "grad_norm": 0.32325923442840576, "learning_rate": 1e-05, "loss": 1.0019, "step": 6650 }, { "epoch": 5.894596988485385, "grad_norm": 0.45908138155937195, "learning_rate": 1e-05, "loss": 1.0219, "step": 6655 }, { "epoch": 5.8990256864481845, "grad_norm": 0.35252439975738525, "learning_rate": 1e-05, "loss": 1.0029, "step": 6660 }, { "epoch": 5.903454384410983, "grad_norm": 0.3436559736728668, "learning_rate": 1e-05, "loss": 0.9949, "step": 6665 }, { "epoch": 5.9078830823737825, "grad_norm": 0.31436410546302795, "learning_rate": 1e-05, "loss": 1.0649, "step": 6670 }, { "epoch": 5.912311780336581, "grad_norm": 0.4750252962112427, "learning_rate": 1e-05, "loss": 1.0326, "step": 6675 }, { "epoch": 5.9167404782993795, "grad_norm": 0.34484434127807617, "learning_rate": 1e-05, "loss": 1.0104, "step": 6680 }, { "epoch": 5.921169176262179, "grad_norm": 0.385506272315979, "learning_rate": 1e-05, "loss": 1.0489, "step": 6685 }, { "epoch": 5.9255978742249775, "grad_norm": 0.39610734581947327, "learning_rate": 1e-05, "loss": 1.0353, "step": 6690 }, { "epoch": 5.930026572187777, "grad_norm": 0.4324401021003723, "learning_rate": 1e-05, "loss": 0.9808, "step": 6695 }, { "epoch": 5.9344552701505755, "grad_norm": 0.3518627882003784, "learning_rate": 1e-05, "loss": 1.0505, "step": 6700 }, { "epoch": 5.938883968113375, "grad_norm": 0.32452812790870667, "learning_rate": 1e-05, "loss": 1.0417, "step": 6705 }, { "epoch": 5.9433126660761735, "grad_norm": 0.4187712073326111, "learning_rate": 1e-05, "loss": 0.9759, "step": 6710 }, { "epoch": 5.947741364038973, "grad_norm": 0.3556433320045471, "learning_rate": 1e-05, "loss": 0.9504, "step": 6715 }, { "epoch": 5.9521700620017715, "grad_norm": 0.4036000669002533, "learning_rate": 1e-05, "loss": 0.983, "step": 6720 }, { "epoch": 5.956598759964571, "grad_norm": 0.2611381709575653, "learning_rate": 1e-05, "loss": 1.0313, "step": 6725 }, { "epoch": 5.961027457927369, "grad_norm": 0.3407951593399048, "learning_rate": 1e-05, "loss": 0.9839, "step": 6730 }, { "epoch": 5.965456155890168, "grad_norm": 0.28380531072616577, "learning_rate": 1e-05, "loss": 0.9872, "step": 6735 }, { "epoch": 5.969884853852967, "grad_norm": 0.3297921121120453, "learning_rate": 1e-05, "loss": 0.992, "step": 6740 }, { "epoch": 5.974313551815766, "grad_norm": 0.3118314743041992, "learning_rate": 1e-05, "loss": 0.9686, "step": 6745 }, { "epoch": 5.978742249778565, "grad_norm": 0.30400076508522034, "learning_rate": 1e-05, "loss": 0.9891, "step": 6750 }, { "epoch": 5.983170947741364, "grad_norm": 0.4714670181274414, "learning_rate": 1e-05, "loss": 1.0102, "step": 6755 }, { "epoch": 5.987599645704163, "grad_norm": 0.40930747985839844, "learning_rate": 1e-05, "loss": 1.0222, "step": 6760 }, { "epoch": 5.992028343666962, "grad_norm": 0.29554399847984314, "learning_rate": 1e-05, "loss": 1.0344, "step": 6765 }, { "epoch": 5.9964570416297605, "grad_norm": 0.4346447288990021, "learning_rate": 1e-05, "loss": 1.0122, "step": 6770 }, { "epoch": 6.00088573959256, "grad_norm": 0.4221264719963074, "learning_rate": 1e-05, "loss": 0.9988, "step": 6775 }, { "epoch": 6.005314437555358, "grad_norm": 0.3572789430618286, "learning_rate": 1e-05, "loss": 1.0364, "step": 6780 }, { "epoch": 6.009743135518158, "grad_norm": 0.3365592360496521, "learning_rate": 1e-05, "loss": 1.0474, "step": 6785 }, { "epoch": 6.014171833480956, "grad_norm": 0.29700925946235657, "learning_rate": 1e-05, "loss": 1.0352, "step": 6790 }, { "epoch": 6.018600531443756, "grad_norm": 0.34870168566703796, "learning_rate": 1e-05, "loss": 1.0112, "step": 6795 }, { "epoch": 6.023029229406554, "grad_norm": 0.3146915137767792, "learning_rate": 1e-05, "loss": 0.9731, "step": 6800 }, { "epoch": 6.027457927369354, "grad_norm": 0.36624160408973694, "learning_rate": 1e-05, "loss": 1.0184, "step": 6805 }, { "epoch": 6.031886625332152, "grad_norm": 0.3642502725124359, "learning_rate": 1e-05, "loss": 0.9958, "step": 6810 }, { "epoch": 6.036315323294951, "grad_norm": 0.28912538290023804, "learning_rate": 1e-05, "loss": 1.0034, "step": 6815 }, { "epoch": 6.04074402125775, "grad_norm": 0.3548716902732849, "learning_rate": 1e-05, "loss": 1.0109, "step": 6820 }, { "epoch": 6.045172719220549, "grad_norm": 0.3012695908546448, "learning_rate": 1e-05, "loss": 1.0117, "step": 6825 }, { "epoch": 6.049601417183348, "grad_norm": 0.33697596192359924, "learning_rate": 1e-05, "loss": 0.964, "step": 6830 }, { "epoch": 6.054030115146147, "grad_norm": 0.35951241850852966, "learning_rate": 1e-05, "loss": 1.0524, "step": 6835 }, { "epoch": 6.058458813108946, "grad_norm": 0.3536849021911621, "learning_rate": 1e-05, "loss": 1.0057, "step": 6840 }, { "epoch": 6.062887511071745, "grad_norm": 0.3755175769329071, "learning_rate": 1e-05, "loss": 1.0212, "step": 6845 }, { "epoch": 6.067316209034544, "grad_norm": 0.4255145490169525, "learning_rate": 1e-05, "loss": 1.0248, "step": 6850 }, { "epoch": 6.071744906997343, "grad_norm": 0.39630162715911865, "learning_rate": 1e-05, "loss": 0.9911, "step": 6855 }, { "epoch": 6.076173604960141, "grad_norm": 0.36435675621032715, "learning_rate": 1e-05, "loss": 1.013, "step": 6860 }, { "epoch": 6.080602302922941, "grad_norm": 0.2913612425327301, "learning_rate": 1e-05, "loss": 0.9965, "step": 6865 }, { "epoch": 6.085031000885739, "grad_norm": 0.35636329650878906, "learning_rate": 1e-05, "loss": 1.0069, "step": 6870 }, { "epoch": 6.089459698848539, "grad_norm": 0.3127959370613098, "learning_rate": 1e-05, "loss": 0.9733, "step": 6875 }, { "epoch": 6.093888396811337, "grad_norm": 0.5212419629096985, "learning_rate": 1e-05, "loss": 0.9688, "step": 6880 }, { "epoch": 6.098317094774137, "grad_norm": 0.3824059069156647, "learning_rate": 1e-05, "loss": 1.021, "step": 6885 }, { "epoch": 6.102745792736935, "grad_norm": 0.33459386229515076, "learning_rate": 1e-05, "loss": 1.0173, "step": 6890 }, { "epoch": 6.107174490699734, "grad_norm": 0.34141600131988525, "learning_rate": 1e-05, "loss": 0.9581, "step": 6895 }, { "epoch": 6.111603188662533, "grad_norm": 0.3059738576412201, "learning_rate": 1e-05, "loss": 1.0584, "step": 6900 }, { "epoch": 6.116031886625332, "grad_norm": 0.3926936388015747, "learning_rate": 1e-05, "loss": 1.0286, "step": 6905 }, { "epoch": 6.120460584588131, "grad_norm": 0.3398391604423523, "learning_rate": 1e-05, "loss": 1.0198, "step": 6910 }, { "epoch": 6.12488928255093, "grad_norm": 0.359720379114151, "learning_rate": 1e-05, "loss": 1.0305, "step": 6915 }, { "epoch": 6.129317980513729, "grad_norm": 0.2975776493549347, "learning_rate": 1e-05, "loss": 1.0252, "step": 6920 }, { "epoch": 6.133746678476528, "grad_norm": 0.34713348746299744, "learning_rate": 1e-05, "loss": 1.027, "step": 6925 }, { "epoch": 6.138175376439327, "grad_norm": 0.3530220687389374, "learning_rate": 1e-05, "loss": 1.0053, "step": 6930 }, { "epoch": 6.142604074402126, "grad_norm": 0.3547761142253876, "learning_rate": 1e-05, "loss": 1.0022, "step": 6935 }, { "epoch": 6.147032772364924, "grad_norm": 0.3353806138038635, "learning_rate": 1e-05, "loss": 1.026, "step": 6940 }, { "epoch": 6.151461470327724, "grad_norm": 0.3081471920013428, "learning_rate": 1e-05, "loss": 0.9746, "step": 6945 }, { "epoch": 6.155890168290522, "grad_norm": 0.364685595035553, "learning_rate": 1e-05, "loss": 0.9838, "step": 6950 }, { "epoch": 6.160318866253322, "grad_norm": 0.35186582803726196, "learning_rate": 1e-05, "loss": 1.0361, "step": 6955 }, { "epoch": 6.16474756421612, "grad_norm": 0.35470500588417053, "learning_rate": 1e-05, "loss": 0.9445, "step": 6960 }, { "epoch": 6.16917626217892, "grad_norm": 0.35173505544662476, "learning_rate": 1e-05, "loss": 0.9801, "step": 6965 }, { "epoch": 6.173604960141718, "grad_norm": 0.32691577076911926, "learning_rate": 1e-05, "loss": 1.0075, "step": 6970 }, { "epoch": 6.178033658104518, "grad_norm": 0.28472381830215454, "learning_rate": 1e-05, "loss": 1.0519, "step": 6975 }, { "epoch": 6.182462356067316, "grad_norm": 0.46015751361846924, "learning_rate": 1e-05, "loss": 1.0406, "step": 6980 }, { "epoch": 6.186891054030115, "grad_norm": 0.2897954285144806, "learning_rate": 1e-05, "loss": 0.9539, "step": 6985 }, { "epoch": 6.191319751992914, "grad_norm": 0.2893534302711487, "learning_rate": 1e-05, "loss": 1.0625, "step": 6990 }, { "epoch": 6.195748449955713, "grad_norm": 0.3138471841812134, "learning_rate": 1e-05, "loss": 0.9963, "step": 6995 }, { "epoch": 6.200177147918512, "grad_norm": 0.4907361567020416, "learning_rate": 1e-05, "loss": 1.0069, "step": 7000 }, { "epoch": 6.204605845881311, "grad_norm": 0.31292858719825745, "learning_rate": 1e-05, "loss": 1.028, "step": 7005 }, { "epoch": 6.20903454384411, "grad_norm": 0.298232764005661, "learning_rate": 1e-05, "loss": 0.9961, "step": 7010 }, { "epoch": 6.213463241806909, "grad_norm": 0.47002145648002625, "learning_rate": 1e-05, "loss": 0.99, "step": 7015 }, { "epoch": 6.217891939769708, "grad_norm": 0.4330032169818878, "learning_rate": 1e-05, "loss": 1.0094, "step": 7020 }, { "epoch": 6.222320637732507, "grad_norm": 0.4534287750720978, "learning_rate": 1e-05, "loss": 1.009, "step": 7025 }, { "epoch": 6.226749335695305, "grad_norm": 0.3723717927932739, "learning_rate": 1e-05, "loss": 1.0756, "step": 7030 }, { "epoch": 6.231178033658105, "grad_norm": 0.3415564298629761, "learning_rate": 1e-05, "loss": 0.9817, "step": 7035 }, { "epoch": 6.235606731620903, "grad_norm": 0.3624410331249237, "learning_rate": 1e-05, "loss": 0.9835, "step": 7040 }, { "epoch": 6.240035429583703, "grad_norm": 0.3186006247997284, "learning_rate": 1e-05, "loss": 1.0176, "step": 7045 }, { "epoch": 6.244464127546501, "grad_norm": 0.5132346749305725, "learning_rate": 1e-05, "loss": 0.9925, "step": 7050 }, { "epoch": 6.248892825509301, "grad_norm": 0.3773728907108307, "learning_rate": 1e-05, "loss": 1.0162, "step": 7055 }, { "epoch": 6.253321523472099, "grad_norm": 0.33810749650001526, "learning_rate": 1e-05, "loss": 0.9811, "step": 7060 }, { "epoch": 6.257750221434899, "grad_norm": 0.24548912048339844, "learning_rate": 1e-05, "loss": 1.0136, "step": 7065 }, { "epoch": 6.262178919397697, "grad_norm": 0.32872912287712097, "learning_rate": 1e-05, "loss": 1.0397, "step": 7070 }, { "epoch": 6.266607617360496, "grad_norm": 0.36943259835243225, "learning_rate": 1e-05, "loss": 1.0243, "step": 7075 }, { "epoch": 6.271036315323295, "grad_norm": 0.4258362352848053, "learning_rate": 1e-05, "loss": 1.0063, "step": 7080 }, { "epoch": 6.275465013286094, "grad_norm": 0.28767505288124084, "learning_rate": 1e-05, "loss": 1.05, "step": 7085 }, { "epoch": 6.279893711248893, "grad_norm": 0.3052264451980591, "learning_rate": 1e-05, "loss": 0.999, "step": 7090 }, { "epoch": 6.284322409211692, "grad_norm": 0.3374520540237427, "learning_rate": 1e-05, "loss": 1.0456, "step": 7095 }, { "epoch": 6.288751107174491, "grad_norm": 0.3040774166584015, "learning_rate": 1e-05, "loss": 1.0237, "step": 7100 }, { "epoch": 6.29317980513729, "grad_norm": 0.2610459625720978, "learning_rate": 1e-05, "loss": 0.9719, "step": 7105 }, { "epoch": 6.297608503100088, "grad_norm": 0.31213927268981934, "learning_rate": 1e-05, "loss": 0.9671, "step": 7110 }, { "epoch": 6.302037201062888, "grad_norm": 0.2727365791797638, "learning_rate": 1e-05, "loss": 1.0013, "step": 7115 }, { "epoch": 6.306465899025686, "grad_norm": 0.266261488199234, "learning_rate": 1e-05, "loss": 1.029, "step": 7120 }, { "epoch": 6.3108945969884855, "grad_norm": 0.27193114161491394, "learning_rate": 1e-05, "loss": 0.9832, "step": 7125 }, { "epoch": 6.315323294951284, "grad_norm": 0.29136723279953003, "learning_rate": 1e-05, "loss": 0.9719, "step": 7130 }, { "epoch": 6.3197519929140835, "grad_norm": 0.2739744484424591, "learning_rate": 1e-05, "loss": 0.992, "step": 7135 }, { "epoch": 6.324180690876882, "grad_norm": 0.3441196084022522, "learning_rate": 1e-05, "loss": 1.0437, "step": 7140 }, { "epoch": 6.3286093888396815, "grad_norm": 0.30992305278778076, "learning_rate": 1e-05, "loss": 1.054, "step": 7145 }, { "epoch": 6.33303808680248, "grad_norm": 0.3201008141040802, "learning_rate": 1e-05, "loss": 1.0093, "step": 7150 }, { "epoch": 6.3374667847652795, "grad_norm": 0.3894982933998108, "learning_rate": 1e-05, "loss": 1.0232, "step": 7155 }, { "epoch": 6.341895482728078, "grad_norm": 0.2990393042564392, "learning_rate": 1e-05, "loss": 1.0056, "step": 7160 }, { "epoch": 6.346324180690877, "grad_norm": 0.3568171262741089, "learning_rate": 1e-05, "loss": 1.0426, "step": 7165 }, { "epoch": 6.350752878653676, "grad_norm": 0.3259207010269165, "learning_rate": 1e-05, "loss": 1.0143, "step": 7170 }, { "epoch": 6.3551815766164745, "grad_norm": 0.2781645357608795, "learning_rate": 1e-05, "loss": 0.9772, "step": 7175 }, { "epoch": 6.359610274579274, "grad_norm": 0.3118613660335541, "learning_rate": 1e-05, "loss": 1.0306, "step": 7180 }, { "epoch": 6.3640389725420725, "grad_norm": 0.3418436050415039, "learning_rate": 1e-05, "loss": 1.0139, "step": 7185 }, { "epoch": 6.368467670504872, "grad_norm": 0.31584247946739197, "learning_rate": 1e-05, "loss": 1.0162, "step": 7190 }, { "epoch": 6.3728963684676705, "grad_norm": 0.3011067807674408, "learning_rate": 1e-05, "loss": 1.0251, "step": 7195 }, { "epoch": 6.377325066430469, "grad_norm": 0.3170275092124939, "learning_rate": 1e-05, "loss": 0.9929, "step": 7200 }, { "epoch": 6.3817537643932685, "grad_norm": 0.31234514713287354, "learning_rate": 1e-05, "loss": 0.9694, "step": 7205 }, { "epoch": 6.386182462356067, "grad_norm": 0.34424668550491333, "learning_rate": 1e-05, "loss": 1.0356, "step": 7210 }, { "epoch": 6.3906111603188664, "grad_norm": 0.31194567680358887, "learning_rate": 1e-05, "loss": 1.0125, "step": 7215 }, { "epoch": 6.395039858281665, "grad_norm": 0.3398292660713196, "learning_rate": 1e-05, "loss": 0.9775, "step": 7220 }, { "epoch": 6.399468556244464, "grad_norm": 0.3615325391292572, "learning_rate": 1e-05, "loss": 1.0593, "step": 7225 }, { "epoch": 6.403897254207263, "grad_norm": 0.3728867471218109, "learning_rate": 1e-05, "loss": 1.0024, "step": 7230 }, { "epoch": 6.408325952170062, "grad_norm": 0.28887295722961426, "learning_rate": 1e-05, "loss": 1.0258, "step": 7235 }, { "epoch": 6.412754650132861, "grad_norm": 0.31499141454696655, "learning_rate": 1e-05, "loss": 0.9772, "step": 7240 }, { "epoch": 6.4171833480956595, "grad_norm": 0.29244956374168396, "learning_rate": 1e-05, "loss": 1.0023, "step": 7245 }, { "epoch": 6.421612046058459, "grad_norm": 0.3293982148170471, "learning_rate": 1e-05, "loss": 1.0491, "step": 7250 }, { "epoch": 6.4260407440212575, "grad_norm": 0.3518960773944855, "learning_rate": 1e-05, "loss": 1.0014, "step": 7255 }, { "epoch": 6.430469441984057, "grad_norm": 0.3628379702568054, "learning_rate": 1e-05, "loss": 1.0357, "step": 7260 }, { "epoch": 6.4348981399468554, "grad_norm": 0.36089372634887695, "learning_rate": 1e-05, "loss": 1.0492, "step": 7265 }, { "epoch": 6.439326837909655, "grad_norm": 0.4071352779865265, "learning_rate": 1e-05, "loss": 0.9814, "step": 7270 }, { "epoch": 6.443755535872453, "grad_norm": 0.32147347927093506, "learning_rate": 1e-05, "loss": 1.0138, "step": 7275 }, { "epoch": 6.448184233835253, "grad_norm": 0.316927045583725, "learning_rate": 1e-05, "loss": 1.0383, "step": 7280 }, { "epoch": 6.452612931798051, "grad_norm": 0.412502259016037, "learning_rate": 1e-05, "loss": 1.0139, "step": 7285 }, { "epoch": 6.45704162976085, "grad_norm": 0.3707854449748993, "learning_rate": 1e-05, "loss": 0.9762, "step": 7290 }, { "epoch": 6.461470327723649, "grad_norm": 0.38297298550605774, "learning_rate": 1e-05, "loss": 1.0723, "step": 7295 }, { "epoch": 6.465899025686448, "grad_norm": 0.47720569372177124, "learning_rate": 1e-05, "loss": 1.013, "step": 7300 }, { "epoch": 6.470327723649247, "grad_norm": 0.3851393163204193, "learning_rate": 1e-05, "loss": 0.9622, "step": 7305 }, { "epoch": 6.474756421612046, "grad_norm": 0.31918996572494507, "learning_rate": 1e-05, "loss": 0.984, "step": 7310 }, { "epoch": 6.479185119574845, "grad_norm": 0.3066047728061676, "learning_rate": 1e-05, "loss": 0.957, "step": 7315 }, { "epoch": 6.483613817537644, "grad_norm": 0.300331711769104, "learning_rate": 1e-05, "loss": 0.9949, "step": 7320 }, { "epoch": 6.488042515500442, "grad_norm": 0.34394770860671997, "learning_rate": 1e-05, "loss": 0.9727, "step": 7325 }, { "epoch": 6.492471213463242, "grad_norm": 0.284976601600647, "learning_rate": 1e-05, "loss": 0.966, "step": 7330 }, { "epoch": 6.49689991142604, "grad_norm": 0.39693188667297363, "learning_rate": 1e-05, "loss": 1.0073, "step": 7335 }, { "epoch": 6.50132860938884, "grad_norm": 0.29884403944015503, "learning_rate": 1e-05, "loss": 1.0163, "step": 7340 }, { "epoch": 6.505757307351638, "grad_norm": 0.3038507401943207, "learning_rate": 1e-05, "loss": 1.032, "step": 7345 }, { "epoch": 6.510186005314438, "grad_norm": 0.39658111333847046, "learning_rate": 1e-05, "loss": 1.0629, "step": 7350 }, { "epoch": 6.514614703277236, "grad_norm": 0.3359249234199524, "learning_rate": 1e-05, "loss": 0.9602, "step": 7355 }, { "epoch": 6.519043401240036, "grad_norm": 0.42337730526924133, "learning_rate": 1e-05, "loss": 0.9609, "step": 7360 }, { "epoch": 6.523472099202834, "grad_norm": 0.41874006390571594, "learning_rate": 1e-05, "loss": 1.0103, "step": 7365 }, { "epoch": 6.527900797165634, "grad_norm": 0.3224058449268341, "learning_rate": 1e-05, "loss": 0.9615, "step": 7370 }, { "epoch": 6.532329495128432, "grad_norm": 0.4191104471683502, "learning_rate": 1e-05, "loss": 1.0749, "step": 7375 }, { "epoch": 6.536758193091231, "grad_norm": 0.4798573851585388, "learning_rate": 1e-05, "loss": 0.9782, "step": 7380 }, { "epoch": 6.54118689105403, "grad_norm": 0.41390278935432434, "learning_rate": 1e-05, "loss": 1.0226, "step": 7385 }, { "epoch": 6.545615589016829, "grad_norm": 0.41620805859565735, "learning_rate": 1e-05, "loss": 1.0002, "step": 7390 }, { "epoch": 6.550044286979628, "grad_norm": 0.37604033946990967, "learning_rate": 1e-05, "loss": 1.0195, "step": 7395 }, { "epoch": 6.554472984942427, "grad_norm": 0.28521087765693665, "learning_rate": 1e-05, "loss": 1.0074, "step": 7400 }, { "epoch": 6.558901682905226, "grad_norm": 0.38479727506637573, "learning_rate": 1e-05, "loss": 1.0183, "step": 7405 }, { "epoch": 6.563330380868025, "grad_norm": 0.3014092743396759, "learning_rate": 1e-05, "loss": 0.9871, "step": 7410 }, { "epoch": 6.567759078830823, "grad_norm": 0.35304468870162964, "learning_rate": 1e-05, "loss": 1.0267, "step": 7415 }, { "epoch": 6.572187776793623, "grad_norm": 0.2886923551559448, "learning_rate": 1e-05, "loss": 0.9802, "step": 7420 }, { "epoch": 6.576616474756421, "grad_norm": 0.3872258961200714, "learning_rate": 1e-05, "loss": 1.0169, "step": 7425 }, { "epoch": 6.581045172719221, "grad_norm": 0.33828261494636536, "learning_rate": 1e-05, "loss": 1.0209, "step": 7430 }, { "epoch": 6.585473870682019, "grad_norm": 0.2929351031780243, "learning_rate": 1e-05, "loss": 1.0349, "step": 7435 }, { "epoch": 6.589902568644819, "grad_norm": 0.2872735559940338, "learning_rate": 1e-05, "loss": 0.9994, "step": 7440 }, { "epoch": 6.594331266607617, "grad_norm": 0.30917489528656006, "learning_rate": 1e-05, "loss": 1.0168, "step": 7445 }, { "epoch": 6.598759964570416, "grad_norm": 0.3415820598602295, "learning_rate": 1e-05, "loss": 0.9479, "step": 7450 }, { "epoch": 6.603188662533215, "grad_norm": 0.29959750175476074, "learning_rate": 1e-05, "loss": 1.0243, "step": 7455 }, { "epoch": 6.607617360496015, "grad_norm": 0.47754573822021484, "learning_rate": 1e-05, "loss": 0.968, "step": 7460 }, { "epoch": 6.612046058458813, "grad_norm": 0.2885790765285492, "learning_rate": 1e-05, "loss": 0.9929, "step": 7465 }, { "epoch": 6.616474756421612, "grad_norm": 0.30503877997398376, "learning_rate": 1e-05, "loss": 1.0532, "step": 7470 }, { "epoch": 6.620903454384411, "grad_norm": 0.35308367013931274, "learning_rate": 1e-05, "loss": 1.0488, "step": 7475 }, { "epoch": 6.62533215234721, "grad_norm": 0.3534207344055176, "learning_rate": 1e-05, "loss": 1.0165, "step": 7480 }, { "epoch": 6.629760850310009, "grad_norm": 0.379452645778656, "learning_rate": 1e-05, "loss": 1.0282, "step": 7485 }, { "epoch": 6.634189548272808, "grad_norm": 0.31310799717903137, "learning_rate": 1e-05, "loss": 0.9764, "step": 7490 }, { "epoch": 6.638618246235607, "grad_norm": 0.3678398132324219, "learning_rate": 1e-05, "loss": 0.9878, "step": 7495 }, { "epoch": 6.643046944198406, "grad_norm": 0.4244448244571686, "learning_rate": 1e-05, "loss": 1.0093, "step": 7500 }, { "epoch": 6.647475642161204, "grad_norm": 0.3662121891975403, "learning_rate": 1e-05, "loss": 1.0648, "step": 7505 }, { "epoch": 6.651904340124004, "grad_norm": 0.3143360912799835, "learning_rate": 1e-05, "loss": 0.9781, "step": 7510 }, { "epoch": 6.656333038086802, "grad_norm": 0.3744223713874817, "learning_rate": 1e-05, "loss": 1.0114, "step": 7515 }, { "epoch": 6.660761736049602, "grad_norm": 0.3605046272277832, "learning_rate": 1e-05, "loss": 0.9945, "step": 7520 }, { "epoch": 6.6651904340124, "grad_norm": 0.28498315811157227, "learning_rate": 1e-05, "loss": 1.0855, "step": 7525 }, { "epoch": 6.6696191319752, "grad_norm": 0.3095240592956543, "learning_rate": 1e-05, "loss": 0.9858, "step": 7530 }, { "epoch": 6.674047829937998, "grad_norm": 0.3136258125305176, "learning_rate": 1e-05, "loss": 1.0287, "step": 7535 }, { "epoch": 6.678476527900797, "grad_norm": 0.317767471075058, "learning_rate": 1e-05, "loss": 0.9835, "step": 7540 }, { "epoch": 6.682905225863596, "grad_norm": 0.39734598994255066, "learning_rate": 1e-05, "loss": 0.9959, "step": 7545 }, { "epoch": 6.687333923826395, "grad_norm": 0.36481449007987976, "learning_rate": 1e-05, "loss": 1.0662, "step": 7550 }, { "epoch": 6.691762621789194, "grad_norm": 0.261600524187088, "learning_rate": 1e-05, "loss": 1.0522, "step": 7555 }, { "epoch": 6.696191319751993, "grad_norm": 0.27716726064682007, "learning_rate": 1e-05, "loss": 1.0033, "step": 7560 }, { "epoch": 6.700620017714792, "grad_norm": 0.25577282905578613, "learning_rate": 1e-05, "loss": 0.9682, "step": 7565 }, { "epoch": 6.705048715677591, "grad_norm": 0.34284737706184387, "learning_rate": 1e-05, "loss": 1.0181, "step": 7570 }, { "epoch": 6.70947741364039, "grad_norm": 0.41684550046920776, "learning_rate": 1e-05, "loss": 1.0298, "step": 7575 }, { "epoch": 6.713906111603189, "grad_norm": 0.257420152425766, "learning_rate": 1e-05, "loss": 0.9505, "step": 7580 }, { "epoch": 6.718334809565988, "grad_norm": 0.32291096448898315, "learning_rate": 1e-05, "loss": 1.0112, "step": 7585 }, { "epoch": 6.722763507528787, "grad_norm": 0.3305424451828003, "learning_rate": 1e-05, "loss": 1.0186, "step": 7590 }, { "epoch": 6.727192205491585, "grad_norm": 0.3456735908985138, "learning_rate": 1e-05, "loss": 1.0123, "step": 7595 }, { "epoch": 6.731620903454385, "grad_norm": 0.33171603083610535, "learning_rate": 1e-05, "loss": 0.9337, "step": 7600 }, { "epoch": 6.736049601417183, "grad_norm": 0.313313364982605, "learning_rate": 1e-05, "loss": 0.984, "step": 7605 }, { "epoch": 6.7404782993799826, "grad_norm": 0.3528946340084076, "learning_rate": 1e-05, "loss": 1.0641, "step": 7610 }, { "epoch": 6.744906997342781, "grad_norm": 0.36428534984588623, "learning_rate": 1e-05, "loss": 0.9758, "step": 7615 }, { "epoch": 6.7493356953055805, "grad_norm": 0.36096686124801636, "learning_rate": 1e-05, "loss": 1.0435, "step": 7620 }, { "epoch": 6.753764393268379, "grad_norm": 0.37182632088661194, "learning_rate": 1e-05, "loss": 1.0101, "step": 7625 }, { "epoch": 6.758193091231178, "grad_norm": 0.3743254244327545, "learning_rate": 1e-05, "loss": 0.9992, "step": 7630 }, { "epoch": 6.762621789193977, "grad_norm": 0.3137091398239136, "learning_rate": 1e-05, "loss": 1.0282, "step": 7635 }, { "epoch": 6.767050487156776, "grad_norm": 0.3580295741558075, "learning_rate": 1e-05, "loss": 1.061, "step": 7640 }, { "epoch": 6.771479185119575, "grad_norm": 0.27975258231163025, "learning_rate": 1e-05, "loss": 1.0164, "step": 7645 }, { "epoch": 6.775907883082374, "grad_norm": 0.29722923040390015, "learning_rate": 1e-05, "loss": 0.9753, "step": 7650 }, { "epoch": 6.780336581045173, "grad_norm": 0.3239089846611023, "learning_rate": 1e-05, "loss": 0.9629, "step": 7655 }, { "epoch": 6.7847652790079716, "grad_norm": 0.3350318670272827, "learning_rate": 1e-05, "loss": 1.0444, "step": 7660 }, { "epoch": 6.78919397697077, "grad_norm": 0.31723690032958984, "learning_rate": 1e-05, "loss": 0.9982, "step": 7665 }, { "epoch": 6.7936226749335695, "grad_norm": 0.333515465259552, "learning_rate": 1e-05, "loss": 1.0027, "step": 7670 }, { "epoch": 6.798051372896369, "grad_norm": 0.33454298973083496, "learning_rate": 1e-05, "loss": 1.0431, "step": 7675 }, { "epoch": 6.8024800708591675, "grad_norm": 0.338902086019516, "learning_rate": 1e-05, "loss": 1.0608, "step": 7680 }, { "epoch": 6.806908768821966, "grad_norm": 0.28785985708236694, "learning_rate": 1e-05, "loss": 0.9984, "step": 7685 }, { "epoch": 6.8113374667847655, "grad_norm": 0.31911033391952515, "learning_rate": 1e-05, "loss": 1.0382, "step": 7690 }, { "epoch": 6.815766164747564, "grad_norm": 0.31498414278030396, "learning_rate": 1e-05, "loss": 1.0059, "step": 7695 }, { "epoch": 6.8201948627103635, "grad_norm": 0.34170210361480713, "learning_rate": 1e-05, "loss": 1.0198, "step": 7700 }, { "epoch": 6.824623560673162, "grad_norm": 0.3100164830684662, "learning_rate": 1e-05, "loss": 0.9807, "step": 7705 }, { "epoch": 6.829052258635961, "grad_norm": 0.32382506132125854, "learning_rate": 1e-05, "loss": 1.0102, "step": 7710 }, { "epoch": 6.83348095659876, "grad_norm": 0.3402826488018036, "learning_rate": 1e-05, "loss": 1.0115, "step": 7715 }, { "epoch": 6.8379096545615585, "grad_norm": 0.3127281665802002, "learning_rate": 1e-05, "loss": 1.0236, "step": 7720 }, { "epoch": 6.842338352524358, "grad_norm": 0.2640603482723236, "learning_rate": 1e-05, "loss": 1.0534, "step": 7725 }, { "epoch": 6.8467670504871565, "grad_norm": 0.2885826528072357, "learning_rate": 1e-05, "loss": 1.0262, "step": 7730 }, { "epoch": 6.851195748449956, "grad_norm": 0.3311317563056946, "learning_rate": 1e-05, "loss": 0.9833, "step": 7735 }, { "epoch": 6.8556244464127545, "grad_norm": 0.3045313358306885, "learning_rate": 1e-05, "loss": 0.9695, "step": 7740 }, { "epoch": 6.860053144375554, "grad_norm": 0.28137636184692383, "learning_rate": 1e-05, "loss": 1.0326, "step": 7745 }, { "epoch": 6.8644818423383525, "grad_norm": 0.30333489179611206, "learning_rate": 1e-05, "loss": 1.0267, "step": 7750 }, { "epoch": 6.868910540301151, "grad_norm": 0.2819533050060272, "learning_rate": 1e-05, "loss": 0.9943, "step": 7755 }, { "epoch": 6.87333923826395, "grad_norm": 0.3045574426651001, "learning_rate": 1e-05, "loss": 1.0149, "step": 7760 }, { "epoch": 6.877767936226749, "grad_norm": 0.3403189480304718, "learning_rate": 1e-05, "loss": 1.0472, "step": 7765 }, { "epoch": 6.882196634189548, "grad_norm": 0.3321974277496338, "learning_rate": 1e-05, "loss": 0.9697, "step": 7770 }, { "epoch": 6.886625332152347, "grad_norm": 0.3050408661365509, "learning_rate": 1e-05, "loss": 0.9499, "step": 7775 }, { "epoch": 6.891054030115146, "grad_norm": 0.2941606044769287, "learning_rate": 1e-05, "loss": 0.973, "step": 7780 }, { "epoch": 6.895482728077945, "grad_norm": 0.27922290563583374, "learning_rate": 1e-05, "loss": 1.048, "step": 7785 }, { "epoch": 6.899911426040744, "grad_norm": 0.3234655559062958, "learning_rate": 1e-05, "loss": 0.9851, "step": 7790 }, { "epoch": 6.904340124003543, "grad_norm": 0.3068198263645172, "learning_rate": 1e-05, "loss": 0.9392, "step": 7795 }, { "epoch": 6.908768821966342, "grad_norm": 0.4504777789115906, "learning_rate": 1e-05, "loss": 1.0126, "step": 7800 }, { "epoch": 6.913197519929141, "grad_norm": 0.3687359094619751, "learning_rate": 1e-05, "loss": 1.0205, "step": 7805 }, { "epoch": 6.917626217891939, "grad_norm": 0.34978917241096497, "learning_rate": 1e-05, "loss": 1.1029, "step": 7810 }, { "epoch": 6.922054915854739, "grad_norm": 0.3262089490890503, "learning_rate": 1e-05, "loss": 0.9861, "step": 7815 }, { "epoch": 6.926483613817537, "grad_norm": 0.32655099034309387, "learning_rate": 1e-05, "loss": 0.9997, "step": 7820 }, { "epoch": 6.930912311780337, "grad_norm": 0.3247694969177246, "learning_rate": 1e-05, "loss": 1.0111, "step": 7825 }, { "epoch": 6.935341009743135, "grad_norm": 0.2828722894191742, "learning_rate": 1e-05, "loss": 1.0642, "step": 7830 }, { "epoch": 6.939769707705935, "grad_norm": 0.30532127618789673, "learning_rate": 1e-05, "loss": 1.024, "step": 7835 }, { "epoch": 6.944198405668733, "grad_norm": 0.36493581533432007, "learning_rate": 1e-05, "loss": 0.9987, "step": 7840 }, { "epoch": 6.948627103631532, "grad_norm": 0.272352397441864, "learning_rate": 1e-05, "loss": 1.0293, "step": 7845 }, { "epoch": 6.953055801594331, "grad_norm": 0.39458003640174866, "learning_rate": 1e-05, "loss": 1.0017, "step": 7850 }, { "epoch": 6.95748449955713, "grad_norm": 0.3530968129634857, "learning_rate": 1e-05, "loss": 0.986, "step": 7855 }, { "epoch": 6.961913197519929, "grad_norm": 0.3298160135746002, "learning_rate": 1e-05, "loss": 1.0299, "step": 7860 }, { "epoch": 6.966341895482728, "grad_norm": 0.2778833210468292, "learning_rate": 1e-05, "loss": 1.0361, "step": 7865 }, { "epoch": 6.970770593445527, "grad_norm": 0.26152563095092773, "learning_rate": 1e-05, "loss": 1.0089, "step": 7870 }, { "epoch": 6.975199291408326, "grad_norm": 0.31589120626449585, "learning_rate": 1e-05, "loss": 1.0162, "step": 7875 }, { "epoch": 6.979627989371125, "grad_norm": 0.30574697256088257, "learning_rate": 1e-05, "loss": 1.0062, "step": 7880 }, { "epoch": 6.984056687333924, "grad_norm": 0.32277002930641174, "learning_rate": 1e-05, "loss": 0.9945, "step": 7885 }, { "epoch": 6.988485385296723, "grad_norm": 0.28040793538093567, "learning_rate": 1e-05, "loss": 1.0278, "step": 7890 }, { "epoch": 6.992914083259522, "grad_norm": 0.3733680546283722, "learning_rate": 1e-05, "loss": 1.034, "step": 7895 }, { "epoch": 6.99734278122232, "grad_norm": 0.2930971086025238, "learning_rate": 1e-05, "loss": 0.989, "step": 7900 }, { "epoch": 7.00177147918512, "grad_norm": 0.31791961193084717, "learning_rate": 1e-05, "loss": 0.9855, "step": 7905 }, { "epoch": 7.006200177147918, "grad_norm": 0.3523207902908325, "learning_rate": 1e-05, "loss": 1.0546, "step": 7910 }, { "epoch": 7.010628875110718, "grad_norm": 0.38635241985321045, "learning_rate": 1e-05, "loss": 1.0197, "step": 7915 }, { "epoch": 7.015057573073516, "grad_norm": 0.31179079413414, "learning_rate": 1e-05, "loss": 1.041, "step": 7920 }, { "epoch": 7.019486271036316, "grad_norm": 0.3547854423522949, "learning_rate": 1e-05, "loss": 0.9777, "step": 7925 }, { "epoch": 7.023914968999114, "grad_norm": 0.32669755816459656, "learning_rate": 1e-05, "loss": 1.0128, "step": 7930 }, { "epoch": 7.028343666961913, "grad_norm": 0.45759037137031555, "learning_rate": 1e-05, "loss": 0.9982, "step": 7935 }, { "epoch": 7.032772364924712, "grad_norm": 0.329446405172348, "learning_rate": 1e-05, "loss": 0.974, "step": 7940 }, { "epoch": 7.037201062887511, "grad_norm": 0.38219141960144043, "learning_rate": 1e-05, "loss": 1.0455, "step": 7945 }, { "epoch": 7.04162976085031, "grad_norm": 0.3277440369129181, "learning_rate": 1e-05, "loss": 1.0127, "step": 7950 }, { "epoch": 7.046058458813109, "grad_norm": 0.44572392106056213, "learning_rate": 1e-05, "loss": 1.0251, "step": 7955 }, { "epoch": 7.050487156775908, "grad_norm": 0.31423547863960266, "learning_rate": 1e-05, "loss": 1.0217, "step": 7960 }, { "epoch": 7.054915854738707, "grad_norm": 0.40082406997680664, "learning_rate": 1e-05, "loss": 1.0375, "step": 7965 }, { "epoch": 7.059344552701506, "grad_norm": 0.34407326579093933, "learning_rate": 1e-05, "loss": 0.9795, "step": 7970 }, { "epoch": 7.063773250664305, "grad_norm": 0.33923476934432983, "learning_rate": 1e-05, "loss": 0.9909, "step": 7975 }, { "epoch": 7.068201948627103, "grad_norm": 0.27645084261894226, "learning_rate": 1e-05, "loss": 1.0362, "step": 7980 }, { "epoch": 7.072630646589903, "grad_norm": 0.3475607931613922, "learning_rate": 1e-05, "loss": 1.0956, "step": 7985 }, { "epoch": 7.077059344552701, "grad_norm": 0.3091600835323334, "learning_rate": 1e-05, "loss": 1.0392, "step": 7990 }, { "epoch": 7.081488042515501, "grad_norm": 0.3222597539424896, "learning_rate": 1e-05, "loss": 0.9509, "step": 7995 }, { "epoch": 7.085916740478299, "grad_norm": 0.30701637268066406, "learning_rate": 1e-05, "loss": 1.0434, "step": 8000 }, { "epoch": 7.090345438441099, "grad_norm": 0.3054087162017822, "learning_rate": 1e-05, "loss": 1.0144, "step": 8005 }, { "epoch": 7.094774136403897, "grad_norm": 0.3620450794696808, "learning_rate": 1e-05, "loss": 1.0468, "step": 8010 }, { "epoch": 7.099202834366697, "grad_norm": 0.34009912610054016, "learning_rate": 1e-05, "loss": 0.9934, "step": 8015 }, { "epoch": 7.103631532329495, "grad_norm": 0.2626882493495941, "learning_rate": 1e-05, "loss": 1.0293, "step": 8020 }, { "epoch": 7.108060230292294, "grad_norm": 0.2886578440666199, "learning_rate": 1e-05, "loss": 0.9769, "step": 8025 }, { "epoch": 7.112488928255093, "grad_norm": 0.3738825023174286, "learning_rate": 1e-05, "loss": 0.9659, "step": 8030 }, { "epoch": 7.116917626217892, "grad_norm": 0.31492140889167786, "learning_rate": 1e-05, "loss": 0.9931, "step": 8035 }, { "epoch": 7.121346324180691, "grad_norm": 0.32313334941864014, "learning_rate": 1e-05, "loss": 1.0173, "step": 8040 }, { "epoch": 7.12577502214349, "grad_norm": 0.2910026013851166, "learning_rate": 1e-05, "loss": 1.04, "step": 8045 }, { "epoch": 7.130203720106289, "grad_norm": 0.3253958225250244, "learning_rate": 1e-05, "loss": 0.9959, "step": 8050 }, { "epoch": 7.134632418069088, "grad_norm": 0.3761049211025238, "learning_rate": 1e-05, "loss": 0.9976, "step": 8055 }, { "epoch": 7.139061116031886, "grad_norm": 0.3223036825656891, "learning_rate": 1e-05, "loss": 1.0268, "step": 8060 }, { "epoch": 7.143489813994686, "grad_norm": 0.3698316514492035, "learning_rate": 1e-05, "loss": 0.9987, "step": 8065 }, { "epoch": 7.147918511957484, "grad_norm": 0.28210216760635376, "learning_rate": 1e-05, "loss": 1.108, "step": 8070 }, { "epoch": 7.152347209920284, "grad_norm": 0.2705000340938568, "learning_rate": 1e-05, "loss": 1.0323, "step": 8075 }, { "epoch": 7.156775907883082, "grad_norm": 0.31756705045700073, "learning_rate": 1e-05, "loss": 0.9981, "step": 8080 }, { "epoch": 7.161204605845882, "grad_norm": 0.34153518080711365, "learning_rate": 1e-05, "loss": 1.003, "step": 8085 }, { "epoch": 7.16563330380868, "grad_norm": 0.2473386526107788, "learning_rate": 1e-05, "loss": 0.9883, "step": 8090 }, { "epoch": 7.17006200177148, "grad_norm": 0.35500189661979675, "learning_rate": 1e-05, "loss": 0.9831, "step": 8095 }, { "epoch": 7.174490699734278, "grad_norm": 0.33213868737220764, "learning_rate": 1e-05, "loss": 1.0164, "step": 8100 }, { "epoch": 7.178919397697077, "grad_norm": 0.34023064374923706, "learning_rate": 1e-05, "loss": 1.0471, "step": 8105 }, { "epoch": 7.183348095659876, "grad_norm": 0.3616093695163727, "learning_rate": 1e-05, "loss": 1.0574, "step": 8110 }, { "epoch": 7.187776793622675, "grad_norm": 0.32585567235946655, "learning_rate": 1e-05, "loss": 1.0138, "step": 8115 }, { "epoch": 7.192205491585474, "grad_norm": 0.3519092798233032, "learning_rate": 1e-05, "loss": 1.0766, "step": 8120 }, { "epoch": 7.196634189548273, "grad_norm": 0.4222314655780792, "learning_rate": 1e-05, "loss": 0.9954, "step": 8125 }, { "epoch": 7.201062887511072, "grad_norm": 0.3706689774990082, "learning_rate": 1e-05, "loss": 0.9353, "step": 8130 }, { "epoch": 7.205491585473871, "grad_norm": 0.31903958320617676, "learning_rate": 1e-05, "loss": 0.9583, "step": 8135 }, { "epoch": 7.20992028343667, "grad_norm": 0.30987489223480225, "learning_rate": 1e-05, "loss": 1.0066, "step": 8140 }, { "epoch": 7.214348981399469, "grad_norm": 0.32475340366363525, "learning_rate": 1e-05, "loss": 1.0215, "step": 8145 }, { "epoch": 7.218777679362267, "grad_norm": 0.3950023651123047, "learning_rate": 1e-05, "loss": 1.0435, "step": 8150 }, { "epoch": 7.2232063773250665, "grad_norm": 0.31994789838790894, "learning_rate": 1e-05, "loss": 1.0335, "step": 8155 }, { "epoch": 7.227635075287865, "grad_norm": 0.35178259015083313, "learning_rate": 1e-05, "loss": 0.9803, "step": 8160 }, { "epoch": 7.2320637732506645, "grad_norm": 0.2776545584201813, "learning_rate": 1e-05, "loss": 0.9995, "step": 8165 }, { "epoch": 7.236492471213463, "grad_norm": 0.35874345898628235, "learning_rate": 1e-05, "loss": 1.0581, "step": 8170 }, { "epoch": 7.2409211691762625, "grad_norm": 0.40755918622016907, "learning_rate": 1e-05, "loss": 0.975, "step": 8175 }, { "epoch": 7.245349867139061, "grad_norm": 0.30432066321372986, "learning_rate": 1e-05, "loss": 0.9568, "step": 8180 }, { "epoch": 7.2497785651018605, "grad_norm": 0.2718200385570526, "learning_rate": 1e-05, "loss": 1.0136, "step": 8185 }, { "epoch": 7.254207263064659, "grad_norm": 0.35999277234077454, "learning_rate": 1e-05, "loss": 1.0021, "step": 8190 }, { "epoch": 7.258635961027458, "grad_norm": 0.33571064472198486, "learning_rate": 1e-05, "loss": 1.026, "step": 8195 }, { "epoch": 7.263064658990257, "grad_norm": 0.41914239525794983, "learning_rate": 1e-05, "loss": 0.9991, "step": 8200 }, { "epoch": 7.2674933569530555, "grad_norm": 0.32580021023750305, "learning_rate": 1e-05, "loss": 1.0225, "step": 8205 }, { "epoch": 7.271922054915855, "grad_norm": 0.3584657311439514, "learning_rate": 1e-05, "loss": 1.0202, "step": 8210 }, { "epoch": 7.2763507528786535, "grad_norm": 0.3185144364833832, "learning_rate": 1e-05, "loss": 0.9912, "step": 8215 }, { "epoch": 7.280779450841453, "grad_norm": 0.322122722864151, "learning_rate": 1e-05, "loss": 1.0109, "step": 8220 }, { "epoch": 7.2852081488042515, "grad_norm": 0.37027859687805176, "learning_rate": 1e-05, "loss": 0.9873, "step": 8225 }, { "epoch": 7.289636846767051, "grad_norm": 0.3315444886684418, "learning_rate": 1e-05, "loss": 0.9697, "step": 8230 }, { "epoch": 7.2940655447298495, "grad_norm": 0.312578946352005, "learning_rate": 1e-05, "loss": 0.9985, "step": 8235 }, { "epoch": 7.298494242692648, "grad_norm": 0.318161278963089, "learning_rate": 1e-05, "loss": 1.0067, "step": 8240 }, { "epoch": 7.3029229406554474, "grad_norm": 0.2673146426677704, "learning_rate": 1e-05, "loss": 1.0089, "step": 8245 }, { "epoch": 7.307351638618246, "grad_norm": 0.3477378785610199, "learning_rate": 1e-05, "loss": 1.0211, "step": 8250 }, { "epoch": 7.311780336581045, "grad_norm": 0.2590877115726471, "learning_rate": 1e-05, "loss": 1.0068, "step": 8255 }, { "epoch": 7.316209034543844, "grad_norm": 0.34417563676834106, "learning_rate": 1e-05, "loss": 1.009, "step": 8260 }, { "epoch": 7.320637732506643, "grad_norm": 0.2503780126571655, "learning_rate": 1e-05, "loss": 0.9943, "step": 8265 }, { "epoch": 7.325066430469442, "grad_norm": 0.2567213177680969, "learning_rate": 1e-05, "loss": 0.9934, "step": 8270 }, { "epoch": 7.3294951284322405, "grad_norm": 0.3780187666416168, "learning_rate": 1e-05, "loss": 1.0241, "step": 8275 }, { "epoch": 7.33392382639504, "grad_norm": 0.34917977452278137, "learning_rate": 1e-05, "loss": 1.0263, "step": 8280 }, { "epoch": 7.3383525243578385, "grad_norm": 0.33574095368385315, "learning_rate": 1e-05, "loss": 1.0138, "step": 8285 }, { "epoch": 7.342781222320638, "grad_norm": 0.3511805236339569, "learning_rate": 1e-05, "loss": 0.9877, "step": 8290 }, { "epoch": 7.3472099202834364, "grad_norm": 0.3084450364112854, "learning_rate": 1e-05, "loss": 1.0196, "step": 8295 }, { "epoch": 7.351638618246236, "grad_norm": 0.38242924213409424, "learning_rate": 1e-05, "loss": 1.0022, "step": 8300 }, { "epoch": 7.356067316209034, "grad_norm": 0.37005117535591125, "learning_rate": 1e-05, "loss": 0.9708, "step": 8305 }, { "epoch": 7.360496014171834, "grad_norm": 0.34039926528930664, "learning_rate": 1e-05, "loss": 1.0064, "step": 8310 }, { "epoch": 7.364924712134632, "grad_norm": 0.37982606887817383, "learning_rate": 1e-05, "loss": 1.0188, "step": 8315 }, { "epoch": 7.369353410097431, "grad_norm": 0.37362316250801086, "learning_rate": 1e-05, "loss": 1.0159, "step": 8320 }, { "epoch": 7.37378210806023, "grad_norm": 0.28669190406799316, "learning_rate": 1e-05, "loss": 1.0134, "step": 8325 }, { "epoch": 7.378210806023029, "grad_norm": 0.2529672384262085, "learning_rate": 1e-05, "loss": 1.0548, "step": 8330 }, { "epoch": 7.382639503985828, "grad_norm": 0.28348371386528015, "learning_rate": 1e-05, "loss": 0.9809, "step": 8335 }, { "epoch": 7.387068201948627, "grad_norm": 0.34466010332107544, "learning_rate": 1e-05, "loss": 1.0318, "step": 8340 }, { "epoch": 7.391496899911426, "grad_norm": 0.28888288140296936, "learning_rate": 1e-05, "loss": 1.0145, "step": 8345 }, { "epoch": 7.395925597874225, "grad_norm": 0.3542207181453705, "learning_rate": 1e-05, "loss": 0.9818, "step": 8350 }, { "epoch": 7.400354295837024, "grad_norm": 0.2847772538661957, "learning_rate": 1e-05, "loss": 0.9832, "step": 8355 }, { "epoch": 7.404782993799823, "grad_norm": 0.32181715965270996, "learning_rate": 1e-05, "loss": 0.9866, "step": 8360 }, { "epoch": 7.409211691762621, "grad_norm": 0.34742075204849243, "learning_rate": 1e-05, "loss": 1.0237, "step": 8365 }, { "epoch": 7.413640389725421, "grad_norm": 0.3222198784351349, "learning_rate": 1e-05, "loss": 1.0181, "step": 8370 }, { "epoch": 7.418069087688219, "grad_norm": 0.38159647583961487, "learning_rate": 1e-05, "loss": 1.0122, "step": 8375 }, { "epoch": 7.422497785651019, "grad_norm": 0.25137943029403687, "learning_rate": 1e-05, "loss": 1.0101, "step": 8380 }, { "epoch": 7.426926483613817, "grad_norm": 0.24861066043376923, "learning_rate": 1e-05, "loss": 0.9878, "step": 8385 }, { "epoch": 7.431355181576617, "grad_norm": 0.31143897771835327, "learning_rate": 1e-05, "loss": 1.0321, "step": 8390 }, { "epoch": 7.435783879539415, "grad_norm": 0.43090397119522095, "learning_rate": 1e-05, "loss": 0.9907, "step": 8395 }, { "epoch": 7.440212577502215, "grad_norm": 0.3416902422904968, "learning_rate": 1e-05, "loss": 0.9662, "step": 8400 }, { "epoch": 7.444641275465013, "grad_norm": 0.3006885051727295, "learning_rate": 1e-05, "loss": 1.0134, "step": 8405 }, { "epoch": 7.449069973427812, "grad_norm": 0.304620623588562, "learning_rate": 1e-05, "loss": 0.9867, "step": 8410 }, { "epoch": 7.453498671390611, "grad_norm": 0.38202860951423645, "learning_rate": 1e-05, "loss": 1.0121, "step": 8415 }, { "epoch": 7.45792736935341, "grad_norm": 0.3281434178352356, "learning_rate": 1e-05, "loss": 1.0343, "step": 8420 }, { "epoch": 7.462356067316209, "grad_norm": 0.31972700357437134, "learning_rate": 1e-05, "loss": 1.0127, "step": 8425 }, { "epoch": 7.466784765279008, "grad_norm": 0.37000319361686707, "learning_rate": 1e-05, "loss": 1.007, "step": 8430 }, { "epoch": 7.471213463241807, "grad_norm": 0.27409106492996216, "learning_rate": 1e-05, "loss": 0.9468, "step": 8435 }, { "epoch": 7.475642161204606, "grad_norm": 0.2912721335887909, "learning_rate": 1e-05, "loss": 1.0066, "step": 8440 }, { "epoch": 7.480070859167405, "grad_norm": 0.3768261969089508, "learning_rate": 1e-05, "loss": 0.9686, "step": 8445 }, { "epoch": 7.484499557130204, "grad_norm": 0.33346015214920044, "learning_rate": 1e-05, "loss": 0.9821, "step": 8450 }, { "epoch": 7.488928255093002, "grad_norm": 0.34781017899513245, "learning_rate": 1e-05, "loss": 1.0218, "step": 8455 }, { "epoch": 7.493356953055802, "grad_norm": 0.27549079060554504, "learning_rate": 1e-05, "loss": 1.0436, "step": 8460 }, { "epoch": 7.4977856510186, "grad_norm": 0.33016541600227356, "learning_rate": 1e-05, "loss": 0.9589, "step": 8465 }, { "epoch": 7.5022143489814, "grad_norm": 0.36840030550956726, "learning_rate": 1e-05, "loss": 1.0258, "step": 8470 }, { "epoch": 7.506643046944198, "grad_norm": 0.33794450759887695, "learning_rate": 1e-05, "loss": 0.9988, "step": 8475 }, { "epoch": 7.511071744906998, "grad_norm": 0.34260109066963196, "learning_rate": 1e-05, "loss": 1.0279, "step": 8480 }, { "epoch": 7.515500442869796, "grad_norm": 0.3097268044948578, "learning_rate": 1e-05, "loss": 1.0851, "step": 8485 }, { "epoch": 7.519929140832595, "grad_norm": 0.3689298629760742, "learning_rate": 1e-05, "loss": 0.9759, "step": 8490 }, { "epoch": 7.524357838795394, "grad_norm": 0.30302244424819946, "learning_rate": 1e-05, "loss": 0.9675, "step": 8495 }, { "epoch": 7.528786536758193, "grad_norm": 0.40244540572166443, "learning_rate": 1e-05, "loss": 1.0305, "step": 8500 }, { "epoch": 7.533215234720992, "grad_norm": 0.3475583791732788, "learning_rate": 1e-05, "loss": 0.9566, "step": 8505 }, { "epoch": 7.537643932683791, "grad_norm": 0.3201814889907837, "learning_rate": 1e-05, "loss": 0.9998, "step": 8510 }, { "epoch": 7.54207263064659, "grad_norm": 0.3686660826206207, "learning_rate": 1e-05, "loss": 1.0371, "step": 8515 }, { "epoch": 7.546501328609389, "grad_norm": 0.3371261954307556, "learning_rate": 1e-05, "loss": 1.0725, "step": 8520 }, { "epoch": 7.550930026572188, "grad_norm": 0.29111093282699585, "learning_rate": 1e-05, "loss": 0.9772, "step": 8525 }, { "epoch": 7.555358724534987, "grad_norm": 0.2628686726093292, "learning_rate": 1e-05, "loss": 0.9965, "step": 8530 }, { "epoch": 7.559787422497786, "grad_norm": 0.28007084131240845, "learning_rate": 1e-05, "loss": 0.9533, "step": 8535 }, { "epoch": 7.564216120460585, "grad_norm": 0.27675244212150574, "learning_rate": 1e-05, "loss": 1.0147, "step": 8540 }, { "epoch": 7.568644818423383, "grad_norm": 0.31111079454421997, "learning_rate": 1e-05, "loss": 1.0445, "step": 8545 }, { "epoch": 7.573073516386183, "grad_norm": 0.3076365888118744, "learning_rate": 1e-05, "loss": 1.0069, "step": 8550 }, { "epoch": 7.577502214348981, "grad_norm": 0.2831222116947174, "learning_rate": 1e-05, "loss": 0.9745, "step": 8555 }, { "epoch": 7.581930912311781, "grad_norm": 0.2999606132507324, "learning_rate": 1e-05, "loss": 0.9946, "step": 8560 }, { "epoch": 7.586359610274579, "grad_norm": 0.356234073638916, "learning_rate": 1e-05, "loss": 1.034, "step": 8565 }, { "epoch": 7.590788308237379, "grad_norm": 0.4144793748855591, "learning_rate": 1e-05, "loss": 0.9582, "step": 8570 }, { "epoch": 7.595217006200177, "grad_norm": 0.3226306140422821, "learning_rate": 1e-05, "loss": 0.9452, "step": 8575 }, { "epoch": 7.599645704162976, "grad_norm": 0.2927561104297638, "learning_rate": 1e-05, "loss": 1.0042, "step": 8580 }, { "epoch": 7.604074402125775, "grad_norm": 0.358500599861145, "learning_rate": 1e-05, "loss": 0.9899, "step": 8585 }, { "epoch": 7.608503100088574, "grad_norm": 0.3640202283859253, "learning_rate": 1e-05, "loss": 1.0368, "step": 8590 }, { "epoch": 7.612931798051373, "grad_norm": 0.3261796534061432, "learning_rate": 1e-05, "loss": 0.9494, "step": 8595 }, { "epoch": 7.617360496014172, "grad_norm": 0.3256380558013916, "learning_rate": 1e-05, "loss": 1.0681, "step": 8600 }, { "epoch": 7.621789193976971, "grad_norm": 0.329542875289917, "learning_rate": 1e-05, "loss": 1.0504, "step": 8605 }, { "epoch": 7.62621789193977, "grad_norm": 0.31438469886779785, "learning_rate": 1e-05, "loss": 0.9961, "step": 8610 }, { "epoch": 7.630646589902568, "grad_norm": 0.24086038768291473, "learning_rate": 1e-05, "loss": 0.9906, "step": 8615 }, { "epoch": 7.635075287865368, "grad_norm": 0.32759779691696167, "learning_rate": 1e-05, "loss": 0.9836, "step": 8620 }, { "epoch": 7.639503985828166, "grad_norm": 0.3202802538871765, "learning_rate": 1e-05, "loss": 1.0028, "step": 8625 }, { "epoch": 7.643932683790966, "grad_norm": 0.2695578336715698, "learning_rate": 1e-05, "loss": 0.9744, "step": 8630 }, { "epoch": 7.648361381753764, "grad_norm": 0.25730758905410767, "learning_rate": 1e-05, "loss": 1.0163, "step": 8635 }, { "epoch": 7.6527900797165636, "grad_norm": 0.32701051235198975, "learning_rate": 1e-05, "loss": 0.9552, "step": 8640 }, { "epoch": 7.657218777679362, "grad_norm": 0.24104008078575134, "learning_rate": 1e-05, "loss": 0.9582, "step": 8645 }, { "epoch": 7.6616474756421615, "grad_norm": 0.35857489705085754, "learning_rate": 1e-05, "loss": 1.0023, "step": 8650 }, { "epoch": 7.66607617360496, "grad_norm": 0.3006949722766876, "learning_rate": 1e-05, "loss": 1.0431, "step": 8655 }, { "epoch": 7.6705048715677595, "grad_norm": 0.3171907663345337, "learning_rate": 1e-05, "loss": 1.0104, "step": 8660 }, { "epoch": 7.674933569530558, "grad_norm": 0.3297688663005829, "learning_rate": 1e-05, "loss": 1.0297, "step": 8665 }, { "epoch": 7.679362267493357, "grad_norm": 0.3140852451324463, "learning_rate": 1e-05, "loss": 1.0065, "step": 8670 }, { "epoch": 7.683790965456156, "grad_norm": 0.3075326979160309, "learning_rate": 1e-05, "loss": 1.0103, "step": 8675 }, { "epoch": 7.688219663418955, "grad_norm": 0.39814651012420654, "learning_rate": 1e-05, "loss": 1.0125, "step": 8680 }, { "epoch": 7.692648361381754, "grad_norm": 0.3181552588939667, "learning_rate": 1e-05, "loss": 1.0139, "step": 8685 }, { "epoch": 7.6970770593445526, "grad_norm": 0.3182726800441742, "learning_rate": 1e-05, "loss": 1.0288, "step": 8690 }, { "epoch": 7.701505757307352, "grad_norm": 0.3198147714138031, "learning_rate": 1e-05, "loss": 1.0354, "step": 8695 }, { "epoch": 7.7059344552701505, "grad_norm": 0.29476943612098694, "learning_rate": 1e-05, "loss": 1.0229, "step": 8700 }, { "epoch": 7.710363153232949, "grad_norm": 0.2936825752258301, "learning_rate": 1e-05, "loss": 0.9679, "step": 8705 }, { "epoch": 7.7147918511957485, "grad_norm": 0.29142528772354126, "learning_rate": 1e-05, "loss": 1.0325, "step": 8710 }, { "epoch": 7.719220549158547, "grad_norm": 0.30274105072021484, "learning_rate": 1e-05, "loss": 1.0506, "step": 8715 }, { "epoch": 7.7236492471213465, "grad_norm": 0.2681039869785309, "learning_rate": 1e-05, "loss": 1.0536, "step": 8720 }, { "epoch": 7.728077945084145, "grad_norm": 0.3139670491218567, "learning_rate": 1e-05, "loss": 1.0238, "step": 8725 }, { "epoch": 7.7325066430469445, "grad_norm": 0.28641027212142944, "learning_rate": 1e-05, "loss": 0.9751, "step": 8730 }, { "epoch": 7.736935341009743, "grad_norm": 0.31871822476387024, "learning_rate": 1e-05, "loss": 0.9938, "step": 8735 }, { "epoch": 7.741364038972542, "grad_norm": 0.3115043342113495, "learning_rate": 1e-05, "loss": 1.0003, "step": 8740 }, { "epoch": 7.745792736935341, "grad_norm": 0.2972046136856079, "learning_rate": 1e-05, "loss": 1.0038, "step": 8745 }, { "epoch": 7.75022143489814, "grad_norm": 0.2684633135795593, "learning_rate": 1e-05, "loss": 1.0223, "step": 8750 }, { "epoch": 7.754650132860939, "grad_norm": 0.363823264837265, "learning_rate": 1e-05, "loss": 1.0181, "step": 8755 }, { "epoch": 7.7590788308237375, "grad_norm": 0.387590229511261, "learning_rate": 1e-05, "loss": 1.0107, "step": 8760 }, { "epoch": 7.763507528786537, "grad_norm": 0.3175017535686493, "learning_rate": 1e-05, "loss": 0.9967, "step": 8765 }, { "epoch": 7.7679362267493355, "grad_norm": 0.28583285212516785, "learning_rate": 1e-05, "loss": 1.0126, "step": 8770 }, { "epoch": 7.772364924712135, "grad_norm": 0.29801133275032043, "learning_rate": 1e-05, "loss": 1.0211, "step": 8775 }, { "epoch": 7.7767936226749335, "grad_norm": 0.34351205825805664, "learning_rate": 1e-05, "loss": 1.0459, "step": 8780 }, { "epoch": 7.781222320637733, "grad_norm": 0.3609464168548584, "learning_rate": 1e-05, "loss": 1.0085, "step": 8785 }, { "epoch": 7.785651018600531, "grad_norm": 0.33855128288269043, "learning_rate": 1e-05, "loss": 1.0092, "step": 8790 }, { "epoch": 7.79007971656333, "grad_norm": 0.2705983519554138, "learning_rate": 1e-05, "loss": 0.9871, "step": 8795 }, { "epoch": 7.794508414526129, "grad_norm": 0.2934018075466156, "learning_rate": 1e-05, "loss": 1.0837, "step": 8800 }, { "epoch": 7.798937112488928, "grad_norm": 0.3145749866962433, "learning_rate": 1e-05, "loss": 1.0841, "step": 8805 }, { "epoch": 7.803365810451727, "grad_norm": 0.3640284836292267, "learning_rate": 1e-05, "loss": 0.9973, "step": 8810 }, { "epoch": 7.807794508414526, "grad_norm": 0.3094344437122345, "learning_rate": 1e-05, "loss": 1.0228, "step": 8815 }, { "epoch": 7.812223206377325, "grad_norm": 0.39733627438545227, "learning_rate": 1e-05, "loss": 1.0396, "step": 8820 }, { "epoch": 7.816651904340124, "grad_norm": 0.3264067471027374, "learning_rate": 1e-05, "loss": 0.9987, "step": 8825 }, { "epoch": 7.8210806023029225, "grad_norm": 0.40689459443092346, "learning_rate": 1e-05, "loss": 1.003, "step": 8830 }, { "epoch": 7.825509300265722, "grad_norm": 0.29561591148376465, "learning_rate": 1e-05, "loss": 1.0402, "step": 8835 }, { "epoch": 7.82993799822852, "grad_norm": 0.3005363941192627, "learning_rate": 1e-05, "loss": 0.9929, "step": 8840 }, { "epoch": 7.83436669619132, "grad_norm": 0.3895195424556732, "learning_rate": 1e-05, "loss": 1.0014, "step": 8845 }, { "epoch": 7.838795394154118, "grad_norm": 0.28007766604423523, "learning_rate": 1e-05, "loss": 1.0308, "step": 8850 }, { "epoch": 7.843224092116918, "grad_norm": 0.32203447818756104, "learning_rate": 1e-05, "loss": 1.0309, "step": 8855 }, { "epoch": 7.847652790079716, "grad_norm": 0.3152931034564972, "learning_rate": 1e-05, "loss": 1.0144, "step": 8860 }, { "epoch": 7.852081488042516, "grad_norm": 0.41865599155426025, "learning_rate": 1e-05, "loss": 1.0489, "step": 8865 }, { "epoch": 7.856510186005314, "grad_norm": 0.30333995819091797, "learning_rate": 1e-05, "loss": 1.0491, "step": 8870 }, { "epoch": 7.860938883968114, "grad_norm": 0.2809825539588928, "learning_rate": 1e-05, "loss": 1.0654, "step": 8875 }, { "epoch": 7.865367581930912, "grad_norm": 0.2574847340583801, "learning_rate": 1e-05, "loss": 1.0021, "step": 8880 }, { "epoch": 7.869796279893711, "grad_norm": 0.2967134416103363, "learning_rate": 1e-05, "loss": 1.0506, "step": 8885 }, { "epoch": 7.87422497785651, "grad_norm": 0.303195059299469, "learning_rate": 1e-05, "loss": 1.0594, "step": 8890 }, { "epoch": 7.878653675819309, "grad_norm": 0.30553334951400757, "learning_rate": 1e-05, "loss": 0.9887, "step": 8895 }, { "epoch": 7.883082373782108, "grad_norm": 0.46372443437576294, "learning_rate": 1e-05, "loss": 0.9888, "step": 8900 }, { "epoch": 7.887511071744907, "grad_norm": 0.34033286571502686, "learning_rate": 1e-05, "loss": 0.951, "step": 8905 }, { "epoch": 7.891939769707706, "grad_norm": 0.32112789154052734, "learning_rate": 1e-05, "loss": 1.0511, "step": 8910 }, { "epoch": 7.896368467670505, "grad_norm": 0.2972681522369385, "learning_rate": 1e-05, "loss": 1.0184, "step": 8915 }, { "epoch": 7.900797165633303, "grad_norm": 0.3520476520061493, "learning_rate": 1e-05, "loss": 1.0683, "step": 8920 }, { "epoch": 7.905225863596103, "grad_norm": 0.2974855601787567, "learning_rate": 1e-05, "loss": 0.9574, "step": 8925 }, { "epoch": 7.909654561558901, "grad_norm": 0.29914185404777527, "learning_rate": 1e-05, "loss": 1.0145, "step": 8930 }, { "epoch": 7.914083259521701, "grad_norm": 0.3232276439666748, "learning_rate": 1e-05, "loss": 1.0336, "step": 8935 }, { "epoch": 7.918511957484499, "grad_norm": 0.3576143682003021, "learning_rate": 1e-05, "loss": 1.0301, "step": 8940 }, { "epoch": 7.922940655447299, "grad_norm": 0.34765493869781494, "learning_rate": 1e-05, "loss": 1.0382, "step": 8945 }, { "epoch": 7.927369353410097, "grad_norm": 0.3478412926197052, "learning_rate": 1e-05, "loss": 1.0287, "step": 8950 }, { "epoch": 7.931798051372897, "grad_norm": 0.33409714698791504, "learning_rate": 1e-05, "loss": 0.9929, "step": 8955 }, { "epoch": 7.936226749335695, "grad_norm": 0.282105416059494, "learning_rate": 1e-05, "loss": 1.0038, "step": 8960 }, { "epoch": 7.940655447298495, "grad_norm": 0.35349369049072266, "learning_rate": 1e-05, "loss": 1.0158, "step": 8965 }, { "epoch": 7.945084145261293, "grad_norm": 0.27753743529319763, "learning_rate": 1e-05, "loss": 0.9748, "step": 8970 }, { "epoch": 7.949512843224092, "grad_norm": 0.34601280093193054, "learning_rate": 1e-05, "loss": 1.054, "step": 8975 }, { "epoch": 7.953941541186891, "grad_norm": 0.35734254121780396, "learning_rate": 1e-05, "loss": 1.0565, "step": 8980 }, { "epoch": 7.95837023914969, "grad_norm": 0.3318418860435486, "learning_rate": 1e-05, "loss": 1.025, "step": 8985 }, { "epoch": 7.962798937112489, "grad_norm": 0.33603084087371826, "learning_rate": 1e-05, "loss": 1.0133, "step": 8990 }, { "epoch": 7.967227635075288, "grad_norm": 0.2874302864074707, "learning_rate": 1e-05, "loss": 1.0069, "step": 8995 }, { "epoch": 7.971656333038087, "grad_norm": 0.3076196312904358, "learning_rate": 1e-05, "loss": 1.0089, "step": 9000 }, { "epoch": 7.976085031000886, "grad_norm": 0.3234718441963196, "learning_rate": 1e-05, "loss": 1.0071, "step": 9005 }, { "epoch": 7.980513728963684, "grad_norm": 0.25036486983299255, "learning_rate": 1e-05, "loss": 1.0067, "step": 9010 }, { "epoch": 7.984942426926484, "grad_norm": 0.36438146233558655, "learning_rate": 1e-05, "loss": 1.029, "step": 9015 }, { "epoch": 7.989371124889282, "grad_norm": 0.2823215425014496, "learning_rate": 1e-05, "loss": 0.9732, "step": 9020 }, { "epoch": 7.993799822852082, "grad_norm": 0.32932335138320923, "learning_rate": 1e-05, "loss": 0.9747, "step": 9025 }, { "epoch": 7.99822852081488, "grad_norm": 0.30196577310562134, "learning_rate": 1e-05, "loss": 0.9831, "step": 9030 }, { "epoch": 8.00265721877768, "grad_norm": 0.3501168489456177, "learning_rate": 1e-05, "loss": 0.9793, "step": 9035 }, { "epoch": 8.007085916740479, "grad_norm": 0.2672116458415985, "learning_rate": 1e-05, "loss": 1.0006, "step": 9040 }, { "epoch": 8.011514614703277, "grad_norm": 0.3172587454319, "learning_rate": 1e-05, "loss": 1.0075, "step": 9045 }, { "epoch": 8.015943312666076, "grad_norm": 0.33237701654434204, "learning_rate": 1e-05, "loss": 0.9905, "step": 9050 }, { "epoch": 8.020372010628876, "grad_norm": 0.3432263731956482, "learning_rate": 1e-05, "loss": 0.955, "step": 9055 }, { "epoch": 8.024800708591673, "grad_norm": 0.36441463232040405, "learning_rate": 1e-05, "loss": 0.9907, "step": 9060 }, { "epoch": 8.029229406554473, "grad_norm": 0.4518466889858246, "learning_rate": 1e-05, "loss": 1.0188, "step": 9065 }, { "epoch": 8.033658104517272, "grad_norm": 0.40251004695892334, "learning_rate": 1e-05, "loss": 0.9885, "step": 9070 }, { "epoch": 8.038086802480072, "grad_norm": 0.3918655514717102, "learning_rate": 1e-05, "loss": 1.0409, "step": 9075 }, { "epoch": 8.04251550044287, "grad_norm": 0.36182475090026855, "learning_rate": 1e-05, "loss": 0.9704, "step": 9080 }, { "epoch": 8.046944198405669, "grad_norm": 0.3470155596733093, "learning_rate": 1e-05, "loss": 1.0252, "step": 9085 }, { "epoch": 8.051372896368468, "grad_norm": 0.4093534052371979, "learning_rate": 1e-05, "loss": 1.0261, "step": 9090 }, { "epoch": 8.055801594331266, "grad_norm": 0.32986247539520264, "learning_rate": 1e-05, "loss": 0.9751, "step": 9095 }, { "epoch": 8.060230292294065, "grad_norm": 0.32325616478919983, "learning_rate": 1e-05, "loss": 1.0691, "step": 9100 }, { "epoch": 8.064658990256865, "grad_norm": 0.30846190452575684, "learning_rate": 1e-05, "loss": 1.0049, "step": 9105 }, { "epoch": 8.069087688219664, "grad_norm": 0.3124985098838806, "learning_rate": 1e-05, "loss": 0.9709, "step": 9110 }, { "epoch": 8.073516386182462, "grad_norm": 0.29013264179229736, "learning_rate": 1e-05, "loss": 0.961, "step": 9115 }, { "epoch": 8.077945084145261, "grad_norm": 0.3013417422771454, "learning_rate": 1e-05, "loss": 1.0087, "step": 9120 }, { "epoch": 8.08237378210806, "grad_norm": 0.2700832486152649, "learning_rate": 1e-05, "loss": 1.0237, "step": 9125 }, { "epoch": 8.08680248007086, "grad_norm": 0.30386435985565186, "learning_rate": 1e-05, "loss": 0.9758, "step": 9130 }, { "epoch": 8.091231178033658, "grad_norm": 0.4277620315551758, "learning_rate": 1e-05, "loss": 1.0068, "step": 9135 }, { "epoch": 8.095659875996457, "grad_norm": 0.32337477803230286, "learning_rate": 1e-05, "loss": 1.0262, "step": 9140 }, { "epoch": 8.100088573959257, "grad_norm": 0.2845984995365143, "learning_rate": 1e-05, "loss": 1.0359, "step": 9145 }, { "epoch": 8.104517271922054, "grad_norm": 0.347700834274292, "learning_rate": 1e-05, "loss": 0.962, "step": 9150 }, { "epoch": 8.108945969884854, "grad_norm": 0.3128695785999298, "learning_rate": 1e-05, "loss": 1.0078, "step": 9155 }, { "epoch": 8.113374667847653, "grad_norm": 0.3797442317008972, "learning_rate": 1e-05, "loss": 1.0006, "step": 9160 }, { "epoch": 8.117803365810452, "grad_norm": 0.35018256306648254, "learning_rate": 1e-05, "loss": 1.0162, "step": 9165 }, { "epoch": 8.12223206377325, "grad_norm": 0.3066740334033966, "learning_rate": 1e-05, "loss": 1.0031, "step": 9170 }, { "epoch": 8.12666076173605, "grad_norm": 0.35448938608169556, "learning_rate": 1e-05, "loss": 1.0447, "step": 9175 }, { "epoch": 8.131089459698849, "grad_norm": 0.27987968921661377, "learning_rate": 1e-05, "loss": 1.0344, "step": 9180 }, { "epoch": 8.135518157661647, "grad_norm": 0.3672999441623688, "learning_rate": 1e-05, "loss": 0.9779, "step": 9185 }, { "epoch": 8.139946855624446, "grad_norm": 0.2840280830860138, "learning_rate": 1e-05, "loss": 1.0145, "step": 9190 }, { "epoch": 8.144375553587246, "grad_norm": 0.3593059182167053, "learning_rate": 1e-05, "loss": 0.9732, "step": 9195 }, { "epoch": 8.148804251550045, "grad_norm": 0.3591805398464203, "learning_rate": 1e-05, "loss": 0.9847, "step": 9200 }, { "epoch": 8.153232949512843, "grad_norm": 0.36199432611465454, "learning_rate": 1e-05, "loss": 1.0571, "step": 9205 }, { "epoch": 8.157661647475642, "grad_norm": 0.31503650546073914, "learning_rate": 1e-05, "loss": 0.9946, "step": 9210 }, { "epoch": 8.162090345438441, "grad_norm": 0.4618072509765625, "learning_rate": 1e-05, "loss": 0.9938, "step": 9215 }, { "epoch": 8.166519043401241, "grad_norm": 0.3053239583969116, "learning_rate": 1e-05, "loss": 1.0075, "step": 9220 }, { "epoch": 8.170947741364039, "grad_norm": 0.3260848820209503, "learning_rate": 1e-05, "loss": 1.0124, "step": 9225 }, { "epoch": 8.175376439326838, "grad_norm": 0.2825438976287842, "learning_rate": 1e-05, "loss": 1.0181, "step": 9230 }, { "epoch": 8.179805137289637, "grad_norm": 0.3291209936141968, "learning_rate": 1e-05, "loss": 0.9975, "step": 9235 }, { "epoch": 8.184233835252435, "grad_norm": 0.39499205350875854, "learning_rate": 1e-05, "loss": 0.9824, "step": 9240 }, { "epoch": 8.188662533215235, "grad_norm": 0.34457486867904663, "learning_rate": 1e-05, "loss": 0.9985, "step": 9245 }, { "epoch": 8.193091231178034, "grad_norm": 0.3637145161628723, "learning_rate": 1e-05, "loss": 1.0091, "step": 9250 }, { "epoch": 8.197519929140833, "grad_norm": 0.34986281394958496, "learning_rate": 1e-05, "loss": 0.9747, "step": 9255 }, { "epoch": 8.201948627103631, "grad_norm": 0.3071766793727875, "learning_rate": 1e-05, "loss": 0.9849, "step": 9260 }, { "epoch": 8.20637732506643, "grad_norm": 0.3395848274230957, "learning_rate": 1e-05, "loss": 0.9695, "step": 9265 }, { "epoch": 8.21080602302923, "grad_norm": 0.30788081884384155, "learning_rate": 1e-05, "loss": 1.0404, "step": 9270 }, { "epoch": 8.215234720992028, "grad_norm": 0.3265116512775421, "learning_rate": 1e-05, "loss": 1.0473, "step": 9275 }, { "epoch": 8.219663418954827, "grad_norm": 0.3096961975097656, "learning_rate": 1e-05, "loss": 1.029, "step": 9280 }, { "epoch": 8.224092116917626, "grad_norm": 0.2829352915287018, "learning_rate": 1e-05, "loss": 0.9885, "step": 9285 }, { "epoch": 8.228520814880426, "grad_norm": 0.35683074593544006, "learning_rate": 1e-05, "loss": 1.0399, "step": 9290 }, { "epoch": 8.232949512843224, "grad_norm": 0.32789289951324463, "learning_rate": 1e-05, "loss": 1.0258, "step": 9295 }, { "epoch": 8.237378210806023, "grad_norm": 0.2792492210865021, "learning_rate": 1e-05, "loss": 0.9742, "step": 9300 }, { "epoch": 8.241806908768822, "grad_norm": 0.26763540506362915, "learning_rate": 1e-05, "loss": 0.9719, "step": 9305 }, { "epoch": 8.24623560673162, "grad_norm": 0.3517155349254608, "learning_rate": 1e-05, "loss": 1.0134, "step": 9310 }, { "epoch": 8.25066430469442, "grad_norm": 0.3247127830982208, "learning_rate": 1e-05, "loss": 0.9979, "step": 9315 }, { "epoch": 8.255093002657219, "grad_norm": 0.3012772798538208, "learning_rate": 1e-05, "loss": 1.0463, "step": 9320 }, { "epoch": 8.259521700620018, "grad_norm": 0.43280452489852905, "learning_rate": 1e-05, "loss": 1.0114, "step": 9325 }, { "epoch": 8.263950398582816, "grad_norm": 0.4521568715572357, "learning_rate": 1e-05, "loss": 1.0111, "step": 9330 }, { "epoch": 8.268379096545615, "grad_norm": 0.2858237624168396, "learning_rate": 1e-05, "loss": 1.0104, "step": 9335 }, { "epoch": 8.272807794508415, "grad_norm": 0.2798160910606384, "learning_rate": 1e-05, "loss": 0.9913, "step": 9340 }, { "epoch": 8.277236492471214, "grad_norm": 0.29831013083457947, "learning_rate": 1e-05, "loss": 1.0276, "step": 9345 }, { "epoch": 8.281665190434012, "grad_norm": 0.2587404251098633, "learning_rate": 1e-05, "loss": 0.9678, "step": 9350 }, { "epoch": 8.286093888396811, "grad_norm": 0.3005838096141815, "learning_rate": 1e-05, "loss": 1.014, "step": 9355 }, { "epoch": 8.29052258635961, "grad_norm": 0.3595026135444641, "learning_rate": 1e-05, "loss": 0.9976, "step": 9360 }, { "epoch": 8.294951284322408, "grad_norm": 0.2960871756076813, "learning_rate": 1e-05, "loss": 1.0517, "step": 9365 }, { "epoch": 8.299379982285208, "grad_norm": 0.41662898659706116, "learning_rate": 1e-05, "loss": 1.0122, "step": 9370 }, { "epoch": 8.303808680248007, "grad_norm": 0.29070064425468445, "learning_rate": 1e-05, "loss": 1.0183, "step": 9375 }, { "epoch": 8.308237378210807, "grad_norm": 0.24964648485183716, "learning_rate": 1e-05, "loss": 1.0211, "step": 9380 }, { "epoch": 8.312666076173604, "grad_norm": 0.3184340298175812, "learning_rate": 1e-05, "loss": 1.0482, "step": 9385 }, { "epoch": 8.317094774136404, "grad_norm": 0.32899683713912964, "learning_rate": 1e-05, "loss": 1.0359, "step": 9390 }, { "epoch": 8.321523472099203, "grad_norm": 0.3268736004829407, "learning_rate": 1e-05, "loss": 1.0012, "step": 9395 }, { "epoch": 8.325952170062001, "grad_norm": 0.3246442377567291, "learning_rate": 1e-05, "loss": 0.9871, "step": 9400 }, { "epoch": 8.3303808680248, "grad_norm": 0.28695979714393616, "learning_rate": 1e-05, "loss": 1.0446, "step": 9405 }, { "epoch": 8.3348095659876, "grad_norm": 0.31747546792030334, "learning_rate": 1e-05, "loss": 1.0422, "step": 9410 }, { "epoch": 8.3392382639504, "grad_norm": 0.2943037152290344, "learning_rate": 1e-05, "loss": 1.0362, "step": 9415 }, { "epoch": 8.343666961913197, "grad_norm": 0.27293357253074646, "learning_rate": 1e-05, "loss": 1.0059, "step": 9420 }, { "epoch": 8.348095659875996, "grad_norm": 0.320142537355423, "learning_rate": 1e-05, "loss": 1.0061, "step": 9425 }, { "epoch": 8.352524357838796, "grad_norm": 0.3383481204509735, "learning_rate": 1e-05, "loss": 1.002, "step": 9430 }, { "epoch": 8.356953055801593, "grad_norm": 0.2932541072368622, "learning_rate": 1e-05, "loss": 1.0018, "step": 9435 }, { "epoch": 8.361381753764393, "grad_norm": 0.3102038502693176, "learning_rate": 1e-05, "loss": 1.0203, "step": 9440 }, { "epoch": 8.365810451727192, "grad_norm": 0.2597452998161316, "learning_rate": 1e-05, "loss": 1.0226, "step": 9445 }, { "epoch": 8.370239149689992, "grad_norm": 0.2563137412071228, "learning_rate": 1e-05, "loss": 0.9587, "step": 9450 }, { "epoch": 8.37466784765279, "grad_norm": 0.3111991584300995, "learning_rate": 1e-05, "loss": 1.0021, "step": 9455 }, { "epoch": 8.379096545615589, "grad_norm": 0.3485407531261444, "learning_rate": 1e-05, "loss": 0.995, "step": 9460 }, { "epoch": 8.383525243578388, "grad_norm": 0.3654576539993286, "learning_rate": 1e-05, "loss": 0.9848, "step": 9465 }, { "epoch": 8.387953941541188, "grad_norm": 0.28132757544517517, "learning_rate": 1e-05, "loss": 1.0276, "step": 9470 }, { "epoch": 8.392382639503985, "grad_norm": 0.34759706258773804, "learning_rate": 1e-05, "loss": 1.0093, "step": 9475 }, { "epoch": 8.396811337466785, "grad_norm": 0.2717755138874054, "learning_rate": 1e-05, "loss": 0.9874, "step": 9480 }, { "epoch": 8.401240035429584, "grad_norm": 0.3453042507171631, "learning_rate": 1e-05, "loss": 0.9994, "step": 9485 }, { "epoch": 8.405668733392382, "grad_norm": 0.3692055642604828, "learning_rate": 1e-05, "loss": 0.9987, "step": 9490 }, { "epoch": 8.410097431355181, "grad_norm": 0.3445853590965271, "learning_rate": 1e-05, "loss": 0.9636, "step": 9495 }, { "epoch": 8.41452612931798, "grad_norm": 0.25680211186408997, "learning_rate": 1e-05, "loss": 1.0668, "step": 9500 }, { "epoch": 8.41895482728078, "grad_norm": 0.31134283542633057, "learning_rate": 1e-05, "loss": 1.0269, "step": 9505 }, { "epoch": 8.423383525243578, "grad_norm": 0.32206547260284424, "learning_rate": 1e-05, "loss": 1.0078, "step": 9510 }, { "epoch": 8.427812223206377, "grad_norm": 0.3236282467842102, "learning_rate": 1e-05, "loss": 1.0338, "step": 9515 }, { "epoch": 8.432240921169177, "grad_norm": 0.3217984139919281, "learning_rate": 1e-05, "loss": 1.0422, "step": 9520 }, { "epoch": 8.436669619131976, "grad_norm": 0.2910170555114746, "learning_rate": 1e-05, "loss": 1.0028, "step": 9525 }, { "epoch": 8.441098317094774, "grad_norm": 0.29590415954589844, "learning_rate": 1e-05, "loss": 0.9618, "step": 9530 }, { "epoch": 8.445527015057573, "grad_norm": 0.37308189272880554, "learning_rate": 1e-05, "loss": 1.0052, "step": 9535 }, { "epoch": 8.449955713020373, "grad_norm": 0.4636489152908325, "learning_rate": 1e-05, "loss": 1.0459, "step": 9540 }, { "epoch": 8.45438441098317, "grad_norm": 0.316521555185318, "learning_rate": 1e-05, "loss": 0.9976, "step": 9545 }, { "epoch": 8.45881310894597, "grad_norm": 0.3285221457481384, "learning_rate": 1e-05, "loss": 1.0048, "step": 9550 }, { "epoch": 8.46324180690877, "grad_norm": 0.34427571296691895, "learning_rate": 1e-05, "loss": 1.0855, "step": 9555 }, { "epoch": 8.467670504871569, "grad_norm": 0.34588485956192017, "learning_rate": 1e-05, "loss": 1.0188, "step": 9560 }, { "epoch": 8.472099202834366, "grad_norm": 0.26303595304489136, "learning_rate": 1e-05, "loss": 0.9991, "step": 9565 }, { "epoch": 8.476527900797166, "grad_norm": 0.27641695737838745, "learning_rate": 1e-05, "loss": 0.9715, "step": 9570 }, { "epoch": 8.480956598759965, "grad_norm": 0.3545122444629669, "learning_rate": 1e-05, "loss": 0.9847, "step": 9575 }, { "epoch": 8.485385296722763, "grad_norm": 0.24362750351428986, "learning_rate": 1e-05, "loss": 1.0584, "step": 9580 }, { "epoch": 8.489813994685562, "grad_norm": 0.36961522698402405, "learning_rate": 1e-05, "loss": 1.0103, "step": 9585 }, { "epoch": 8.494242692648362, "grad_norm": 0.2861690819263458, "learning_rate": 1e-05, "loss": 1.0232, "step": 9590 }, { "epoch": 8.498671390611161, "grad_norm": 0.28869161009788513, "learning_rate": 1e-05, "loss": 0.9707, "step": 9595 }, { "epoch": 8.503100088573959, "grad_norm": 0.3293612003326416, "learning_rate": 1e-05, "loss": 1.0163, "step": 9600 }, { "epoch": 8.507528786536758, "grad_norm": 0.280514121055603, "learning_rate": 1e-05, "loss": 0.9994, "step": 9605 }, { "epoch": 8.511957484499558, "grad_norm": 0.26847410202026367, "learning_rate": 1e-05, "loss": 0.9722, "step": 9610 }, { "epoch": 8.516386182462355, "grad_norm": 0.2584664225578308, "learning_rate": 1e-05, "loss": 1.0042, "step": 9615 }, { "epoch": 8.520814880425155, "grad_norm": 0.24842916429042816, "learning_rate": 1e-05, "loss": 1.003, "step": 9620 }, { "epoch": 8.525243578387954, "grad_norm": 0.45260944962501526, "learning_rate": 1e-05, "loss": 0.9425, "step": 9625 }, { "epoch": 8.529672276350754, "grad_norm": 0.3274429142475128, "learning_rate": 1e-05, "loss": 1.0137, "step": 9630 }, { "epoch": 8.534100974313551, "grad_norm": 0.2761222720146179, "learning_rate": 1e-05, "loss": 1.0058, "step": 9635 }, { "epoch": 8.53852967227635, "grad_norm": 0.29830145835876465, "learning_rate": 1e-05, "loss": 0.9974, "step": 9640 }, { "epoch": 8.54295837023915, "grad_norm": 0.3462134003639221, "learning_rate": 1e-05, "loss": 1.011, "step": 9645 }, { "epoch": 8.54738706820195, "grad_norm": 0.24739807844161987, "learning_rate": 1e-05, "loss": 0.9968, "step": 9650 }, { "epoch": 8.551815766164747, "grad_norm": 0.2875482439994812, "learning_rate": 1e-05, "loss": 0.9799, "step": 9655 }, { "epoch": 8.556244464127547, "grad_norm": 0.27581486105918884, "learning_rate": 1e-05, "loss": 0.9942, "step": 9660 }, { "epoch": 8.560673162090346, "grad_norm": 0.2932671308517456, "learning_rate": 1e-05, "loss": 0.9652, "step": 9665 }, { "epoch": 8.565101860053144, "grad_norm": 0.27838656306266785, "learning_rate": 1e-05, "loss": 1.0598, "step": 9670 }, { "epoch": 8.569530558015943, "grad_norm": 0.2761351764202118, "learning_rate": 1e-05, "loss": 1.0256, "step": 9675 }, { "epoch": 8.573959255978743, "grad_norm": 0.30724138021469116, "learning_rate": 1e-05, "loss": 1.0187, "step": 9680 }, { "epoch": 8.578387953941542, "grad_norm": 0.3189345598220825, "learning_rate": 1e-05, "loss": 0.9688, "step": 9685 }, { "epoch": 8.58281665190434, "grad_norm": 0.3159189224243164, "learning_rate": 1e-05, "loss": 1.0225, "step": 9690 }, { "epoch": 8.587245349867139, "grad_norm": 0.3966004252433777, "learning_rate": 1e-05, "loss": 1.0457, "step": 9695 }, { "epoch": 8.591674047829938, "grad_norm": 0.34525537490844727, "learning_rate": 1e-05, "loss": 1.0122, "step": 9700 }, { "epoch": 8.596102745792736, "grad_norm": 0.363925963640213, "learning_rate": 1e-05, "loss": 0.964, "step": 9705 }, { "epoch": 8.600531443755536, "grad_norm": 0.33375084400177, "learning_rate": 1e-05, "loss": 1.0079, "step": 9710 }, { "epoch": 8.604960141718335, "grad_norm": 0.2654789090156555, "learning_rate": 1e-05, "loss": 0.9727, "step": 9715 }, { "epoch": 8.609388839681134, "grad_norm": 0.299304336309433, "learning_rate": 1e-05, "loss": 1.0422, "step": 9720 }, { "epoch": 8.613817537643932, "grad_norm": 0.35462525486946106, "learning_rate": 1e-05, "loss": 1.0494, "step": 9725 }, { "epoch": 8.618246235606732, "grad_norm": 0.31569480895996094, "learning_rate": 1e-05, "loss": 0.9885, "step": 9730 }, { "epoch": 8.622674933569531, "grad_norm": 0.32788023352622986, "learning_rate": 1e-05, "loss": 0.9951, "step": 9735 }, { "epoch": 8.627103631532329, "grad_norm": 0.27471575140953064, "learning_rate": 1e-05, "loss": 1.0201, "step": 9740 }, { "epoch": 8.631532329495128, "grad_norm": 0.3337244391441345, "learning_rate": 1e-05, "loss": 1.0098, "step": 9745 }, { "epoch": 8.635961027457927, "grad_norm": 0.2886463403701782, "learning_rate": 1e-05, "loss": 1.0086, "step": 9750 }, { "epoch": 8.640389725420727, "grad_norm": 0.2794244885444641, "learning_rate": 1e-05, "loss": 1.0293, "step": 9755 }, { "epoch": 8.644818423383525, "grad_norm": 0.2633725702762604, "learning_rate": 1e-05, "loss": 1.0138, "step": 9760 }, { "epoch": 8.649247121346324, "grad_norm": 0.34546712040901184, "learning_rate": 1e-05, "loss": 1.0547, "step": 9765 }, { "epoch": 8.653675819309123, "grad_norm": 0.3061356842517853, "learning_rate": 1e-05, "loss": 1.0237, "step": 9770 }, { "epoch": 8.658104517271923, "grad_norm": 0.25276973843574524, "learning_rate": 1e-05, "loss": 1.0123, "step": 9775 }, { "epoch": 8.66253321523472, "grad_norm": 0.3296339809894562, "learning_rate": 1e-05, "loss": 1.0244, "step": 9780 }, { "epoch": 8.66696191319752, "grad_norm": 0.4279148578643799, "learning_rate": 1e-05, "loss": 1.0461, "step": 9785 }, { "epoch": 8.67139061116032, "grad_norm": 0.4023708701133728, "learning_rate": 1e-05, "loss": 0.9904, "step": 9790 }, { "epoch": 8.675819309123117, "grad_norm": 0.30620500445365906, "learning_rate": 1e-05, "loss": 1.0208, "step": 9795 }, { "epoch": 8.680248007085916, "grad_norm": 0.28190991282463074, "learning_rate": 1e-05, "loss": 0.9703, "step": 9800 }, { "epoch": 8.684676705048716, "grad_norm": 0.3588258922100067, "learning_rate": 1e-05, "loss": 0.9747, "step": 9805 }, { "epoch": 8.689105403011515, "grad_norm": 0.2799982726573944, "learning_rate": 1e-05, "loss": 1.011, "step": 9810 }, { "epoch": 8.693534100974313, "grad_norm": 0.3799862265586853, "learning_rate": 1e-05, "loss": 0.9549, "step": 9815 }, { "epoch": 8.697962798937112, "grad_norm": 0.3779396712779999, "learning_rate": 1e-05, "loss": 1.0214, "step": 9820 }, { "epoch": 8.702391496899912, "grad_norm": 0.2611728608608246, "learning_rate": 1e-05, "loss": 0.9521, "step": 9825 }, { "epoch": 8.706820194862711, "grad_norm": 0.30146512389183044, "learning_rate": 1e-05, "loss": 1.0539, "step": 9830 }, { "epoch": 8.711248892825509, "grad_norm": 0.3535897433757782, "learning_rate": 1e-05, "loss": 0.9956, "step": 9835 }, { "epoch": 8.715677590788308, "grad_norm": 0.32646411657333374, "learning_rate": 1e-05, "loss": 1.0389, "step": 9840 }, { "epoch": 8.720106288751108, "grad_norm": 0.2533220052719116, "learning_rate": 1e-05, "loss": 1.0082, "step": 9845 }, { "epoch": 8.724534986713905, "grad_norm": 0.29469966888427734, "learning_rate": 1e-05, "loss": 1.0357, "step": 9850 }, { "epoch": 8.728963684676705, "grad_norm": 0.34019947052001953, "learning_rate": 1e-05, "loss": 0.9814, "step": 9855 }, { "epoch": 8.733392382639504, "grad_norm": 0.3179537057876587, "learning_rate": 1e-05, "loss": 1.0134, "step": 9860 }, { "epoch": 8.737821080602302, "grad_norm": 0.270094633102417, "learning_rate": 1e-05, "loss": 1.002, "step": 9865 }, { "epoch": 8.742249778565101, "grad_norm": 0.3085406720638275, "learning_rate": 1e-05, "loss": 1.0119, "step": 9870 }, { "epoch": 8.7466784765279, "grad_norm": 0.35700610280036926, "learning_rate": 1e-05, "loss": 0.9198, "step": 9875 }, { "epoch": 8.7511071744907, "grad_norm": 0.29298990964889526, "learning_rate": 1e-05, "loss": 1.0118, "step": 9880 }, { "epoch": 8.755535872453498, "grad_norm": 0.28406137228012085, "learning_rate": 1e-05, "loss": 1.0337, "step": 9885 }, { "epoch": 8.759964570416297, "grad_norm": 0.3093281686306, "learning_rate": 1e-05, "loss": 0.9931, "step": 9890 }, { "epoch": 8.764393268379097, "grad_norm": 0.3061217963695526, "learning_rate": 1e-05, "loss": 1.0209, "step": 9895 }, { "epoch": 8.768821966341896, "grad_norm": 0.34029045701026917, "learning_rate": 1e-05, "loss": 1.0251, "step": 9900 }, { "epoch": 8.773250664304694, "grad_norm": 0.3023642897605896, "learning_rate": 1e-05, "loss": 0.9824, "step": 9905 }, { "epoch": 8.777679362267493, "grad_norm": 0.2699114680290222, "learning_rate": 1e-05, "loss": 0.9999, "step": 9910 }, { "epoch": 8.782108060230293, "grad_norm": 0.34366509318351746, "learning_rate": 1e-05, "loss": 0.9874, "step": 9915 }, { "epoch": 8.78653675819309, "grad_norm": 0.2885497212409973, "learning_rate": 1e-05, "loss": 0.9702, "step": 9920 }, { "epoch": 8.79096545615589, "grad_norm": 0.3078930675983429, "learning_rate": 1e-05, "loss": 0.9798, "step": 9925 }, { "epoch": 8.79539415411869, "grad_norm": 0.2534559369087219, "learning_rate": 1e-05, "loss": 0.9818, "step": 9930 }, { "epoch": 8.799822852081489, "grad_norm": 0.20130226016044617, "learning_rate": 1e-05, "loss": 1.0258, "step": 9935 }, { "epoch": 8.804251550044286, "grad_norm": 0.3134182095527649, "learning_rate": 1e-05, "loss": 0.9739, "step": 9940 }, { "epoch": 8.808680248007086, "grad_norm": 0.31002235412597656, "learning_rate": 1e-05, "loss": 1.0227, "step": 9945 }, { "epoch": 8.813108945969885, "grad_norm": 0.27708548307418823, "learning_rate": 1e-05, "loss": 1.0169, "step": 9950 }, { "epoch": 8.817537643932685, "grad_norm": 0.30106914043426514, "learning_rate": 1e-05, "loss": 1.0626, "step": 9955 }, { "epoch": 8.821966341895482, "grad_norm": 0.26456499099731445, "learning_rate": 1e-05, "loss": 1.0455, "step": 9960 }, { "epoch": 8.826395039858282, "grad_norm": 0.2438918799161911, "learning_rate": 1e-05, "loss": 0.9629, "step": 9965 }, { "epoch": 8.830823737821081, "grad_norm": 0.26591259241104126, "learning_rate": 1e-05, "loss": 1.0171, "step": 9970 }, { "epoch": 8.835252435783879, "grad_norm": 0.32115495204925537, "learning_rate": 1e-05, "loss": 0.9923, "step": 9975 }, { "epoch": 8.839681133746678, "grad_norm": 0.2797943949699402, "learning_rate": 1e-05, "loss": 1.058, "step": 9980 }, { "epoch": 8.844109831709478, "grad_norm": 0.2658204138278961, "learning_rate": 1e-05, "loss": 1.0326, "step": 9985 }, { "epoch": 8.848538529672277, "grad_norm": 0.32358720898628235, "learning_rate": 1e-05, "loss": 1.0605, "step": 9990 }, { "epoch": 8.852967227635075, "grad_norm": 0.25266775488853455, "learning_rate": 1e-05, "loss": 1.0497, "step": 9995 }, { "epoch": 8.857395925597874, "grad_norm": 0.2671074867248535, "learning_rate": 1e-05, "loss": 0.988, "step": 10000 }, { "epoch": 8.861824623560674, "grad_norm": 0.26102587580680847, "learning_rate": 1e-05, "loss": 1.0306, "step": 10005 }, { "epoch": 8.866253321523471, "grad_norm": 0.26402416825294495, "learning_rate": 1e-05, "loss": 0.9852, "step": 10010 }, { "epoch": 8.87068201948627, "grad_norm": 0.28006434440612793, "learning_rate": 1e-05, "loss": 0.9578, "step": 10015 }, { "epoch": 8.87511071744907, "grad_norm": 0.3361985981464386, "learning_rate": 1e-05, "loss": 1.0118, "step": 10020 }, { "epoch": 8.87953941541187, "grad_norm": 0.26976171135902405, "learning_rate": 1e-05, "loss": 1.006, "step": 10025 }, { "epoch": 8.883968113374667, "grad_norm": 0.36532947421073914, "learning_rate": 1e-05, "loss": 1.009, "step": 10030 }, { "epoch": 8.888396811337467, "grad_norm": 0.3031025528907776, "learning_rate": 1e-05, "loss": 1.0172, "step": 10035 }, { "epoch": 8.892825509300266, "grad_norm": 0.27416276931762695, "learning_rate": 1e-05, "loss": 1.0093, "step": 10040 }, { "epoch": 8.897254207263064, "grad_norm": 0.3802085518836975, "learning_rate": 1e-05, "loss": 0.9393, "step": 10045 }, { "epoch": 8.901682905225863, "grad_norm": 0.34512272477149963, "learning_rate": 1e-05, "loss": 1.0137, "step": 10050 }, { "epoch": 8.906111603188663, "grad_norm": 0.27620893716812134, "learning_rate": 1e-05, "loss": 0.9972, "step": 10055 }, { "epoch": 8.910540301151462, "grad_norm": 0.3376884162425995, "learning_rate": 1e-05, "loss": 0.9846, "step": 10060 }, { "epoch": 8.91496899911426, "grad_norm": 0.2946058213710785, "learning_rate": 1e-05, "loss": 0.9983, "step": 10065 }, { "epoch": 8.91939769707706, "grad_norm": 0.3219498097896576, "learning_rate": 1e-05, "loss": 0.9661, "step": 10070 }, { "epoch": 8.923826395039859, "grad_norm": 0.3931785225868225, "learning_rate": 1e-05, "loss": 0.9771, "step": 10075 }, { "epoch": 8.928255093002658, "grad_norm": 0.29029181599617004, "learning_rate": 1e-05, "loss": 0.9878, "step": 10080 }, { "epoch": 8.932683790965456, "grad_norm": 0.3026534914970398, "learning_rate": 1e-05, "loss": 0.9589, "step": 10085 }, { "epoch": 8.937112488928255, "grad_norm": 0.25995296239852905, "learning_rate": 1e-05, "loss": 0.9875, "step": 10090 }, { "epoch": 8.941541186891055, "grad_norm": 0.2809275686740875, "learning_rate": 1e-05, "loss": 0.9628, "step": 10095 }, { "epoch": 8.945969884853852, "grad_norm": 0.2803073823451996, "learning_rate": 1e-05, "loss": 1.0117, "step": 10100 }, { "epoch": 8.950398582816652, "grad_norm": 0.2728479206562042, "learning_rate": 1e-05, "loss": 1.0069, "step": 10105 }, { "epoch": 8.954827280779451, "grad_norm": 0.3069097101688385, "learning_rate": 1e-05, "loss": 0.9885, "step": 10110 }, { "epoch": 8.95925597874225, "grad_norm": 0.36019957065582275, "learning_rate": 1e-05, "loss": 0.9857, "step": 10115 }, { "epoch": 8.963684676705048, "grad_norm": 0.3736335039138794, "learning_rate": 1e-05, "loss": 1.0473, "step": 10120 }, { "epoch": 8.968113374667848, "grad_norm": 0.3105757534503937, "learning_rate": 1e-05, "loss": 0.9378, "step": 10125 }, { "epoch": 8.972542072630647, "grad_norm": 0.422336608171463, "learning_rate": 1e-05, "loss": 1.1144, "step": 10130 }, { "epoch": 8.976970770593445, "grad_norm": 0.3580183982849121, "learning_rate": 1e-05, "loss": 0.9618, "step": 10135 }, { "epoch": 8.981399468556244, "grad_norm": 0.3884885609149933, "learning_rate": 1e-05, "loss": 1.0335, "step": 10140 }, { "epoch": 8.985828166519044, "grad_norm": 0.35174691677093506, "learning_rate": 1e-05, "loss": 0.9616, "step": 10145 }, { "epoch": 8.990256864481843, "grad_norm": 0.3623945116996765, "learning_rate": 1e-05, "loss": 1.0066, "step": 10150 }, { "epoch": 8.99468556244464, "grad_norm": 0.3279879689216614, "learning_rate": 1e-05, "loss": 0.9947, "step": 10155 }, { "epoch": 8.99911426040744, "grad_norm": 0.33078688383102417, "learning_rate": 1e-05, "loss": 0.9929, "step": 10160 }, { "epoch": 9.00354295837024, "grad_norm": 0.31664299964904785, "learning_rate": 1e-05, "loss": 1.0159, "step": 10165 }, { "epoch": 9.007971656333037, "grad_norm": 0.35084065794944763, "learning_rate": 1e-05, "loss": 1.052, "step": 10170 }, { "epoch": 9.012400354295837, "grad_norm": 0.28195109963417053, "learning_rate": 1e-05, "loss": 1.0429, "step": 10175 }, { "epoch": 9.016829052258636, "grad_norm": 0.3798409700393677, "learning_rate": 1e-05, "loss": 0.9701, "step": 10180 }, { "epoch": 9.021257750221436, "grad_norm": 0.2715992331504822, "learning_rate": 1e-05, "loss": 1.0245, "step": 10185 }, { "epoch": 9.025686448184233, "grad_norm": 0.2833086848258972, "learning_rate": 1e-05, "loss": 1.0119, "step": 10190 }, { "epoch": 9.030115146147033, "grad_norm": 0.4003225266933441, "learning_rate": 1e-05, "loss": 1.0237, "step": 10195 }, { "epoch": 9.034543844109832, "grad_norm": 0.37667742371559143, "learning_rate": 1e-05, "loss": 0.9631, "step": 10200 }, { "epoch": 9.038972542072631, "grad_norm": 0.3836769461631775, "learning_rate": 1e-05, "loss": 0.9634, "step": 10205 }, { "epoch": 9.043401240035429, "grad_norm": 0.36238908767700195, "learning_rate": 1e-05, "loss": 0.9912, "step": 10210 }, { "epoch": 9.047829937998229, "grad_norm": 0.3144984841346741, "learning_rate": 1e-05, "loss": 0.9637, "step": 10215 }, { "epoch": 9.052258635961028, "grad_norm": 0.30484485626220703, "learning_rate": 1e-05, "loss": 0.9605, "step": 10220 }, { "epoch": 9.056687333923826, "grad_norm": 0.3397437334060669, "learning_rate": 1e-05, "loss": 1.022, "step": 10225 }, { "epoch": 9.061116031886625, "grad_norm": 0.28322675824165344, "learning_rate": 1e-05, "loss": 1.0097, "step": 10230 }, { "epoch": 9.065544729849424, "grad_norm": 0.3227088153362274, "learning_rate": 1e-05, "loss": 0.9831, "step": 10235 }, { "epoch": 9.069973427812224, "grad_norm": 0.28987735509872437, "learning_rate": 1e-05, "loss": 0.9912, "step": 10240 }, { "epoch": 9.074402125775022, "grad_norm": 0.33533304929733276, "learning_rate": 1e-05, "loss": 0.9971, "step": 10245 }, { "epoch": 9.078830823737821, "grad_norm": 0.2863968014717102, "learning_rate": 1e-05, "loss": 0.9735, "step": 10250 }, { "epoch": 9.08325952170062, "grad_norm": 0.320400595664978, "learning_rate": 1e-05, "loss": 1.0281, "step": 10255 }, { "epoch": 9.087688219663418, "grad_norm": 0.43020936846733093, "learning_rate": 1e-05, "loss": 1.0243, "step": 10260 }, { "epoch": 9.092116917626218, "grad_norm": 0.350770503282547, "learning_rate": 1e-05, "loss": 1.0572, "step": 10265 }, { "epoch": 9.096545615589017, "grad_norm": 0.31009382009506226, "learning_rate": 1e-05, "loss": 0.9995, "step": 10270 }, { "epoch": 9.100974313551816, "grad_norm": 0.3370135724544525, "learning_rate": 1e-05, "loss": 1.0231, "step": 10275 }, { "epoch": 9.105403011514614, "grad_norm": 0.31504905223846436, "learning_rate": 1e-05, "loss": 0.992, "step": 10280 }, { "epoch": 9.109831709477413, "grad_norm": 0.3323787748813629, "learning_rate": 1e-05, "loss": 1.022, "step": 10285 }, { "epoch": 9.114260407440213, "grad_norm": 0.23321537673473358, "learning_rate": 1e-05, "loss": 1.0055, "step": 10290 }, { "epoch": 9.118689105403012, "grad_norm": 0.29984867572784424, "learning_rate": 1e-05, "loss": 0.9648, "step": 10295 }, { "epoch": 9.12311780336581, "grad_norm": 0.28411856293678284, "learning_rate": 1e-05, "loss": 0.9846, "step": 10300 }, { "epoch": 9.12754650132861, "grad_norm": 0.2836872935295105, "learning_rate": 1e-05, "loss": 1.0034, "step": 10305 }, { "epoch": 9.131975199291409, "grad_norm": 0.2888641953468323, "learning_rate": 1e-05, "loss": 1.0088, "step": 10310 }, { "epoch": 9.136403897254207, "grad_norm": 0.2679941952228546, "learning_rate": 1e-05, "loss": 0.9365, "step": 10315 }, { "epoch": 9.140832595217006, "grad_norm": 0.35586997866630554, "learning_rate": 1e-05, "loss": 1.0633, "step": 10320 }, { "epoch": 9.145261293179805, "grad_norm": 0.2879399359226227, "learning_rate": 1e-05, "loss": 0.9852, "step": 10325 }, { "epoch": 9.149689991142605, "grad_norm": 0.35500773787498474, "learning_rate": 1e-05, "loss": 0.9504, "step": 10330 }, { "epoch": 9.154118689105402, "grad_norm": 0.27988946437835693, "learning_rate": 1e-05, "loss": 0.9226, "step": 10335 }, { "epoch": 9.158547387068202, "grad_norm": 0.33403074741363525, "learning_rate": 1e-05, "loss": 0.981, "step": 10340 }, { "epoch": 9.162976085031001, "grad_norm": 0.3000040650367737, "learning_rate": 1e-05, "loss": 0.9984, "step": 10345 }, { "epoch": 9.167404782993799, "grad_norm": 0.35046878457069397, "learning_rate": 1e-05, "loss": 1.0125, "step": 10350 }, { "epoch": 9.171833480956598, "grad_norm": 0.35079461336135864, "learning_rate": 1e-05, "loss": 1.0178, "step": 10355 }, { "epoch": 9.176262178919398, "grad_norm": 0.3019295036792755, "learning_rate": 1e-05, "loss": 0.9984, "step": 10360 }, { "epoch": 9.180690876882197, "grad_norm": 0.3077620565891266, "learning_rate": 1e-05, "loss": 1.0257, "step": 10365 }, { "epoch": 9.185119574844995, "grad_norm": 0.2527737319469452, "learning_rate": 1e-05, "loss": 0.9764, "step": 10370 }, { "epoch": 9.189548272807794, "grad_norm": 0.32669419050216675, "learning_rate": 1e-05, "loss": 0.966, "step": 10375 }, { "epoch": 9.193976970770594, "grad_norm": 0.31431785225868225, "learning_rate": 1e-05, "loss": 1.0353, "step": 10380 }, { "epoch": 9.198405668733393, "grad_norm": 0.30633842945098877, "learning_rate": 1e-05, "loss": 0.9954, "step": 10385 }, { "epoch": 9.202834366696191, "grad_norm": 0.34725508093833923, "learning_rate": 1e-05, "loss": 1.0085, "step": 10390 }, { "epoch": 9.20726306465899, "grad_norm": 0.26888903975486755, "learning_rate": 1e-05, "loss": 1.0028, "step": 10395 }, { "epoch": 9.21169176262179, "grad_norm": 0.34096434712409973, "learning_rate": 1e-05, "loss": 1.0123, "step": 10400 }, { "epoch": 9.216120460584587, "grad_norm": 0.3352886736392975, "learning_rate": 1e-05, "loss": 0.9587, "step": 10405 }, { "epoch": 9.220549158547387, "grad_norm": 0.35172680020332336, "learning_rate": 1e-05, "loss": 0.9917, "step": 10410 }, { "epoch": 9.224977856510186, "grad_norm": 0.24038206040859222, "learning_rate": 1e-05, "loss": 1.034, "step": 10415 }, { "epoch": 9.229406554472986, "grad_norm": 0.32749372720718384, "learning_rate": 1e-05, "loss": 1.0401, "step": 10420 }, { "epoch": 9.233835252435783, "grad_norm": 0.3877907395362854, "learning_rate": 1e-05, "loss": 0.9865, "step": 10425 }, { "epoch": 9.238263950398583, "grad_norm": 0.29924723505973816, "learning_rate": 1e-05, "loss": 1.0018, "step": 10430 }, { "epoch": 9.242692648361382, "grad_norm": 0.3780488073825836, "learning_rate": 1e-05, "loss": 0.9628, "step": 10435 }, { "epoch": 9.24712134632418, "grad_norm": 0.3642600476741791, "learning_rate": 1e-05, "loss": 0.9071, "step": 10440 }, { "epoch": 9.25155004428698, "grad_norm": 0.30567747354507446, "learning_rate": 1e-05, "loss": 1.063, "step": 10445 }, { "epoch": 9.255978742249779, "grad_norm": 0.2744263708591461, "learning_rate": 1e-05, "loss": 1.0059, "step": 10450 }, { "epoch": 9.260407440212578, "grad_norm": 0.3133305013179779, "learning_rate": 1e-05, "loss": 1.0339, "step": 10455 }, { "epoch": 9.264836138175376, "grad_norm": 0.30763304233551025, "learning_rate": 1e-05, "loss": 0.9958, "step": 10460 }, { "epoch": 9.269264836138175, "grad_norm": 0.3267073333263397, "learning_rate": 1e-05, "loss": 0.982, "step": 10465 }, { "epoch": 9.273693534100975, "grad_norm": 0.3251609206199646, "learning_rate": 1e-05, "loss": 1.0058, "step": 10470 }, { "epoch": 9.278122232063772, "grad_norm": 0.29866859316825867, "learning_rate": 1e-05, "loss": 1.0516, "step": 10475 }, { "epoch": 9.282550930026572, "grad_norm": 0.4649989902973175, "learning_rate": 1e-05, "loss": 0.9627, "step": 10480 }, { "epoch": 9.286979627989371, "grad_norm": 0.27347126603126526, "learning_rate": 1e-05, "loss": 0.9984, "step": 10485 }, { "epoch": 9.29140832595217, "grad_norm": 0.2536153793334961, "learning_rate": 1e-05, "loss": 1.0164, "step": 10490 }, { "epoch": 9.295837023914968, "grad_norm": 0.23823118209838867, "learning_rate": 1e-05, "loss": 0.9748, "step": 10495 }, { "epoch": 9.300265721877768, "grad_norm": 0.31557103991508484, "learning_rate": 1e-05, "loss": 1.0212, "step": 10500 }, { "epoch": 9.304694419840567, "grad_norm": 0.2430633157491684, "learning_rate": 1e-05, "loss": 1.0457, "step": 10505 }, { "epoch": 9.309123117803367, "grad_norm": 0.23746606707572937, "learning_rate": 1e-05, "loss": 1.009, "step": 10510 }, { "epoch": 9.313551815766164, "grad_norm": 0.26026999950408936, "learning_rate": 1e-05, "loss": 1.0742, "step": 10515 }, { "epoch": 9.317980513728964, "grad_norm": 0.23958712816238403, "learning_rate": 1e-05, "loss": 0.9672, "step": 10520 }, { "epoch": 9.322409211691763, "grad_norm": 0.25504070520401, "learning_rate": 1e-05, "loss": 1.0252, "step": 10525 }, { "epoch": 9.32683790965456, "grad_norm": 0.40648138523101807, "learning_rate": 1e-05, "loss": 0.9703, "step": 10530 }, { "epoch": 9.33126660761736, "grad_norm": 0.2843705415725708, "learning_rate": 1e-05, "loss": 0.9861, "step": 10535 }, { "epoch": 9.33569530558016, "grad_norm": 0.3267786502838135, "learning_rate": 1e-05, "loss": 1.0063, "step": 10540 }, { "epoch": 9.34012400354296, "grad_norm": 0.28439798951148987, "learning_rate": 1e-05, "loss": 1.0967, "step": 10545 }, { "epoch": 9.344552701505757, "grad_norm": 0.3294345736503601, "learning_rate": 1e-05, "loss": 1.0258, "step": 10550 }, { "epoch": 9.348981399468556, "grad_norm": 0.3477773070335388, "learning_rate": 1e-05, "loss": 1.0435, "step": 10555 }, { "epoch": 9.353410097431356, "grad_norm": 0.40877801179885864, "learning_rate": 1e-05, "loss": 0.993, "step": 10560 }, { "epoch": 9.357838795394153, "grad_norm": 0.3087904751300812, "learning_rate": 1e-05, "loss": 1.0046, "step": 10565 }, { "epoch": 9.362267493356953, "grad_norm": 0.28086230158805847, "learning_rate": 1e-05, "loss": 1.0215, "step": 10570 }, { "epoch": 9.366696191319752, "grad_norm": 0.23268984258174896, "learning_rate": 1e-05, "loss": 1.0567, "step": 10575 }, { "epoch": 9.371124889282552, "grad_norm": 0.2668893337249756, "learning_rate": 1e-05, "loss": 1.0388, "step": 10580 }, { "epoch": 9.37555358724535, "grad_norm": 0.23762261867523193, "learning_rate": 1e-05, "loss": 1.0222, "step": 10585 }, { "epoch": 9.379982285208149, "grad_norm": 0.31048086285591125, "learning_rate": 1e-05, "loss": 1.0092, "step": 10590 }, { "epoch": 9.384410983170948, "grad_norm": 0.2556580603122711, "learning_rate": 1e-05, "loss": 1.0041, "step": 10595 }, { "epoch": 9.388839681133746, "grad_norm": 0.35179075598716736, "learning_rate": 1e-05, "loss": 1.0039, "step": 10600 }, { "epoch": 9.393268379096545, "grad_norm": 0.3239005208015442, "learning_rate": 1e-05, "loss": 1.0904, "step": 10605 }, { "epoch": 9.397697077059345, "grad_norm": 0.3315274119377136, "learning_rate": 1e-05, "loss": 0.9746, "step": 10610 }, { "epoch": 9.402125775022144, "grad_norm": 0.3327401578426361, "learning_rate": 1e-05, "loss": 0.9865, "step": 10615 }, { "epoch": 9.406554472984942, "grad_norm": 0.39855074882507324, "learning_rate": 1e-05, "loss": 0.964, "step": 10620 }, { "epoch": 9.410983170947741, "grad_norm": 0.386879563331604, "learning_rate": 1e-05, "loss": 0.981, "step": 10625 }, { "epoch": 9.41541186891054, "grad_norm": 0.3346370756626129, "learning_rate": 1e-05, "loss": 1.0192, "step": 10630 }, { "epoch": 9.41984056687334, "grad_norm": 0.37306782603263855, "learning_rate": 1e-05, "loss": 0.9706, "step": 10635 }, { "epoch": 9.424269264836138, "grad_norm": 0.29044109582901, "learning_rate": 1e-05, "loss": 0.9315, "step": 10640 }, { "epoch": 9.428697962798937, "grad_norm": 0.3142159879207611, "learning_rate": 1e-05, "loss": 1.0227, "step": 10645 }, { "epoch": 9.433126660761737, "grad_norm": 0.32744020223617554, "learning_rate": 1e-05, "loss": 1.0255, "step": 10650 }, { "epoch": 9.437555358724534, "grad_norm": 0.2751801013946533, "learning_rate": 1e-05, "loss": 1.0497, "step": 10655 }, { "epoch": 9.441984056687334, "grad_norm": 0.21770940721035004, "learning_rate": 1e-05, "loss": 1.0506, "step": 10660 }, { "epoch": 9.446412754650133, "grad_norm": 0.261288046836853, "learning_rate": 1e-05, "loss": 0.9607, "step": 10665 }, { "epoch": 9.450841452612933, "grad_norm": 0.3091477155685425, "learning_rate": 1e-05, "loss": 1.0214, "step": 10670 }, { "epoch": 9.45527015057573, "grad_norm": 0.3383648991584778, "learning_rate": 1e-05, "loss": 1.0377, "step": 10675 }, { "epoch": 9.45969884853853, "grad_norm": 0.30078986287117004, "learning_rate": 1e-05, "loss": 1.0248, "step": 10680 }, { "epoch": 9.464127546501329, "grad_norm": 0.3739725947380066, "learning_rate": 1e-05, "loss": 1.032, "step": 10685 }, { "epoch": 9.468556244464127, "grad_norm": 0.3867415487766266, "learning_rate": 1e-05, "loss": 1.041, "step": 10690 }, { "epoch": 9.472984942426926, "grad_norm": 0.28349366784095764, "learning_rate": 1e-05, "loss": 0.987, "step": 10695 }, { "epoch": 9.477413640389726, "grad_norm": 0.3332308828830719, "learning_rate": 1e-05, "loss": 0.9907, "step": 10700 }, { "epoch": 9.481842338352525, "grad_norm": 0.304450124502182, "learning_rate": 1e-05, "loss": 1.0218, "step": 10705 }, { "epoch": 9.486271036315323, "grad_norm": 0.3437693119049072, "learning_rate": 1e-05, "loss": 1.0044, "step": 10710 }, { "epoch": 9.490699734278122, "grad_norm": 0.2844833731651306, "learning_rate": 1e-05, "loss": 0.9974, "step": 10715 }, { "epoch": 9.495128432240922, "grad_norm": 0.30860450863838196, "learning_rate": 1e-05, "loss": 0.9737, "step": 10720 }, { "epoch": 9.499557130203721, "grad_norm": 0.23085078597068787, "learning_rate": 1e-05, "loss": 1.0431, "step": 10725 }, { "epoch": 9.503985828166519, "grad_norm": 0.3047216832637787, "learning_rate": 1e-05, "loss": 1.0284, "step": 10730 }, { "epoch": 9.508414526129318, "grad_norm": 0.2583774924278259, "learning_rate": 1e-05, "loss": 0.9987, "step": 10735 }, { "epoch": 9.512843224092117, "grad_norm": 0.18981026113033295, "learning_rate": 1e-05, "loss": 0.997, "step": 10740 }, { "epoch": 9.517271922054915, "grad_norm": 0.2867439091205597, "learning_rate": 1e-05, "loss": 0.9586, "step": 10745 }, { "epoch": 9.521700620017715, "grad_norm": 0.32015806436538696, "learning_rate": 1e-05, "loss": 1.0314, "step": 10750 }, { "epoch": 9.526129317980514, "grad_norm": 0.31143102049827576, "learning_rate": 1e-05, "loss": 1.0306, "step": 10755 }, { "epoch": 9.530558015943313, "grad_norm": 0.32476815581321716, "learning_rate": 1e-05, "loss": 0.9525, "step": 10760 }, { "epoch": 9.534986713906111, "grad_norm": 0.34490182995796204, "learning_rate": 1e-05, "loss": 1.0005, "step": 10765 }, { "epoch": 9.53941541186891, "grad_norm": 0.26716142892837524, "learning_rate": 1e-05, "loss": 0.9746, "step": 10770 }, { "epoch": 9.54384410983171, "grad_norm": 0.2967308461666107, "learning_rate": 1e-05, "loss": 0.9568, "step": 10775 }, { "epoch": 9.548272807794508, "grad_norm": 0.28547367453575134, "learning_rate": 1e-05, "loss": 1.0229, "step": 10780 }, { "epoch": 9.552701505757307, "grad_norm": 0.29418671131134033, "learning_rate": 1e-05, "loss": 1.0324, "step": 10785 }, { "epoch": 9.557130203720106, "grad_norm": 0.30048003792762756, "learning_rate": 1e-05, "loss": 1.0245, "step": 10790 }, { "epoch": 9.561558901682906, "grad_norm": 0.28666961193084717, "learning_rate": 1e-05, "loss": 1.0528, "step": 10795 }, { "epoch": 9.565987599645704, "grad_norm": 0.3216293454170227, "learning_rate": 1e-05, "loss": 1.0026, "step": 10800 }, { "epoch": 9.570416297608503, "grad_norm": 0.2466462254524231, "learning_rate": 1e-05, "loss": 1.0177, "step": 10805 }, { "epoch": 9.574844995571302, "grad_norm": 0.24364490807056427, "learning_rate": 1e-05, "loss": 1.0021, "step": 10810 }, { "epoch": 9.579273693534102, "grad_norm": 0.2657265067100525, "learning_rate": 1e-05, "loss": 1.0224, "step": 10815 }, { "epoch": 9.5837023914969, "grad_norm": 0.28962242603302, "learning_rate": 1e-05, "loss": 1.0327, "step": 10820 }, { "epoch": 9.588131089459699, "grad_norm": 0.34054914116859436, "learning_rate": 1e-05, "loss": 1.011, "step": 10825 }, { "epoch": 9.592559787422498, "grad_norm": 0.290404736995697, "learning_rate": 1e-05, "loss": 1.0206, "step": 10830 }, { "epoch": 9.596988485385296, "grad_norm": 0.34479761123657227, "learning_rate": 1e-05, "loss": 0.9877, "step": 10835 }, { "epoch": 9.601417183348095, "grad_norm": 0.3987703323364258, "learning_rate": 1e-05, "loss": 1.069, "step": 10840 }, { "epoch": 9.605845881310895, "grad_norm": 0.2594773471355438, "learning_rate": 1e-05, "loss": 0.9835, "step": 10845 }, { "epoch": 9.610274579273694, "grad_norm": 0.30426791310310364, "learning_rate": 1e-05, "loss": 1.0138, "step": 10850 }, { "epoch": 9.614703277236492, "grad_norm": 0.31898564100265503, "learning_rate": 1e-05, "loss": 1.0365, "step": 10855 }, { "epoch": 9.619131975199291, "grad_norm": 0.35877910256385803, "learning_rate": 1e-05, "loss": 0.9947, "step": 10860 }, { "epoch": 9.62356067316209, "grad_norm": 0.2700461149215698, "learning_rate": 1e-05, "loss": 0.97, "step": 10865 }, { "epoch": 9.627989371124889, "grad_norm": 0.36071059107780457, "learning_rate": 1e-05, "loss": 1.0061, "step": 10870 }, { "epoch": 9.632418069087688, "grad_norm": 0.25075316429138184, "learning_rate": 1e-05, "loss": 1.0199, "step": 10875 }, { "epoch": 9.636846767050487, "grad_norm": 0.2619266211986542, "learning_rate": 1e-05, "loss": 1.0147, "step": 10880 }, { "epoch": 9.641275465013287, "grad_norm": 0.23985588550567627, "learning_rate": 1e-05, "loss": 1.0571, "step": 10885 }, { "epoch": 9.645704162976084, "grad_norm": 0.3363223671913147, "learning_rate": 1e-05, "loss": 1.0073, "step": 10890 }, { "epoch": 9.650132860938884, "grad_norm": 0.3055192828178406, "learning_rate": 1e-05, "loss": 1.0496, "step": 10895 }, { "epoch": 9.654561558901683, "grad_norm": 0.3000132739543915, "learning_rate": 1e-05, "loss": 0.9977, "step": 10900 }, { "epoch": 9.658990256864481, "grad_norm": 0.282427579164505, "learning_rate": 1e-05, "loss": 0.9955, "step": 10905 }, { "epoch": 9.66341895482728, "grad_norm": 0.3415239453315735, "learning_rate": 1e-05, "loss": 1.0121, "step": 10910 }, { "epoch": 9.66784765279008, "grad_norm": 0.2961592972278595, "learning_rate": 1e-05, "loss": 1.0306, "step": 10915 }, { "epoch": 9.67227635075288, "grad_norm": 0.2643917202949524, "learning_rate": 1e-05, "loss": 1.0346, "step": 10920 }, { "epoch": 9.676705048715677, "grad_norm": 0.41411229968070984, "learning_rate": 1e-05, "loss": 1.0229, "step": 10925 }, { "epoch": 9.681133746678476, "grad_norm": 0.33263343572616577, "learning_rate": 1e-05, "loss": 1.0492, "step": 10930 }, { "epoch": 9.685562444641276, "grad_norm": 0.26576805114746094, "learning_rate": 1e-05, "loss": 1.0486, "step": 10935 }, { "epoch": 9.689991142604075, "grad_norm": 0.3351614773273468, "learning_rate": 1e-05, "loss": 1.0281, "step": 10940 }, { "epoch": 9.694419840566873, "grad_norm": 0.296962171792984, "learning_rate": 1e-05, "loss": 1.0511, "step": 10945 }, { "epoch": 9.698848538529672, "grad_norm": 0.247733473777771, "learning_rate": 1e-05, "loss": 0.982, "step": 10950 }, { "epoch": 9.703277236492472, "grad_norm": 0.23276546597480774, "learning_rate": 1e-05, "loss": 1.0407, "step": 10955 }, { "epoch": 9.70770593445527, "grad_norm": 0.2448033094406128, "learning_rate": 1e-05, "loss": 1.0233, "step": 10960 }, { "epoch": 9.712134632418069, "grad_norm": 0.31032833456993103, "learning_rate": 1e-05, "loss": 0.9511, "step": 10965 }, { "epoch": 9.716563330380868, "grad_norm": 0.2827463746070862, "learning_rate": 1e-05, "loss": 1.0408, "step": 10970 }, { "epoch": 9.720992028343668, "grad_norm": 0.2876776456832886, "learning_rate": 1e-05, "loss": 1.0078, "step": 10975 }, { "epoch": 9.725420726306465, "grad_norm": 0.28047484159469604, "learning_rate": 1e-05, "loss": 1.011, "step": 10980 }, { "epoch": 9.729849424269265, "grad_norm": 0.31798845529556274, "learning_rate": 1e-05, "loss": 1.0118, "step": 10985 }, { "epoch": 9.734278122232064, "grad_norm": 0.2628239393234253, "learning_rate": 1e-05, "loss": 0.942, "step": 10990 }, { "epoch": 9.738706820194862, "grad_norm": 0.3181191384792328, "learning_rate": 1e-05, "loss": 0.9899, "step": 10995 }, { "epoch": 9.743135518157661, "grad_norm": 0.26345333456993103, "learning_rate": 1e-05, "loss": 1.0044, "step": 11000 }, { "epoch": 9.74756421612046, "grad_norm": 0.30158525705337524, "learning_rate": 1e-05, "loss": 0.9904, "step": 11005 }, { "epoch": 9.75199291408326, "grad_norm": 0.34285974502563477, "learning_rate": 1e-05, "loss": 1.0368, "step": 11010 }, { "epoch": 9.756421612046058, "grad_norm": 0.2689751386642456, "learning_rate": 1e-05, "loss": 1.0067, "step": 11015 }, { "epoch": 9.760850310008857, "grad_norm": 0.2519364356994629, "learning_rate": 1e-05, "loss": 1.0259, "step": 11020 }, { "epoch": 9.765279007971657, "grad_norm": 0.2979722321033478, "learning_rate": 1e-05, "loss": 0.9825, "step": 11025 }, { "epoch": 9.769707705934454, "grad_norm": 0.316421240568161, "learning_rate": 1e-05, "loss": 1.0198, "step": 11030 }, { "epoch": 9.774136403897254, "grad_norm": 0.31883957982063293, "learning_rate": 1e-05, "loss": 1.0124, "step": 11035 }, { "epoch": 9.778565101860053, "grad_norm": 0.29930931329727173, "learning_rate": 1e-05, "loss": 0.9788, "step": 11040 }, { "epoch": 9.782993799822853, "grad_norm": 0.31545814871788025, "learning_rate": 1e-05, "loss": 1.0091, "step": 11045 }, { "epoch": 9.78742249778565, "grad_norm": 0.31619441509246826, "learning_rate": 1e-05, "loss": 1.0063, "step": 11050 }, { "epoch": 9.79185119574845, "grad_norm": 0.3094756305217743, "learning_rate": 1e-05, "loss": 1.0282, "step": 11055 }, { "epoch": 9.79627989371125, "grad_norm": 0.31359949707984924, "learning_rate": 1e-05, "loss": 1.0584, "step": 11060 }, { "epoch": 9.800708591674049, "grad_norm": 0.3016626238822937, "learning_rate": 1e-05, "loss": 1.0002, "step": 11065 }, { "epoch": 9.805137289636846, "grad_norm": 0.3067277669906616, "learning_rate": 1e-05, "loss": 0.9824, "step": 11070 }, { "epoch": 9.809565987599646, "grad_norm": 0.3074761927127838, "learning_rate": 1e-05, "loss": 1.0229, "step": 11075 }, { "epoch": 9.813994685562445, "grad_norm": 0.23297877609729767, "learning_rate": 1e-05, "loss": 1.0027, "step": 11080 }, { "epoch": 9.818423383525243, "grad_norm": 0.26705634593963623, "learning_rate": 1e-05, "loss": 1.0453, "step": 11085 }, { "epoch": 9.822852081488042, "grad_norm": 0.45414841175079346, "learning_rate": 1e-05, "loss": 1.0549, "step": 11090 }, { "epoch": 9.827280779450842, "grad_norm": 0.26370587944984436, "learning_rate": 1e-05, "loss": 1.0193, "step": 11095 }, { "epoch": 9.831709477413641, "grad_norm": 0.33747348189353943, "learning_rate": 1e-05, "loss": 1.0473, "step": 11100 }, { "epoch": 9.836138175376439, "grad_norm": 0.2905241549015045, "learning_rate": 1e-05, "loss": 1.0047, "step": 11105 }, { "epoch": 9.840566873339238, "grad_norm": 0.2959686517715454, "learning_rate": 1e-05, "loss": 0.9582, "step": 11110 }, { "epoch": 9.844995571302038, "grad_norm": 0.24476315081119537, "learning_rate": 1e-05, "loss": 1.0106, "step": 11115 }, { "epoch": 9.849424269264837, "grad_norm": 0.2643963396549225, "learning_rate": 1e-05, "loss": 1.0019, "step": 11120 }, { "epoch": 9.853852967227635, "grad_norm": 0.2926819622516632, "learning_rate": 1e-05, "loss": 0.9601, "step": 11125 }, { "epoch": 9.858281665190434, "grad_norm": 0.2401208132505417, "learning_rate": 1e-05, "loss": 1.0082, "step": 11130 }, { "epoch": 9.862710363153234, "grad_norm": 0.3018626272678375, "learning_rate": 1e-05, "loss": 1.0404, "step": 11135 }, { "epoch": 9.867139061116031, "grad_norm": 0.298589289188385, "learning_rate": 1e-05, "loss": 0.9861, "step": 11140 }, { "epoch": 9.87156775907883, "grad_norm": 0.30023714900016785, "learning_rate": 1e-05, "loss": 0.9972, "step": 11145 }, { "epoch": 9.87599645704163, "grad_norm": 0.2657444477081299, "learning_rate": 1e-05, "loss": 0.9862, "step": 11150 }, { "epoch": 9.88042515500443, "grad_norm": 0.29500603675842285, "learning_rate": 1e-05, "loss": 0.9714, "step": 11155 }, { "epoch": 9.884853852967227, "grad_norm": 0.29076966643333435, "learning_rate": 1e-05, "loss": 1.0063, "step": 11160 }, { "epoch": 9.889282550930027, "grad_norm": 0.3475627899169922, "learning_rate": 1e-05, "loss": 0.9348, "step": 11165 }, { "epoch": 9.893711248892826, "grad_norm": 0.26014071702957153, "learning_rate": 1e-05, "loss": 1.0137, "step": 11170 }, { "epoch": 9.898139946855624, "grad_norm": 0.2554040253162384, "learning_rate": 1e-05, "loss": 1.0628, "step": 11175 }, { "epoch": 9.902568644818423, "grad_norm": 0.25429725646972656, "learning_rate": 1e-05, "loss": 0.948, "step": 11180 }, { "epoch": 9.906997342781223, "grad_norm": 0.3491935729980469, "learning_rate": 1e-05, "loss": 1.0082, "step": 11185 }, { "epoch": 9.911426040744022, "grad_norm": 0.40521156787872314, "learning_rate": 1e-05, "loss": 1.0203, "step": 11190 }, { "epoch": 9.91585473870682, "grad_norm": 0.37809327244758606, "learning_rate": 1e-05, "loss": 0.998, "step": 11195 }, { "epoch": 9.920283436669619, "grad_norm": 0.32339635491371155, "learning_rate": 1e-05, "loss": 1.0253, "step": 11200 }, { "epoch": 9.924712134632419, "grad_norm": 0.3003422021865845, "learning_rate": 1e-05, "loss": 1.038, "step": 11205 }, { "epoch": 9.929140832595216, "grad_norm": 0.23051877319812775, "learning_rate": 1e-05, "loss": 1.0041, "step": 11210 }, { "epoch": 9.933569530558016, "grad_norm": 0.30091825127601624, "learning_rate": 1e-05, "loss": 1.0168, "step": 11215 }, { "epoch": 9.937998228520815, "grad_norm": 0.3318042755126953, "learning_rate": 1e-05, "loss": 1.0056, "step": 11220 }, { "epoch": 9.942426926483614, "grad_norm": 0.24081188440322876, "learning_rate": 1e-05, "loss": 0.9717, "step": 11225 }, { "epoch": 9.946855624446412, "grad_norm": 0.26931074261665344, "learning_rate": 1e-05, "loss": 1.0551, "step": 11230 }, { "epoch": 9.951284322409212, "grad_norm": 0.2333841174840927, "learning_rate": 1e-05, "loss": 1.0119, "step": 11235 }, { "epoch": 9.955713020372011, "grad_norm": 0.29254716634750366, "learning_rate": 1e-05, "loss": 1.0125, "step": 11240 }, { "epoch": 9.96014171833481, "grad_norm": 0.2870270609855652, "learning_rate": 1e-05, "loss": 1.0122, "step": 11245 }, { "epoch": 9.964570416297608, "grad_norm": 0.22885331511497498, "learning_rate": 1e-05, "loss": 0.9905, "step": 11250 }, { "epoch": 9.968999114260408, "grad_norm": 0.3018409311771393, "learning_rate": 1e-05, "loss": 0.9629, "step": 11255 }, { "epoch": 9.973427812223207, "grad_norm": 0.2703056335449219, "learning_rate": 1e-05, "loss": 1.0063, "step": 11260 }, { "epoch": 9.977856510186005, "grad_norm": 0.23221834003925323, "learning_rate": 1e-05, "loss": 0.9202, "step": 11265 }, { "epoch": 9.982285208148804, "grad_norm": 0.2505277395248413, "learning_rate": 1e-05, "loss": 0.9671, "step": 11270 }, { "epoch": 9.986713906111603, "grad_norm": 0.32141977548599243, "learning_rate": 1e-05, "loss": 0.9702, "step": 11275 }, { "epoch": 9.991142604074403, "grad_norm": 0.285922110080719, "learning_rate": 1e-05, "loss": 1.0192, "step": 11280 }, { "epoch": 9.9955713020372, "grad_norm": 0.3130255341529846, "learning_rate": 1e-05, "loss": 0.9756, "step": 11285 }, { "epoch": 10.0, "grad_norm": 0.29459288716316223, "learning_rate": 1e-05, "loss": 1.0096, "step": 11290 }, { "epoch": 10.0044286979628, "grad_norm": 0.3114648461341858, "learning_rate": 1e-05, "loss": 0.9961, "step": 11295 }, { "epoch": 10.008857395925597, "grad_norm": 0.34349703788757324, "learning_rate": 1e-05, "loss": 1.0328, "step": 11300 }, { "epoch": 10.013286093888397, "grad_norm": 0.3219708800315857, "learning_rate": 1e-05, "loss": 1.0006, "step": 11305 }, { "epoch": 10.017714791851196, "grad_norm": 0.34660136699676514, "learning_rate": 1e-05, "loss": 1.0064, "step": 11310 }, { "epoch": 10.022143489813995, "grad_norm": 0.3349578380584717, "learning_rate": 1e-05, "loss": 0.9859, "step": 11315 }, { "epoch": 10.026572187776793, "grad_norm": 0.3159612715244293, "learning_rate": 1e-05, "loss": 1.0121, "step": 11320 }, { "epoch": 10.031000885739592, "grad_norm": 0.28448784351348877, "learning_rate": 1e-05, "loss": 0.9483, "step": 11325 }, { "epoch": 10.035429583702392, "grad_norm": 0.2708514928817749, "learning_rate": 1e-05, "loss": 0.9812, "step": 11330 }, { "epoch": 10.03985828166519, "grad_norm": 0.22899767756462097, "learning_rate": 1e-05, "loss": 1.0341, "step": 11335 }, { "epoch": 10.044286979627989, "grad_norm": 0.2686099410057068, "learning_rate": 1e-05, "loss": 0.9375, "step": 11340 }, { "epoch": 10.048715677590788, "grad_norm": 0.3064838647842407, "learning_rate": 1e-05, "loss": 0.9643, "step": 11345 }, { "epoch": 10.053144375553588, "grad_norm": 0.3376131057739258, "learning_rate": 1e-05, "loss": 1.0043, "step": 11350 }, { "epoch": 10.057573073516386, "grad_norm": 0.26854780316352844, "learning_rate": 1e-05, "loss": 1.0448, "step": 11355 }, { "epoch": 10.062001771479185, "grad_norm": 0.27277448773384094, "learning_rate": 1e-05, "loss": 1.0456, "step": 11360 }, { "epoch": 10.066430469441984, "grad_norm": 0.29424813389778137, "learning_rate": 1e-05, "loss": 1.0828, "step": 11365 }, { "epoch": 10.070859167404784, "grad_norm": 0.4012397229671478, "learning_rate": 1e-05, "loss": 1.0095, "step": 11370 }, { "epoch": 10.075287865367581, "grad_norm": 0.34967800974845886, "learning_rate": 1e-05, "loss": 0.9597, "step": 11375 }, { "epoch": 10.079716563330381, "grad_norm": 0.3148069381713867, "learning_rate": 1e-05, "loss": 1.0482, "step": 11380 }, { "epoch": 10.08414526129318, "grad_norm": 0.36094632744789124, "learning_rate": 1e-05, "loss": 1.0106, "step": 11385 }, { "epoch": 10.088573959255978, "grad_norm": 0.31681621074676514, "learning_rate": 1e-05, "loss": 0.965, "step": 11390 }, { "epoch": 10.093002657218777, "grad_norm": 0.264861136674881, "learning_rate": 1e-05, "loss": 1.0488, "step": 11395 }, { "epoch": 10.097431355181577, "grad_norm": 0.3462587594985962, "learning_rate": 1e-05, "loss": 0.9834, "step": 11400 }, { "epoch": 10.101860053144376, "grad_norm": 0.2840697467327118, "learning_rate": 1e-05, "loss": 1.008, "step": 11405 }, { "epoch": 10.106288751107174, "grad_norm": 0.2997097373008728, "learning_rate": 1e-05, "loss": 1.031, "step": 11410 }, { "epoch": 10.110717449069973, "grad_norm": 0.2907783091068268, "learning_rate": 1e-05, "loss": 1.0462, "step": 11415 }, { "epoch": 10.115146147032773, "grad_norm": 0.2680007517337799, "learning_rate": 1e-05, "loss": 1.001, "step": 11420 }, { "epoch": 10.11957484499557, "grad_norm": 0.29785585403442383, "learning_rate": 1e-05, "loss": 0.9853, "step": 11425 }, { "epoch": 10.12400354295837, "grad_norm": 0.2777506709098816, "learning_rate": 1e-05, "loss": 0.9833, "step": 11430 }, { "epoch": 10.12843224092117, "grad_norm": 0.30228719115257263, "learning_rate": 1e-05, "loss": 0.9936, "step": 11435 }, { "epoch": 10.132860938883969, "grad_norm": 0.2686280608177185, "learning_rate": 1e-05, "loss": 1.0511, "step": 11440 }, { "epoch": 10.137289636846766, "grad_norm": 0.29569387435913086, "learning_rate": 1e-05, "loss": 1.0104, "step": 11445 }, { "epoch": 10.141718334809566, "grad_norm": 0.3754440248012543, "learning_rate": 1e-05, "loss": 0.9704, "step": 11450 }, { "epoch": 10.146147032772365, "grad_norm": 0.30589839816093445, "learning_rate": 1e-05, "loss": 0.9581, "step": 11455 }, { "epoch": 10.150575730735165, "grad_norm": 0.2895824611186981, "learning_rate": 1e-05, "loss": 1.0415, "step": 11460 }, { "epoch": 10.155004428697962, "grad_norm": 0.23917438089847565, "learning_rate": 1e-05, "loss": 1.0302, "step": 11465 }, { "epoch": 10.159433126660762, "grad_norm": 0.3071250021457672, "learning_rate": 1e-05, "loss": 0.9583, "step": 11470 }, { "epoch": 10.163861824623561, "grad_norm": 0.25300514698028564, "learning_rate": 1e-05, "loss": 1.0335, "step": 11475 }, { "epoch": 10.168290522586359, "grad_norm": 0.29810330271720886, "learning_rate": 1e-05, "loss": 1.0029, "step": 11480 }, { "epoch": 10.172719220549158, "grad_norm": 0.26134008169174194, "learning_rate": 1e-05, "loss": 0.9866, "step": 11485 }, { "epoch": 10.177147918511958, "grad_norm": 0.28486084938049316, "learning_rate": 1e-05, "loss": 1.0226, "step": 11490 }, { "epoch": 10.181576616474757, "grad_norm": 0.2644582688808441, "learning_rate": 1e-05, "loss": 0.9702, "step": 11495 }, { "epoch": 10.186005314437555, "grad_norm": 0.25823184847831726, "learning_rate": 1e-05, "loss": 1.0306, "step": 11500 }, { "epoch": 10.190434012400354, "grad_norm": 0.2837287485599518, "learning_rate": 1e-05, "loss": 0.9845, "step": 11505 }, { "epoch": 10.194862710363154, "grad_norm": 0.31140053272247314, "learning_rate": 1e-05, "loss": 0.9989, "step": 11510 }, { "epoch": 10.199291408325951, "grad_norm": 0.2545853555202484, "learning_rate": 1e-05, "loss": 0.9783, "step": 11515 }, { "epoch": 10.20372010628875, "grad_norm": 0.26009130477905273, "learning_rate": 1e-05, "loss": 1.0256, "step": 11520 }, { "epoch": 10.20814880425155, "grad_norm": 0.3061967194080353, "learning_rate": 1e-05, "loss": 0.9674, "step": 11525 }, { "epoch": 10.21257750221435, "grad_norm": 0.28326642513275146, "learning_rate": 1e-05, "loss": 0.9845, "step": 11530 }, { "epoch": 10.217006200177147, "grad_norm": 0.27370598912239075, "learning_rate": 1e-05, "loss": 1.0474, "step": 11535 }, { "epoch": 10.221434898139947, "grad_norm": 0.3138013482093811, "learning_rate": 1e-05, "loss": 1.023, "step": 11540 }, { "epoch": 10.225863596102746, "grad_norm": 0.3052850067615509, "learning_rate": 1e-05, "loss": 0.9618, "step": 11545 }, { "epoch": 10.230292294065544, "grad_norm": 0.30269312858581543, "learning_rate": 1e-05, "loss": 0.9602, "step": 11550 }, { "epoch": 10.234720992028343, "grad_norm": 0.3532915711402893, "learning_rate": 1e-05, "loss": 1.0414, "step": 11555 }, { "epoch": 10.239149689991143, "grad_norm": 0.30972394347190857, "learning_rate": 1e-05, "loss": 0.9582, "step": 11560 }, { "epoch": 10.243578387953942, "grad_norm": 0.27440938353538513, "learning_rate": 1e-05, "loss": 0.9856, "step": 11565 }, { "epoch": 10.24800708591674, "grad_norm": 0.28840890526771545, "learning_rate": 1e-05, "loss": 1.0256, "step": 11570 }, { "epoch": 10.25243578387954, "grad_norm": 0.2969173192977905, "learning_rate": 1e-05, "loss": 0.9558, "step": 11575 }, { "epoch": 10.256864481842339, "grad_norm": 0.3268223702907562, "learning_rate": 1e-05, "loss": 1.003, "step": 11580 }, { "epoch": 10.261293179805138, "grad_norm": 0.2664327025413513, "learning_rate": 1e-05, "loss": 1.0215, "step": 11585 }, { "epoch": 10.265721877767936, "grad_norm": 0.28675204515457153, "learning_rate": 1e-05, "loss": 1.002, "step": 11590 }, { "epoch": 10.270150575730735, "grad_norm": 0.34164151549339294, "learning_rate": 1e-05, "loss": 0.9972, "step": 11595 }, { "epoch": 10.274579273693535, "grad_norm": 0.3371868431568146, "learning_rate": 1e-05, "loss": 1.0177, "step": 11600 }, { "epoch": 10.279007971656332, "grad_norm": 0.3159741461277008, "learning_rate": 1e-05, "loss": 0.9768, "step": 11605 }, { "epoch": 10.283436669619132, "grad_norm": 0.2591995298862457, "learning_rate": 1e-05, "loss": 1.0298, "step": 11610 }, { "epoch": 10.287865367581931, "grad_norm": 0.26970839500427246, "learning_rate": 1e-05, "loss": 1.0302, "step": 11615 }, { "epoch": 10.29229406554473, "grad_norm": 0.29870346188545227, "learning_rate": 1e-05, "loss": 0.9685, "step": 11620 }, { "epoch": 10.296722763507528, "grad_norm": 0.30782654881477356, "learning_rate": 1e-05, "loss": 0.9689, "step": 11625 }, { "epoch": 10.301151461470328, "grad_norm": 0.35799652338027954, "learning_rate": 1e-05, "loss": 1.0563, "step": 11630 }, { "epoch": 10.305580159433127, "grad_norm": 0.28341639041900635, "learning_rate": 1e-05, "loss": 0.9909, "step": 11635 }, { "epoch": 10.310008857395925, "grad_norm": 0.31674161553382874, "learning_rate": 1e-05, "loss": 0.9779, "step": 11640 }, { "epoch": 10.314437555358724, "grad_norm": 0.29714614152908325, "learning_rate": 1e-05, "loss": 1.0111, "step": 11645 }, { "epoch": 10.318866253321524, "grad_norm": 0.28980761766433716, "learning_rate": 1e-05, "loss": 0.9897, "step": 11650 }, { "epoch": 10.323294951284323, "grad_norm": 0.25496095418930054, "learning_rate": 1e-05, "loss": 0.9539, "step": 11655 }, { "epoch": 10.32772364924712, "grad_norm": 0.2615630030632019, "learning_rate": 1e-05, "loss": 1.0467, "step": 11660 }, { "epoch": 10.33215234720992, "grad_norm": 0.24543699622154236, "learning_rate": 1e-05, "loss": 1.005, "step": 11665 }, { "epoch": 10.33658104517272, "grad_norm": 0.3174920082092285, "learning_rate": 1e-05, "loss": 1.0152, "step": 11670 }, { "epoch": 10.341009743135519, "grad_norm": 0.26769909262657166, "learning_rate": 1e-05, "loss": 0.9479, "step": 11675 }, { "epoch": 10.345438441098317, "grad_norm": 0.29964545369148254, "learning_rate": 1e-05, "loss": 0.9785, "step": 11680 }, { "epoch": 10.349867139061116, "grad_norm": 0.28694555163383484, "learning_rate": 1e-05, "loss": 0.9608, "step": 11685 }, { "epoch": 10.354295837023916, "grad_norm": 0.303021103143692, "learning_rate": 1e-05, "loss": 0.944, "step": 11690 }, { "epoch": 10.358724534986713, "grad_norm": 0.32018807530403137, "learning_rate": 1e-05, "loss": 0.9604, "step": 11695 }, { "epoch": 10.363153232949513, "grad_norm": 0.252888023853302, "learning_rate": 1e-05, "loss": 0.9835, "step": 11700 }, { "epoch": 10.367581930912312, "grad_norm": 0.2665899991989136, "learning_rate": 1e-05, "loss": 1.0022, "step": 11705 }, { "epoch": 10.372010628875111, "grad_norm": 0.3029266893863678, "learning_rate": 1e-05, "loss": 0.9924, "step": 11710 }, { "epoch": 10.37643932683791, "grad_norm": 0.32538360357284546, "learning_rate": 1e-05, "loss": 0.9505, "step": 11715 }, { "epoch": 10.380868024800709, "grad_norm": 0.3245810568332672, "learning_rate": 1e-05, "loss": 0.9942, "step": 11720 }, { "epoch": 10.385296722763508, "grad_norm": 0.2661267817020416, "learning_rate": 1e-05, "loss": 1.0415, "step": 11725 }, { "epoch": 10.389725420726306, "grad_norm": 0.26853328943252563, "learning_rate": 1e-05, "loss": 1.0365, "step": 11730 }, { "epoch": 10.394154118689105, "grad_norm": 0.27677589654922485, "learning_rate": 1e-05, "loss": 0.9624, "step": 11735 }, { "epoch": 10.398582816651905, "grad_norm": 0.3096354305744171, "learning_rate": 1e-05, "loss": 0.9889, "step": 11740 }, { "epoch": 10.403011514614704, "grad_norm": 0.2818625867366791, "learning_rate": 1e-05, "loss": 0.9972, "step": 11745 }, { "epoch": 10.407440212577502, "grad_norm": 0.3005274534225464, "learning_rate": 1e-05, "loss": 1.0306, "step": 11750 }, { "epoch": 10.411868910540301, "grad_norm": 0.2543414235115051, "learning_rate": 1e-05, "loss": 0.9969, "step": 11755 }, { "epoch": 10.4162976085031, "grad_norm": 0.2781553864479065, "learning_rate": 1e-05, "loss": 0.9961, "step": 11760 }, { "epoch": 10.420726306465898, "grad_norm": 0.3184894919395447, "learning_rate": 1e-05, "loss": 0.9701, "step": 11765 }, { "epoch": 10.425155004428698, "grad_norm": 0.27732446789741516, "learning_rate": 1e-05, "loss": 0.91, "step": 11770 }, { "epoch": 10.429583702391497, "grad_norm": 0.28109610080718994, "learning_rate": 1e-05, "loss": 1.0137, "step": 11775 }, { "epoch": 10.434012400354296, "grad_norm": 0.3391228914260864, "learning_rate": 1e-05, "loss": 1.051, "step": 11780 }, { "epoch": 10.438441098317094, "grad_norm": 0.3156225383281708, "learning_rate": 1e-05, "loss": 0.9617, "step": 11785 }, { "epoch": 10.442869796279894, "grad_norm": 0.3333657681941986, "learning_rate": 1e-05, "loss": 0.9938, "step": 11790 }, { "epoch": 10.447298494242693, "grad_norm": 0.29760661721229553, "learning_rate": 1e-05, "loss": 0.9493, "step": 11795 }, { "epoch": 10.451727192205492, "grad_norm": 0.3054805099964142, "learning_rate": 1e-05, "loss": 1.0229, "step": 11800 }, { "epoch": 10.45615589016829, "grad_norm": 0.31078213453292847, "learning_rate": 1e-05, "loss": 1.0386, "step": 11805 }, { "epoch": 10.46058458813109, "grad_norm": 0.3612339198589325, "learning_rate": 1e-05, "loss": 0.9643, "step": 11810 }, { "epoch": 10.465013286093889, "grad_norm": 0.23194549977779388, "learning_rate": 1e-05, "loss": 0.9859, "step": 11815 }, { "epoch": 10.469441984056687, "grad_norm": 0.32136034965515137, "learning_rate": 1e-05, "loss": 0.9882, "step": 11820 }, { "epoch": 10.473870682019486, "grad_norm": 0.2960103452205658, "learning_rate": 1e-05, "loss": 0.9632, "step": 11825 }, { "epoch": 10.478299379982285, "grad_norm": 0.27054598927497864, "learning_rate": 1e-05, "loss": 1.002, "step": 11830 }, { "epoch": 10.482728077945085, "grad_norm": 0.29483795166015625, "learning_rate": 1e-05, "loss": 1.0041, "step": 11835 }, { "epoch": 10.487156775907883, "grad_norm": 0.37229597568511963, "learning_rate": 1e-05, "loss": 1.024, "step": 11840 }, { "epoch": 10.491585473870682, "grad_norm": 0.28565678000450134, "learning_rate": 1e-05, "loss": 1.0018, "step": 11845 }, { "epoch": 10.496014171833481, "grad_norm": 0.4019598662853241, "learning_rate": 1e-05, "loss": 0.9844, "step": 11850 }, { "epoch": 10.50044286979628, "grad_norm": 0.36128920316696167, "learning_rate": 1e-05, "loss": 0.9985, "step": 11855 }, { "epoch": 10.504871567759078, "grad_norm": 0.3604027032852173, "learning_rate": 1e-05, "loss": 1.065, "step": 11860 }, { "epoch": 10.509300265721878, "grad_norm": 0.34436139464378357, "learning_rate": 1e-05, "loss": 1.0079, "step": 11865 }, { "epoch": 10.513728963684677, "grad_norm": 0.2565373182296753, "learning_rate": 1e-05, "loss": 1.0445, "step": 11870 }, { "epoch": 10.518157661647475, "grad_norm": 0.2698712646961212, "learning_rate": 1e-05, "loss": 1.0166, "step": 11875 }, { "epoch": 10.522586359610274, "grad_norm": 0.32578006386756897, "learning_rate": 1e-05, "loss": 1.0117, "step": 11880 }, { "epoch": 10.527015057573074, "grad_norm": 0.2972579896450043, "learning_rate": 1e-05, "loss": 0.9533, "step": 11885 }, { "epoch": 10.531443755535872, "grad_norm": 0.3600703477859497, "learning_rate": 1e-05, "loss": 0.9979, "step": 11890 }, { "epoch": 10.535872453498671, "grad_norm": 0.2374672293663025, "learning_rate": 1e-05, "loss": 0.9774, "step": 11895 }, { "epoch": 10.54030115146147, "grad_norm": 0.250801146030426, "learning_rate": 1e-05, "loss": 0.9971, "step": 11900 }, { "epoch": 10.54472984942427, "grad_norm": 0.3013153672218323, "learning_rate": 1e-05, "loss": 0.9733, "step": 11905 }, { "epoch": 10.549158547387067, "grad_norm": 0.3373948037624359, "learning_rate": 1e-05, "loss": 1.0122, "step": 11910 }, { "epoch": 10.553587245349867, "grad_norm": 0.30375102162361145, "learning_rate": 1e-05, "loss": 1.0259, "step": 11915 }, { "epoch": 10.558015943312666, "grad_norm": 0.2554892599582672, "learning_rate": 1e-05, "loss": 1.0051, "step": 11920 }, { "epoch": 10.562444641275466, "grad_norm": 0.29160356521606445, "learning_rate": 1e-05, "loss": 1.0085, "step": 11925 }, { "epoch": 10.566873339238263, "grad_norm": 0.27245867252349854, "learning_rate": 1e-05, "loss": 1.0546, "step": 11930 }, { "epoch": 10.571302037201063, "grad_norm": 0.2780960500240326, "learning_rate": 1e-05, "loss": 1.0347, "step": 11935 }, { "epoch": 10.575730735163862, "grad_norm": 0.31066030263900757, "learning_rate": 1e-05, "loss": 1.0016, "step": 11940 }, { "epoch": 10.58015943312666, "grad_norm": 0.2587489187717438, "learning_rate": 1e-05, "loss": 1.0267, "step": 11945 }, { "epoch": 10.58458813108946, "grad_norm": 0.44822588562965393, "learning_rate": 1e-05, "loss": 0.9048, "step": 11950 }, { "epoch": 10.589016829052259, "grad_norm": 0.352801650762558, "learning_rate": 1e-05, "loss": 0.9725, "step": 11955 }, { "epoch": 10.593445527015058, "grad_norm": 0.3258765935897827, "learning_rate": 1e-05, "loss": 1.049, "step": 11960 }, { "epoch": 10.597874224977856, "grad_norm": 0.31580856442451477, "learning_rate": 1e-05, "loss": 1.0204, "step": 11965 }, { "epoch": 10.602302922940655, "grad_norm": 0.2915458381175995, "learning_rate": 1e-05, "loss": 1.0549, "step": 11970 }, { "epoch": 10.606731620903455, "grad_norm": 0.31409668922424316, "learning_rate": 1e-05, "loss": 1.0103, "step": 11975 }, { "epoch": 10.611160318866254, "grad_norm": 0.28547248244285583, "learning_rate": 1e-05, "loss": 1.0299, "step": 11980 }, { "epoch": 10.615589016829052, "grad_norm": 0.28606781363487244, "learning_rate": 1e-05, "loss": 0.9729, "step": 11985 }, { "epoch": 10.620017714791851, "grad_norm": 0.3044525980949402, "learning_rate": 1e-05, "loss": 1.0183, "step": 11990 }, { "epoch": 10.62444641275465, "grad_norm": 0.31772077083587646, "learning_rate": 1e-05, "loss": 1.0424, "step": 11995 }, { "epoch": 10.628875110717448, "grad_norm": 0.24412225186824799, "learning_rate": 1e-05, "loss": 1.0245, "step": 12000 }, { "epoch": 10.633303808680248, "grad_norm": 0.3045209050178528, "learning_rate": 1e-05, "loss": 0.985, "step": 12005 }, { "epoch": 10.637732506643047, "grad_norm": 0.2466321438550949, "learning_rate": 1e-05, "loss": 1.0087, "step": 12010 }, { "epoch": 10.642161204605847, "grad_norm": 0.30908432602882385, "learning_rate": 1e-05, "loss": 1.013, "step": 12015 }, { "epoch": 10.646589902568644, "grad_norm": 0.27930381894111633, "learning_rate": 1e-05, "loss": 1.023, "step": 12020 }, { "epoch": 10.651018600531444, "grad_norm": 0.26829177141189575, "learning_rate": 1e-05, "loss": 0.9891, "step": 12025 }, { "epoch": 10.655447298494243, "grad_norm": 0.28117358684539795, "learning_rate": 1e-05, "loss": 0.9982, "step": 12030 }, { "epoch": 10.65987599645704, "grad_norm": 0.30929335951805115, "learning_rate": 1e-05, "loss": 1.0337, "step": 12035 }, { "epoch": 10.66430469441984, "grad_norm": 0.327304869890213, "learning_rate": 1e-05, "loss": 1.0243, "step": 12040 }, { "epoch": 10.66873339238264, "grad_norm": 0.25552797317504883, "learning_rate": 1e-05, "loss": 0.9571, "step": 12045 }, { "epoch": 10.67316209034544, "grad_norm": 0.3285534083843231, "learning_rate": 1e-05, "loss": 0.9914, "step": 12050 }, { "epoch": 10.677590788308237, "grad_norm": 0.2624794542789459, "learning_rate": 1e-05, "loss": 1.0296, "step": 12055 }, { "epoch": 10.682019486271036, "grad_norm": 0.2602158784866333, "learning_rate": 1e-05, "loss": 1.0089, "step": 12060 }, { "epoch": 10.686448184233836, "grad_norm": 0.2626660466194153, "learning_rate": 1e-05, "loss": 1.0035, "step": 12065 }, { "epoch": 10.690876882196633, "grad_norm": 0.33077991008758545, "learning_rate": 1e-05, "loss": 1.0034, "step": 12070 }, { "epoch": 10.695305580159433, "grad_norm": 0.31237027049064636, "learning_rate": 1e-05, "loss": 0.9974, "step": 12075 }, { "epoch": 10.699734278122232, "grad_norm": 0.25750496983528137, "learning_rate": 1e-05, "loss": 1.0114, "step": 12080 }, { "epoch": 10.704162976085032, "grad_norm": 0.3051992654800415, "learning_rate": 1e-05, "loss": 0.9797, "step": 12085 }, { "epoch": 10.70859167404783, "grad_norm": 0.2875952422618866, "learning_rate": 1e-05, "loss": 0.967, "step": 12090 }, { "epoch": 10.713020372010629, "grad_norm": 0.3118644952774048, "learning_rate": 1e-05, "loss": 0.9906, "step": 12095 }, { "epoch": 10.717449069973428, "grad_norm": 0.19404920935630798, "learning_rate": 1e-05, "loss": 1.0126, "step": 12100 }, { "epoch": 10.721877767936228, "grad_norm": 0.25898414850234985, "learning_rate": 1e-05, "loss": 0.9918, "step": 12105 }, { "epoch": 10.726306465899025, "grad_norm": 0.3232988715171814, "learning_rate": 1e-05, "loss": 1.043, "step": 12110 }, { "epoch": 10.730735163861825, "grad_norm": 0.35013917088508606, "learning_rate": 1e-05, "loss": 1.0174, "step": 12115 }, { "epoch": 10.735163861824624, "grad_norm": 0.3086259663105011, "learning_rate": 1e-05, "loss": 1.0359, "step": 12120 }, { "epoch": 10.739592559787422, "grad_norm": 0.2938600480556488, "learning_rate": 1e-05, "loss": 0.9971, "step": 12125 }, { "epoch": 10.744021257750221, "grad_norm": 0.32055458426475525, "learning_rate": 1e-05, "loss": 1.0428, "step": 12130 }, { "epoch": 10.74844995571302, "grad_norm": 0.3220217823982239, "learning_rate": 1e-05, "loss": 0.9794, "step": 12135 }, { "epoch": 10.75287865367582, "grad_norm": 0.3461529612541199, "learning_rate": 1e-05, "loss": 0.9985, "step": 12140 }, { "epoch": 10.757307351638618, "grad_norm": 0.29279544949531555, "learning_rate": 1e-05, "loss": 1.0688, "step": 12145 }, { "epoch": 10.761736049601417, "grad_norm": 0.2685081660747528, "learning_rate": 1e-05, "loss": 1.029, "step": 12150 }, { "epoch": 10.766164747564217, "grad_norm": 0.26211896538734436, "learning_rate": 1e-05, "loss": 0.9755, "step": 12155 }, { "epoch": 10.770593445527014, "grad_norm": 0.30870041251182556, "learning_rate": 1e-05, "loss": 0.9656, "step": 12160 }, { "epoch": 10.775022143489814, "grad_norm": 0.27631425857543945, "learning_rate": 1e-05, "loss": 1.0581, "step": 12165 }, { "epoch": 10.779450841452613, "grad_norm": 0.2699381113052368, "learning_rate": 1e-05, "loss": 1.0047, "step": 12170 }, { "epoch": 10.783879539415413, "grad_norm": 0.27515122294425964, "learning_rate": 1e-05, "loss": 0.9757, "step": 12175 }, { "epoch": 10.78830823737821, "grad_norm": 0.2865341305732727, "learning_rate": 1e-05, "loss": 0.9994, "step": 12180 }, { "epoch": 10.79273693534101, "grad_norm": 0.4199225902557373, "learning_rate": 1e-05, "loss": 0.9726, "step": 12185 }, { "epoch": 10.797165633303809, "grad_norm": 0.2637953758239746, "learning_rate": 1e-05, "loss": 1.0183, "step": 12190 }, { "epoch": 10.801594331266607, "grad_norm": 0.32217082381248474, "learning_rate": 1e-05, "loss": 0.9703, "step": 12195 }, { "epoch": 10.806023029229406, "grad_norm": 0.3124724328517914, "learning_rate": 1e-05, "loss": 0.9965, "step": 12200 }, { "epoch": 10.810451727192206, "grad_norm": 0.2634015679359436, "learning_rate": 1e-05, "loss": 1.0098, "step": 12205 }, { "epoch": 10.814880425155005, "grad_norm": 0.3478938341140747, "learning_rate": 1e-05, "loss": 0.9959, "step": 12210 }, { "epoch": 10.819309123117803, "grad_norm": 0.25399765372276306, "learning_rate": 1e-05, "loss": 1.0039, "step": 12215 }, { "epoch": 10.823737821080602, "grad_norm": 0.22675226628780365, "learning_rate": 1e-05, "loss": 0.9866, "step": 12220 }, { "epoch": 10.828166519043402, "grad_norm": 0.28305405378341675, "learning_rate": 1e-05, "loss": 1.0386, "step": 12225 }, { "epoch": 10.832595217006201, "grad_norm": 0.3156740069389343, "learning_rate": 1e-05, "loss": 0.9989, "step": 12230 }, { "epoch": 10.837023914968999, "grad_norm": 0.29016852378845215, "learning_rate": 1e-05, "loss": 0.9913, "step": 12235 }, { "epoch": 10.841452612931798, "grad_norm": 0.3123618960380554, "learning_rate": 1e-05, "loss": 1.0325, "step": 12240 }, { "epoch": 10.845881310894598, "grad_norm": 0.3538264334201813, "learning_rate": 1e-05, "loss": 1.0114, "step": 12245 }, { "epoch": 10.850310008857395, "grad_norm": 0.31346404552459717, "learning_rate": 1e-05, "loss": 1.0139, "step": 12250 }, { "epoch": 10.854738706820195, "grad_norm": 0.22529560327529907, "learning_rate": 1e-05, "loss": 0.9705, "step": 12255 }, { "epoch": 10.859167404782994, "grad_norm": 0.3076067268848419, "learning_rate": 1e-05, "loss": 1.0149, "step": 12260 }, { "epoch": 10.863596102745793, "grad_norm": 0.255454421043396, "learning_rate": 1e-05, "loss": 1.0041, "step": 12265 }, { "epoch": 10.868024800708591, "grad_norm": 0.2832607328891754, "learning_rate": 1e-05, "loss": 1.0156, "step": 12270 }, { "epoch": 10.87245349867139, "grad_norm": 0.2525108754634857, "learning_rate": 1e-05, "loss": 1.0244, "step": 12275 }, { "epoch": 10.87688219663419, "grad_norm": 0.2743304669857025, "learning_rate": 1e-05, "loss": 0.9826, "step": 12280 }, { "epoch": 10.88131089459699, "grad_norm": 0.26145845651626587, "learning_rate": 1e-05, "loss": 1.0282, "step": 12285 }, { "epoch": 10.885739592559787, "grad_norm": 0.2718985676765442, "learning_rate": 1e-05, "loss": 1.017, "step": 12290 }, { "epoch": 10.890168290522587, "grad_norm": 0.26267197728157043, "learning_rate": 1e-05, "loss": 1.0114, "step": 12295 }, { "epoch": 10.894596988485386, "grad_norm": 0.28427135944366455, "learning_rate": 1e-05, "loss": 0.9977, "step": 12300 }, { "epoch": 10.899025686448184, "grad_norm": 0.3193788528442383, "learning_rate": 1e-05, "loss": 0.9904, "step": 12305 }, { "epoch": 10.903454384410983, "grad_norm": 0.27316877245903015, "learning_rate": 1e-05, "loss": 1.0352, "step": 12310 }, { "epoch": 10.907883082373782, "grad_norm": 0.36816081404685974, "learning_rate": 1e-05, "loss": 0.9777, "step": 12315 }, { "epoch": 10.91231178033658, "grad_norm": 0.35779786109924316, "learning_rate": 1e-05, "loss": 1.0015, "step": 12320 }, { "epoch": 10.91674047829938, "grad_norm": 0.3075735867023468, "learning_rate": 1e-05, "loss": 0.9848, "step": 12325 }, { "epoch": 10.921169176262179, "grad_norm": 0.287017285823822, "learning_rate": 1e-05, "loss": 0.9527, "step": 12330 }, { "epoch": 10.925597874224978, "grad_norm": 0.36687302589416504, "learning_rate": 1e-05, "loss": 1.0233, "step": 12335 }, { "epoch": 10.930026572187776, "grad_norm": 0.3728812336921692, "learning_rate": 1e-05, "loss": 1.0074, "step": 12340 }, { "epoch": 10.934455270150576, "grad_norm": 0.34260761737823486, "learning_rate": 1e-05, "loss": 0.9832, "step": 12345 }, { "epoch": 10.938883968113375, "grad_norm": 0.46748384833335876, "learning_rate": 1e-05, "loss": 1.0141, "step": 12350 }, { "epoch": 10.943312666076174, "grad_norm": 0.3414219617843628, "learning_rate": 1e-05, "loss": 0.987, "step": 12355 }, { "epoch": 10.947741364038972, "grad_norm": 0.35716310143470764, "learning_rate": 1e-05, "loss": 1.0143, "step": 12360 }, { "epoch": 10.952170062001771, "grad_norm": 0.35552752017974854, "learning_rate": 1e-05, "loss": 1.0158, "step": 12365 }, { "epoch": 10.956598759964571, "grad_norm": 0.29145801067352295, "learning_rate": 1e-05, "loss": 1.0005, "step": 12370 }, { "epoch": 10.961027457927369, "grad_norm": 0.26375558972358704, "learning_rate": 1e-05, "loss": 1.0521, "step": 12375 }, { "epoch": 10.965456155890168, "grad_norm": 0.2923826575279236, "learning_rate": 1e-05, "loss": 1.0387, "step": 12380 }, { "epoch": 10.969884853852967, "grad_norm": 0.2746538519859314, "learning_rate": 1e-05, "loss": 0.9809, "step": 12385 }, { "epoch": 10.974313551815767, "grad_norm": 0.298706591129303, "learning_rate": 1e-05, "loss": 1.0061, "step": 12390 }, { "epoch": 10.978742249778564, "grad_norm": 0.29562684893608093, "learning_rate": 1e-05, "loss": 0.9846, "step": 12395 }, { "epoch": 10.983170947741364, "grad_norm": 0.2692628502845764, "learning_rate": 1e-05, "loss": 0.9591, "step": 12400 }, { "epoch": 10.987599645704163, "grad_norm": 0.3113064467906952, "learning_rate": 1e-05, "loss": 0.9657, "step": 12405 }, { "epoch": 10.992028343666963, "grad_norm": 0.2874666154384613, "learning_rate": 1e-05, "loss": 1.0035, "step": 12410 }, { "epoch": 10.99645704162976, "grad_norm": 0.33159539103507996, "learning_rate": 1e-05, "loss": 0.994, "step": 12415 }, { "epoch": 11.00088573959256, "grad_norm": 0.29299935698509216, "learning_rate": 1e-05, "loss": 0.9564, "step": 12420 }, { "epoch": 11.00531443755536, "grad_norm": 0.3103204369544983, "learning_rate": 1e-05, "loss": 0.932, "step": 12425 }, { "epoch": 11.009743135518157, "grad_norm": 0.27742722630500793, "learning_rate": 1e-05, "loss": 0.9934, "step": 12430 }, { "epoch": 11.014171833480956, "grad_norm": 0.26999253034591675, "learning_rate": 1e-05, "loss": 0.9958, "step": 12435 }, { "epoch": 11.018600531443756, "grad_norm": 0.314456969499588, "learning_rate": 1e-05, "loss": 0.9813, "step": 12440 }, { "epoch": 11.023029229406555, "grad_norm": 0.26246508955955505, "learning_rate": 1e-05, "loss": 1.0273, "step": 12445 }, { "epoch": 11.027457927369353, "grad_norm": 0.3206881582736969, "learning_rate": 1e-05, "loss": 1.0321, "step": 12450 }, { "epoch": 11.031886625332152, "grad_norm": 0.2791249752044678, "learning_rate": 1e-05, "loss": 1.0301, "step": 12455 }, { "epoch": 11.036315323294952, "grad_norm": 0.3195202350616455, "learning_rate": 1e-05, "loss": 1.0487, "step": 12460 }, { "epoch": 11.04074402125775, "grad_norm": 0.3175537586212158, "learning_rate": 1e-05, "loss": 1.0673, "step": 12465 }, { "epoch": 11.045172719220549, "grad_norm": 0.29901549220085144, "learning_rate": 1e-05, "loss": 1.0052, "step": 12470 }, { "epoch": 11.049601417183348, "grad_norm": 0.3121190369129181, "learning_rate": 1e-05, "loss": 1.0398, "step": 12475 }, { "epoch": 11.054030115146148, "grad_norm": 0.3043995797634125, "learning_rate": 1e-05, "loss": 1.0293, "step": 12480 }, { "epoch": 11.058458813108945, "grad_norm": 0.2613113224506378, "learning_rate": 1e-05, "loss": 0.9806, "step": 12485 }, { "epoch": 11.062887511071745, "grad_norm": 0.28122082352638245, "learning_rate": 1e-05, "loss": 0.9967, "step": 12490 }, { "epoch": 11.067316209034544, "grad_norm": 0.2508530020713806, "learning_rate": 1e-05, "loss": 0.9593, "step": 12495 }, { "epoch": 11.071744906997342, "grad_norm": 0.31216961145401, "learning_rate": 1e-05, "loss": 0.9842, "step": 12500 }, { "epoch": 11.076173604960141, "grad_norm": 0.23662249743938446, "learning_rate": 1e-05, "loss": 0.9767, "step": 12505 }, { "epoch": 11.08060230292294, "grad_norm": 0.2580465078353882, "learning_rate": 1e-05, "loss": 1.0339, "step": 12510 }, { "epoch": 11.08503100088574, "grad_norm": 0.30637606978416443, "learning_rate": 1e-05, "loss": 1.0275, "step": 12515 }, { "epoch": 11.089459698848538, "grad_norm": 0.28177255392074585, "learning_rate": 1e-05, "loss": 0.9752, "step": 12520 }, { "epoch": 11.093888396811337, "grad_norm": 0.23359251022338867, "learning_rate": 1e-05, "loss": 0.9877, "step": 12525 }, { "epoch": 11.098317094774137, "grad_norm": 0.28313735127449036, "learning_rate": 1e-05, "loss": 1.0054, "step": 12530 }, { "epoch": 11.102745792736936, "grad_norm": 0.25916263461112976, "learning_rate": 1e-05, "loss": 1.021, "step": 12535 }, { "epoch": 11.107174490699734, "grad_norm": 0.223174050450325, "learning_rate": 1e-05, "loss": 1.0284, "step": 12540 }, { "epoch": 11.111603188662533, "grad_norm": 0.270033597946167, "learning_rate": 1e-05, "loss": 1.0227, "step": 12545 }, { "epoch": 11.116031886625333, "grad_norm": 0.294217586517334, "learning_rate": 1e-05, "loss": 0.9873, "step": 12550 }, { "epoch": 11.12046058458813, "grad_norm": 0.254237562417984, "learning_rate": 1e-05, "loss": 1.0095, "step": 12555 }, { "epoch": 11.12488928255093, "grad_norm": 0.2888711392879486, "learning_rate": 1e-05, "loss": 1.0198, "step": 12560 }, { "epoch": 11.12931798051373, "grad_norm": 0.3375939428806305, "learning_rate": 1e-05, "loss": 1.0444, "step": 12565 }, { "epoch": 11.133746678476529, "grad_norm": 0.33700940012931824, "learning_rate": 1e-05, "loss": 0.9994, "step": 12570 }, { "epoch": 11.138175376439326, "grad_norm": 0.25875553488731384, "learning_rate": 1e-05, "loss": 0.9966, "step": 12575 }, { "epoch": 11.142604074402126, "grad_norm": 0.29349377751350403, "learning_rate": 1e-05, "loss": 1.0147, "step": 12580 }, { "epoch": 11.147032772364925, "grad_norm": 0.27773380279541016, "learning_rate": 1e-05, "loss": 1.0146, "step": 12585 }, { "epoch": 11.151461470327723, "grad_norm": 0.26748961210250854, "learning_rate": 1e-05, "loss": 0.9918, "step": 12590 }, { "epoch": 11.155890168290522, "grad_norm": 0.2450910359621048, "learning_rate": 1e-05, "loss": 0.9953, "step": 12595 }, { "epoch": 11.160318866253322, "grad_norm": 0.2941138744354248, "learning_rate": 1e-05, "loss": 1.0197, "step": 12600 }, { "epoch": 11.164747564216121, "grad_norm": 0.25659123063087463, "learning_rate": 1e-05, "loss": 1.0049, "step": 12605 }, { "epoch": 11.169176262178919, "grad_norm": 0.29851406812667847, "learning_rate": 1e-05, "loss": 0.9903, "step": 12610 }, { "epoch": 11.173604960141718, "grad_norm": 0.26001042127609253, "learning_rate": 1e-05, "loss": 0.9854, "step": 12615 }, { "epoch": 11.178033658104518, "grad_norm": 0.27933594584465027, "learning_rate": 1e-05, "loss": 0.956, "step": 12620 }, { "epoch": 11.182462356067315, "grad_norm": 0.3293021023273468, "learning_rate": 1e-05, "loss": 0.9985, "step": 12625 }, { "epoch": 11.186891054030115, "grad_norm": 0.3214893937110901, "learning_rate": 1e-05, "loss": 0.9927, "step": 12630 }, { "epoch": 11.191319751992914, "grad_norm": 0.29869943857192993, "learning_rate": 1e-05, "loss": 1.0047, "step": 12635 }, { "epoch": 11.195748449955714, "grad_norm": 0.27732065320014954, "learning_rate": 1e-05, "loss": 0.9427, "step": 12640 }, { "epoch": 11.200177147918511, "grad_norm": 0.3572227954864502, "learning_rate": 1e-05, "loss": 0.9825, "step": 12645 }, { "epoch": 11.20460584588131, "grad_norm": 0.3276127576828003, "learning_rate": 1e-05, "loss": 0.9817, "step": 12650 }, { "epoch": 11.20903454384411, "grad_norm": 0.31231093406677246, "learning_rate": 1e-05, "loss": 0.9963, "step": 12655 }, { "epoch": 11.21346324180691, "grad_norm": 0.27797722816467285, "learning_rate": 1e-05, "loss": 1.0233, "step": 12660 }, { "epoch": 11.217891939769707, "grad_norm": 0.23629514873027802, "learning_rate": 1e-05, "loss": 0.9672, "step": 12665 }, { "epoch": 11.222320637732507, "grad_norm": 0.2862871587276459, "learning_rate": 1e-05, "loss": 1.033, "step": 12670 }, { "epoch": 11.226749335695306, "grad_norm": 0.2610045075416565, "learning_rate": 1e-05, "loss": 1.0209, "step": 12675 }, { "epoch": 11.231178033658104, "grad_norm": 0.28060027956962585, "learning_rate": 1e-05, "loss": 0.9963, "step": 12680 }, { "epoch": 11.235606731620903, "grad_norm": 0.2740086019039154, "learning_rate": 1e-05, "loss": 1.0302, "step": 12685 }, { "epoch": 11.240035429583703, "grad_norm": 0.27540123462677, "learning_rate": 1e-05, "loss": 0.9757, "step": 12690 }, { "epoch": 11.244464127546502, "grad_norm": 0.3479073643684387, "learning_rate": 1e-05, "loss": 0.9375, "step": 12695 }, { "epoch": 11.2488928255093, "grad_norm": 0.26659345626831055, "learning_rate": 1e-05, "loss": 0.9968, "step": 12700 }, { "epoch": 11.2533215234721, "grad_norm": 0.2560499906539917, "learning_rate": 1e-05, "loss": 0.9975, "step": 12705 }, { "epoch": 11.257750221434899, "grad_norm": 0.2852429747581482, "learning_rate": 1e-05, "loss": 0.983, "step": 12710 }, { "epoch": 11.262178919397696, "grad_norm": 0.30607637763023376, "learning_rate": 1e-05, "loss": 0.9711, "step": 12715 }, { "epoch": 11.266607617360496, "grad_norm": 0.2971040904521942, "learning_rate": 1e-05, "loss": 1.0265, "step": 12720 }, { "epoch": 11.271036315323295, "grad_norm": 0.2594987154006958, "learning_rate": 1e-05, "loss": 0.9661, "step": 12725 }, { "epoch": 11.275465013286095, "grad_norm": 0.2745828628540039, "learning_rate": 1e-05, "loss": 1.0201, "step": 12730 }, { "epoch": 11.279893711248892, "grad_norm": 0.22700214385986328, "learning_rate": 1e-05, "loss": 1.0297, "step": 12735 }, { "epoch": 11.284322409211692, "grad_norm": 0.2602710425853729, "learning_rate": 1e-05, "loss": 1.0453, "step": 12740 }, { "epoch": 11.288751107174491, "grad_norm": 0.27829915285110474, "learning_rate": 1e-05, "loss": 1.0223, "step": 12745 }, { "epoch": 11.29317980513729, "grad_norm": 0.26040542125701904, "learning_rate": 1e-05, "loss": 1.0311, "step": 12750 }, { "epoch": 11.297608503100088, "grad_norm": 0.32715657353401184, "learning_rate": 1e-05, "loss": 1.0022, "step": 12755 }, { "epoch": 11.302037201062888, "grad_norm": 0.3321079611778259, "learning_rate": 1e-05, "loss": 0.988, "step": 12760 }, { "epoch": 11.306465899025687, "grad_norm": 0.3088231384754181, "learning_rate": 1e-05, "loss": 1.0032, "step": 12765 }, { "epoch": 11.310894596988485, "grad_norm": 0.2821297347545624, "learning_rate": 1e-05, "loss": 0.9737, "step": 12770 }, { "epoch": 11.315323294951284, "grad_norm": 0.30211564898490906, "learning_rate": 1e-05, "loss": 0.983, "step": 12775 }, { "epoch": 11.319751992914084, "grad_norm": 0.2941362261772156, "learning_rate": 1e-05, "loss": 1.0202, "step": 12780 }, { "epoch": 11.324180690876883, "grad_norm": 0.26926636695861816, "learning_rate": 1e-05, "loss": 0.997, "step": 12785 }, { "epoch": 11.32860938883968, "grad_norm": 0.26685798168182373, "learning_rate": 1e-05, "loss": 0.9959, "step": 12790 }, { "epoch": 11.33303808680248, "grad_norm": 0.262614369392395, "learning_rate": 1e-05, "loss": 0.9814, "step": 12795 }, { "epoch": 11.33746678476528, "grad_norm": 0.2847999036312103, "learning_rate": 1e-05, "loss": 0.9846, "step": 12800 }, { "epoch": 11.341895482728077, "grad_norm": 0.2977809011936188, "learning_rate": 1e-05, "loss": 1.0024, "step": 12805 }, { "epoch": 11.346324180690877, "grad_norm": 0.250999391078949, "learning_rate": 1e-05, "loss": 1.0754, "step": 12810 }, { "epoch": 11.350752878653676, "grad_norm": 0.268209844827652, "learning_rate": 1e-05, "loss": 1.0267, "step": 12815 }, { "epoch": 11.355181576616475, "grad_norm": 0.2630920708179474, "learning_rate": 1e-05, "loss": 0.9639, "step": 12820 }, { "epoch": 11.359610274579273, "grad_norm": 0.24793335795402527, "learning_rate": 1e-05, "loss": 0.9927, "step": 12825 }, { "epoch": 11.364038972542073, "grad_norm": 0.26168251037597656, "learning_rate": 1e-05, "loss": 1.0051, "step": 12830 }, { "epoch": 11.368467670504872, "grad_norm": 0.27676111459732056, "learning_rate": 1e-05, "loss": 0.9773, "step": 12835 }, { "epoch": 11.372896368467671, "grad_norm": 0.2518727779388428, "learning_rate": 1e-05, "loss": 0.9637, "step": 12840 }, { "epoch": 11.377325066430469, "grad_norm": 0.3044617176055908, "learning_rate": 1e-05, "loss": 1.0361, "step": 12845 }, { "epoch": 11.381753764393268, "grad_norm": 0.2791688144207001, "learning_rate": 1e-05, "loss": 0.977, "step": 12850 }, { "epoch": 11.386182462356068, "grad_norm": 0.2488403022289276, "learning_rate": 1e-05, "loss": 0.987, "step": 12855 }, { "epoch": 11.390611160318866, "grad_norm": 0.26252153515815735, "learning_rate": 1e-05, "loss": 1.0202, "step": 12860 }, { "epoch": 11.395039858281665, "grad_norm": 0.283425509929657, "learning_rate": 1e-05, "loss": 0.9912, "step": 12865 }, { "epoch": 11.399468556244464, "grad_norm": 0.2645430266857147, "learning_rate": 1e-05, "loss": 0.9811, "step": 12870 }, { "epoch": 11.403897254207264, "grad_norm": 0.27936238050460815, "learning_rate": 1e-05, "loss": 1.0344, "step": 12875 }, { "epoch": 11.408325952170062, "grad_norm": 0.2836114168167114, "learning_rate": 1e-05, "loss": 0.9801, "step": 12880 }, { "epoch": 11.412754650132861, "grad_norm": 0.28614330291748047, "learning_rate": 1e-05, "loss": 1.0234, "step": 12885 }, { "epoch": 11.41718334809566, "grad_norm": 0.2818671464920044, "learning_rate": 1e-05, "loss": 0.9776, "step": 12890 }, { "epoch": 11.421612046058458, "grad_norm": 0.24952994287014008, "learning_rate": 1e-05, "loss": 1.0162, "step": 12895 }, { "epoch": 11.426040744021257, "grad_norm": 0.3071129024028778, "learning_rate": 1e-05, "loss": 0.985, "step": 12900 }, { "epoch": 11.430469441984057, "grad_norm": 0.31698107719421387, "learning_rate": 1e-05, "loss": 1.0105, "step": 12905 }, { "epoch": 11.434898139946856, "grad_norm": 0.2559114396572113, "learning_rate": 1e-05, "loss": 1.0012, "step": 12910 }, { "epoch": 11.439326837909654, "grad_norm": 0.23474229872226715, "learning_rate": 1e-05, "loss": 1.0444, "step": 12915 }, { "epoch": 11.443755535872453, "grad_norm": 0.25353118777275085, "learning_rate": 1e-05, "loss": 0.9401, "step": 12920 }, { "epoch": 11.448184233835253, "grad_norm": 0.3092925250530243, "learning_rate": 1e-05, "loss": 1.0526, "step": 12925 }, { "epoch": 11.45261293179805, "grad_norm": 0.31358668208122253, "learning_rate": 1e-05, "loss": 0.997, "step": 12930 }, { "epoch": 11.45704162976085, "grad_norm": 0.3259412348270416, "learning_rate": 1e-05, "loss": 1.0068, "step": 12935 }, { "epoch": 11.46147032772365, "grad_norm": 0.3386422395706177, "learning_rate": 1e-05, "loss": 0.9786, "step": 12940 }, { "epoch": 11.465899025686449, "grad_norm": 0.28312399983406067, "learning_rate": 1e-05, "loss": 1.0378, "step": 12945 }, { "epoch": 11.470327723649246, "grad_norm": 0.25567251443862915, "learning_rate": 1e-05, "loss": 1.0006, "step": 12950 }, { "epoch": 11.474756421612046, "grad_norm": 0.24410630762577057, "learning_rate": 1e-05, "loss": 0.9616, "step": 12955 }, { "epoch": 11.479185119574845, "grad_norm": 0.28404220938682556, "learning_rate": 1e-05, "loss": 0.9869, "step": 12960 }, { "epoch": 11.483613817537645, "grad_norm": 0.2507125437259674, "learning_rate": 1e-05, "loss": 0.9562, "step": 12965 }, { "epoch": 11.488042515500442, "grad_norm": 0.286196768283844, "learning_rate": 1e-05, "loss": 0.9965, "step": 12970 }, { "epoch": 11.492471213463242, "grad_norm": 0.2457255721092224, "learning_rate": 1e-05, "loss": 1.0308, "step": 12975 }, { "epoch": 11.496899911426041, "grad_norm": 0.2932203710079193, "learning_rate": 1e-05, "loss": 1.0113, "step": 12980 }, { "epoch": 11.501328609388839, "grad_norm": 0.2764868140220642, "learning_rate": 1e-05, "loss": 0.9974, "step": 12985 }, { "epoch": 11.505757307351638, "grad_norm": 0.2516365349292755, "learning_rate": 1e-05, "loss": 0.9637, "step": 12990 }, { "epoch": 11.510186005314438, "grad_norm": 0.2501620650291443, "learning_rate": 1e-05, "loss": 0.9753, "step": 12995 }, { "epoch": 11.514614703277237, "grad_norm": 0.23135757446289062, "learning_rate": 1e-05, "loss": 1.0501, "step": 13000 }, { "epoch": 11.519043401240035, "grad_norm": 0.2551914155483246, "learning_rate": 1e-05, "loss": 0.9991, "step": 13005 }, { "epoch": 11.523472099202834, "grad_norm": 0.30834686756134033, "learning_rate": 1e-05, "loss": 1.0328, "step": 13010 }, { "epoch": 11.527900797165634, "grad_norm": 0.2901008427143097, "learning_rate": 1e-05, "loss": 0.9791, "step": 13015 }, { "epoch": 11.532329495128431, "grad_norm": 0.3713749945163727, "learning_rate": 1e-05, "loss": 1.0256, "step": 13020 }, { "epoch": 11.53675819309123, "grad_norm": 0.25785550475120544, "learning_rate": 1e-05, "loss": 1.0338, "step": 13025 }, { "epoch": 11.54118689105403, "grad_norm": 0.28028014302253723, "learning_rate": 1e-05, "loss": 0.9467, "step": 13030 }, { "epoch": 11.54561558901683, "grad_norm": 0.27772173285484314, "learning_rate": 1e-05, "loss": 0.9345, "step": 13035 }, { "epoch": 11.550044286979627, "grad_norm": 0.2639353573322296, "learning_rate": 1e-05, "loss": 1.0016, "step": 13040 }, { "epoch": 11.554472984942427, "grad_norm": 0.26178157329559326, "learning_rate": 1e-05, "loss": 1.0429, "step": 13045 }, { "epoch": 11.558901682905226, "grad_norm": 0.29230260848999023, "learning_rate": 1e-05, "loss": 0.9653, "step": 13050 }, { "epoch": 11.563330380868024, "grad_norm": 0.27721700072288513, "learning_rate": 1e-05, "loss": 1.0588, "step": 13055 }, { "epoch": 11.567759078830823, "grad_norm": 0.30772387981414795, "learning_rate": 1e-05, "loss": 1.0074, "step": 13060 }, { "epoch": 11.572187776793623, "grad_norm": 0.21189692616462708, "learning_rate": 1e-05, "loss": 1.0594, "step": 13065 }, { "epoch": 11.576616474756422, "grad_norm": 0.3092624545097351, "learning_rate": 1e-05, "loss": 1.0176, "step": 13070 }, { "epoch": 11.58104517271922, "grad_norm": 0.24715109169483185, "learning_rate": 1e-05, "loss": 1.0549, "step": 13075 }, { "epoch": 11.58547387068202, "grad_norm": 0.25297677516937256, "learning_rate": 1e-05, "loss": 0.9527, "step": 13080 }, { "epoch": 11.589902568644819, "grad_norm": 0.23887625336647034, "learning_rate": 1e-05, "loss": 0.9649, "step": 13085 }, { "epoch": 11.594331266607618, "grad_norm": 0.31239053606987, "learning_rate": 1e-05, "loss": 1.0046, "step": 13090 }, { "epoch": 11.598759964570416, "grad_norm": 0.2441648691892624, "learning_rate": 1e-05, "loss": 1.0524, "step": 13095 }, { "epoch": 11.603188662533215, "grad_norm": 0.22316597402095795, "learning_rate": 1e-05, "loss": 1.0002, "step": 13100 }, { "epoch": 11.607617360496015, "grad_norm": 0.25960877537727356, "learning_rate": 1e-05, "loss": 0.956, "step": 13105 }, { "epoch": 11.612046058458812, "grad_norm": 0.2770078182220459, "learning_rate": 1e-05, "loss": 1.028, "step": 13110 }, { "epoch": 11.616474756421612, "grad_norm": 0.24256984889507294, "learning_rate": 1e-05, "loss": 1.0198, "step": 13115 }, { "epoch": 11.620903454384411, "grad_norm": 0.2775229811668396, "learning_rate": 1e-05, "loss": 1.0283, "step": 13120 }, { "epoch": 11.62533215234721, "grad_norm": 0.21990017592906952, "learning_rate": 1e-05, "loss": 1.0061, "step": 13125 }, { "epoch": 11.629760850310008, "grad_norm": 0.25390660762786865, "learning_rate": 1e-05, "loss": 0.9775, "step": 13130 }, { "epoch": 11.634189548272808, "grad_norm": 0.27939173579216003, "learning_rate": 1e-05, "loss": 1.0173, "step": 13135 }, { "epoch": 11.638618246235607, "grad_norm": 0.31400325894355774, "learning_rate": 1e-05, "loss": 1.0566, "step": 13140 }, { "epoch": 11.643046944198407, "grad_norm": 0.2791631519794464, "learning_rate": 1e-05, "loss": 1.0582, "step": 13145 }, { "epoch": 11.647475642161204, "grad_norm": 0.3174920976161957, "learning_rate": 1e-05, "loss": 1.0546, "step": 13150 }, { "epoch": 11.651904340124004, "grad_norm": 0.2516086995601654, "learning_rate": 1e-05, "loss": 0.9927, "step": 13155 }, { "epoch": 11.656333038086803, "grad_norm": 0.33027520775794983, "learning_rate": 1e-05, "loss": 0.9931, "step": 13160 }, { "epoch": 11.6607617360496, "grad_norm": 0.2980208992958069, "learning_rate": 1e-05, "loss": 0.9565, "step": 13165 }, { "epoch": 11.6651904340124, "grad_norm": 0.25837254524230957, "learning_rate": 1e-05, "loss": 0.9832, "step": 13170 }, { "epoch": 11.6696191319752, "grad_norm": 0.3473252058029175, "learning_rate": 1e-05, "loss": 0.9634, "step": 13175 }, { "epoch": 11.674047829937999, "grad_norm": 0.2550885081291199, "learning_rate": 1e-05, "loss": 0.994, "step": 13180 }, { "epoch": 11.678476527900797, "grad_norm": 0.27462732791900635, "learning_rate": 1e-05, "loss": 0.9709, "step": 13185 }, { "epoch": 11.682905225863596, "grad_norm": 0.336978018283844, "learning_rate": 1e-05, "loss": 1.0146, "step": 13190 }, { "epoch": 11.687333923826396, "grad_norm": 0.2903445065021515, "learning_rate": 1e-05, "loss": 0.9438, "step": 13195 }, { "epoch": 11.691762621789193, "grad_norm": 0.3348015248775482, "learning_rate": 1e-05, "loss": 1.0301, "step": 13200 }, { "epoch": 11.696191319751993, "grad_norm": 0.25575220584869385, "learning_rate": 1e-05, "loss": 1.0248, "step": 13205 }, { "epoch": 11.700620017714792, "grad_norm": 0.24518495798110962, "learning_rate": 1e-05, "loss": 1.0165, "step": 13210 }, { "epoch": 11.705048715677592, "grad_norm": 0.3174123167991638, "learning_rate": 1e-05, "loss": 1.009, "step": 13215 }, { "epoch": 11.70947741364039, "grad_norm": 0.33312612771987915, "learning_rate": 1e-05, "loss": 0.9234, "step": 13220 }, { "epoch": 11.713906111603189, "grad_norm": 0.27455177903175354, "learning_rate": 1e-05, "loss": 1.0444, "step": 13225 }, { "epoch": 11.718334809565988, "grad_norm": 0.2920316159725189, "learning_rate": 1e-05, "loss": 1.0, "step": 13230 }, { "epoch": 11.722763507528786, "grad_norm": 0.28126898407936096, "learning_rate": 1e-05, "loss": 1.0243, "step": 13235 }, { "epoch": 11.727192205491585, "grad_norm": 0.26298972964286804, "learning_rate": 1e-05, "loss": 1.046, "step": 13240 }, { "epoch": 11.731620903454385, "grad_norm": 0.27447426319122314, "learning_rate": 1e-05, "loss": 0.9542, "step": 13245 }, { "epoch": 11.736049601417184, "grad_norm": 0.27569296956062317, "learning_rate": 1e-05, "loss": 0.9615, "step": 13250 }, { "epoch": 11.740478299379982, "grad_norm": 0.28101348876953125, "learning_rate": 1e-05, "loss": 1.0523, "step": 13255 }, { "epoch": 11.744906997342781, "grad_norm": 0.28040438890457153, "learning_rate": 1e-05, "loss": 0.9541, "step": 13260 }, { "epoch": 11.74933569530558, "grad_norm": 0.3497065007686615, "learning_rate": 1e-05, "loss": 0.9929, "step": 13265 }, { "epoch": 11.75376439326838, "grad_norm": 0.26495182514190674, "learning_rate": 1e-05, "loss": 0.977, "step": 13270 }, { "epoch": 11.758193091231178, "grad_norm": 0.3278653621673584, "learning_rate": 1e-05, "loss": 1.0062, "step": 13275 }, { "epoch": 11.762621789193977, "grad_norm": 0.26182252168655396, "learning_rate": 1e-05, "loss": 0.99, "step": 13280 }, { "epoch": 11.767050487156776, "grad_norm": 0.2797078490257263, "learning_rate": 1e-05, "loss": 1.022, "step": 13285 }, { "epoch": 11.771479185119574, "grad_norm": 0.2660171091556549, "learning_rate": 1e-05, "loss": 0.9592, "step": 13290 }, { "epoch": 11.775907883082374, "grad_norm": 0.26958000659942627, "learning_rate": 1e-05, "loss": 1.0105, "step": 13295 }, { "epoch": 11.780336581045173, "grad_norm": 0.3416246771812439, "learning_rate": 1e-05, "loss": 0.997, "step": 13300 }, { "epoch": 11.784765279007972, "grad_norm": 0.314456045627594, "learning_rate": 1e-05, "loss": 0.9988, "step": 13305 }, { "epoch": 11.78919397697077, "grad_norm": 0.31488505005836487, "learning_rate": 1e-05, "loss": 1.0002, "step": 13310 }, { "epoch": 11.79362267493357, "grad_norm": 0.25869429111480713, "learning_rate": 1e-05, "loss": 0.9688, "step": 13315 }, { "epoch": 11.798051372896369, "grad_norm": 0.28862735629081726, "learning_rate": 1e-05, "loss": 0.9689, "step": 13320 }, { "epoch": 11.802480070859167, "grad_norm": 0.2620314657688141, "learning_rate": 1e-05, "loss": 1.0031, "step": 13325 }, { "epoch": 11.806908768821966, "grad_norm": 0.21097129583358765, "learning_rate": 1e-05, "loss": 1.0504, "step": 13330 }, { "epoch": 11.811337466784765, "grad_norm": 0.26400476694107056, "learning_rate": 1e-05, "loss": 1.0119, "step": 13335 }, { "epoch": 11.815766164747565, "grad_norm": 0.2999187111854553, "learning_rate": 1e-05, "loss": 0.9753, "step": 13340 }, { "epoch": 11.820194862710363, "grad_norm": 0.31398919224739075, "learning_rate": 1e-05, "loss": 1.0232, "step": 13345 }, { "epoch": 11.824623560673162, "grad_norm": 0.3374306559562683, "learning_rate": 1e-05, "loss": 0.9927, "step": 13350 }, { "epoch": 11.829052258635961, "grad_norm": 0.2914050221443176, "learning_rate": 1e-05, "loss": 1.0418, "step": 13355 }, { "epoch": 11.833480956598759, "grad_norm": 0.3007832169532776, "learning_rate": 1e-05, "loss": 1.0173, "step": 13360 }, { "epoch": 11.837909654561559, "grad_norm": 0.33798086643218994, "learning_rate": 1e-05, "loss": 1.0284, "step": 13365 }, { "epoch": 11.842338352524358, "grad_norm": 0.26915472745895386, "learning_rate": 1e-05, "loss": 0.9827, "step": 13370 }, { "epoch": 11.846767050487157, "grad_norm": 0.2517496347427368, "learning_rate": 1e-05, "loss": 1.0505, "step": 13375 }, { "epoch": 11.851195748449955, "grad_norm": 0.2744987905025482, "learning_rate": 1e-05, "loss": 0.9912, "step": 13380 }, { "epoch": 11.855624446412754, "grad_norm": 0.2502387762069702, "learning_rate": 1e-05, "loss": 1.0161, "step": 13385 }, { "epoch": 11.860053144375554, "grad_norm": 0.3029354512691498, "learning_rate": 1e-05, "loss": 0.9637, "step": 13390 }, { "epoch": 11.864481842338353, "grad_norm": 0.2605747878551483, "learning_rate": 1e-05, "loss": 1.0526, "step": 13395 }, { "epoch": 11.868910540301151, "grad_norm": 0.29193899035453796, "learning_rate": 1e-05, "loss": 0.9968, "step": 13400 }, { "epoch": 11.87333923826395, "grad_norm": 0.27779293060302734, "learning_rate": 1e-05, "loss": 1.0028, "step": 13405 }, { "epoch": 11.87776793622675, "grad_norm": 0.3530624210834503, "learning_rate": 1e-05, "loss": 1.0221, "step": 13410 }, { "epoch": 11.882196634189548, "grad_norm": 0.31932714581489563, "learning_rate": 1e-05, "loss": 0.9656, "step": 13415 }, { "epoch": 11.886625332152347, "grad_norm": 0.297229528427124, "learning_rate": 1e-05, "loss": 0.9741, "step": 13420 }, { "epoch": 11.891054030115146, "grad_norm": 0.2655245363712311, "learning_rate": 1e-05, "loss": 1.0279, "step": 13425 }, { "epoch": 11.895482728077946, "grad_norm": 0.2570720613002777, "learning_rate": 1e-05, "loss": 1.0091, "step": 13430 }, { "epoch": 11.899911426040743, "grad_norm": 0.23089171946048737, "learning_rate": 1e-05, "loss": 0.9906, "step": 13435 }, { "epoch": 11.904340124003543, "grad_norm": 0.23117727041244507, "learning_rate": 1e-05, "loss": 0.9797, "step": 13440 }, { "epoch": 11.908768821966342, "grad_norm": 0.41261041164398193, "learning_rate": 1e-05, "loss": 0.9404, "step": 13445 }, { "epoch": 11.91319751992914, "grad_norm": 0.3185042142868042, "learning_rate": 1e-05, "loss": 1.0346, "step": 13450 }, { "epoch": 11.91762621789194, "grad_norm": 0.26604071259498596, "learning_rate": 1e-05, "loss": 1.0002, "step": 13455 }, { "epoch": 11.922054915854739, "grad_norm": 0.24834921956062317, "learning_rate": 1e-05, "loss": 0.9692, "step": 13460 }, { "epoch": 11.926483613817538, "grad_norm": 0.3106094300746918, "learning_rate": 1e-05, "loss": 1.0178, "step": 13465 }, { "epoch": 11.930912311780336, "grad_norm": 0.35537686944007874, "learning_rate": 1e-05, "loss": 1.0281, "step": 13470 }, { "epoch": 11.935341009743135, "grad_norm": 0.2999175488948822, "learning_rate": 1e-05, "loss": 0.9714, "step": 13475 }, { "epoch": 11.939769707705935, "grad_norm": 0.2737196385860443, "learning_rate": 1e-05, "loss": 1.0263, "step": 13480 }, { "epoch": 11.944198405668732, "grad_norm": 0.2924957573413849, "learning_rate": 1e-05, "loss": 1.0422, "step": 13485 }, { "epoch": 11.948627103631532, "grad_norm": 0.2330625206232071, "learning_rate": 1e-05, "loss": 0.9438, "step": 13490 }, { "epoch": 11.953055801594331, "grad_norm": 0.2882536053657532, "learning_rate": 1e-05, "loss": 1.0031, "step": 13495 }, { "epoch": 11.95748449955713, "grad_norm": 0.2570204436779022, "learning_rate": 1e-05, "loss": 1.0011, "step": 13500 }, { "epoch": 11.961913197519928, "grad_norm": 0.2935815155506134, "learning_rate": 1e-05, "loss": 0.9827, "step": 13505 }, { "epoch": 11.966341895482728, "grad_norm": 0.277477890253067, "learning_rate": 1e-05, "loss": 0.9714, "step": 13510 }, { "epoch": 11.970770593445527, "grad_norm": 0.34977397322654724, "learning_rate": 1e-05, "loss": 0.94, "step": 13515 }, { "epoch": 11.975199291408327, "grad_norm": 0.31164172291755676, "learning_rate": 1e-05, "loss": 1.019, "step": 13520 }, { "epoch": 11.979627989371124, "grad_norm": 0.24536538124084473, "learning_rate": 1e-05, "loss": 1.0473, "step": 13525 }, { "epoch": 11.984056687333924, "grad_norm": 0.2435460239648819, "learning_rate": 1e-05, "loss": 1.0224, "step": 13530 }, { "epoch": 11.988485385296723, "grad_norm": 0.2834871709346771, "learning_rate": 1e-05, "loss": 0.998, "step": 13535 }, { "epoch": 11.992914083259521, "grad_norm": 0.3505094349384308, "learning_rate": 1e-05, "loss": 1.0158, "step": 13540 }, { "epoch": 11.99734278122232, "grad_norm": 0.24276615679264069, "learning_rate": 1e-05, "loss": 0.9859, "step": 13545 }, { "epoch": 12.00177147918512, "grad_norm": 0.2683594524860382, "learning_rate": 1e-05, "loss": 0.9615, "step": 13550 }, { "epoch": 12.00620017714792, "grad_norm": 0.2599976658821106, "learning_rate": 1e-05, "loss": 0.9574, "step": 13555 }, { "epoch": 12.010628875110717, "grad_norm": 0.3061163127422333, "learning_rate": 1e-05, "loss": 1.0212, "step": 13560 }, { "epoch": 12.015057573073516, "grad_norm": 0.26979687809944153, "learning_rate": 1e-05, "loss": 0.997, "step": 13565 }, { "epoch": 12.019486271036316, "grad_norm": 0.34937670826911926, "learning_rate": 1e-05, "loss": 0.9833, "step": 13570 }, { "epoch": 12.023914968999113, "grad_norm": 0.2727797031402588, "learning_rate": 1e-05, "loss": 1.0185, "step": 13575 }, { "epoch": 12.028343666961913, "grad_norm": 0.27641206979751587, "learning_rate": 1e-05, "loss": 0.9875, "step": 13580 }, { "epoch": 12.032772364924712, "grad_norm": 0.25684574246406555, "learning_rate": 1e-05, "loss": 1.0017, "step": 13585 }, { "epoch": 12.037201062887512, "grad_norm": 0.26939576864242554, "learning_rate": 1e-05, "loss": 1.068, "step": 13590 }, { "epoch": 12.04162976085031, "grad_norm": 0.2836732268333435, "learning_rate": 1e-05, "loss": 1.0124, "step": 13595 }, { "epoch": 12.046058458813109, "grad_norm": 0.3030366897583008, "learning_rate": 1e-05, "loss": 0.9629, "step": 13600 }, { "epoch": 12.050487156775908, "grad_norm": 0.25969383120536804, "learning_rate": 1e-05, "loss": 0.9617, "step": 13605 }, { "epoch": 12.054915854738708, "grad_norm": 0.25452321767807007, "learning_rate": 1e-05, "loss": 1.0053, "step": 13610 }, { "epoch": 12.059344552701505, "grad_norm": 0.26574021577835083, "learning_rate": 1e-05, "loss": 0.963, "step": 13615 }, { "epoch": 12.063773250664305, "grad_norm": 0.2719283699989319, "learning_rate": 1e-05, "loss": 1.0126, "step": 13620 }, { "epoch": 12.068201948627104, "grad_norm": 0.284067839384079, "learning_rate": 1e-05, "loss": 1.0044, "step": 13625 }, { "epoch": 12.072630646589902, "grad_norm": 0.25207099318504333, "learning_rate": 1e-05, "loss": 1.0247, "step": 13630 }, { "epoch": 12.077059344552701, "grad_norm": 0.2667444944381714, "learning_rate": 1e-05, "loss": 0.9641, "step": 13635 }, { "epoch": 12.0814880425155, "grad_norm": 0.2897314727306366, "learning_rate": 1e-05, "loss": 0.9448, "step": 13640 }, { "epoch": 12.0859167404783, "grad_norm": 0.278303325176239, "learning_rate": 1e-05, "loss": 1.0008, "step": 13645 }, { "epoch": 12.090345438441098, "grad_norm": 0.3381325602531433, "learning_rate": 1e-05, "loss": 0.9407, "step": 13650 }, { "epoch": 12.094774136403897, "grad_norm": 0.3587614893913269, "learning_rate": 1e-05, "loss": 1.0088, "step": 13655 }, { "epoch": 12.099202834366697, "grad_norm": 0.28937163949012756, "learning_rate": 1e-05, "loss": 1.0398, "step": 13660 }, { "epoch": 12.103631532329494, "grad_norm": 0.22299537062644958, "learning_rate": 1e-05, "loss": 1.0414, "step": 13665 }, { "epoch": 12.108060230292294, "grad_norm": 0.24265766143798828, "learning_rate": 1e-05, "loss": 1.0269, "step": 13670 }, { "epoch": 12.112488928255093, "grad_norm": 0.3386152386665344, "learning_rate": 1e-05, "loss": 0.9314, "step": 13675 }, { "epoch": 12.116917626217893, "grad_norm": 0.258344441652298, "learning_rate": 1e-05, "loss": 1.0264, "step": 13680 }, { "epoch": 12.12134632418069, "grad_norm": 0.23197130858898163, "learning_rate": 1e-05, "loss": 0.9353, "step": 13685 }, { "epoch": 12.12577502214349, "grad_norm": 0.31310248374938965, "learning_rate": 1e-05, "loss": 0.9631, "step": 13690 }, { "epoch": 12.130203720106289, "grad_norm": 0.2991909086704254, "learning_rate": 1e-05, "loss": 1.0048, "step": 13695 }, { "epoch": 12.134632418069089, "grad_norm": 0.29839247465133667, "learning_rate": 1e-05, "loss": 0.9993, "step": 13700 }, { "epoch": 12.139061116031886, "grad_norm": 0.27661699056625366, "learning_rate": 1e-05, "loss": 1.0065, "step": 13705 }, { "epoch": 12.143489813994686, "grad_norm": 0.26128870248794556, "learning_rate": 1e-05, "loss": 0.9696, "step": 13710 }, { "epoch": 12.147918511957485, "grad_norm": 0.2674477994441986, "learning_rate": 1e-05, "loss": 1.0581, "step": 13715 }, { "epoch": 12.152347209920283, "grad_norm": 0.3240382969379425, "learning_rate": 1e-05, "loss": 1.0136, "step": 13720 }, { "epoch": 12.156775907883082, "grad_norm": 0.34261512756347656, "learning_rate": 1e-05, "loss": 0.9465, "step": 13725 }, { "epoch": 12.161204605845882, "grad_norm": 0.3624170422554016, "learning_rate": 1e-05, "loss": 0.9728, "step": 13730 }, { "epoch": 12.165633303808681, "grad_norm": 0.25599101185798645, "learning_rate": 1e-05, "loss": 0.9924, "step": 13735 }, { "epoch": 12.170062001771479, "grad_norm": 0.31879791617393494, "learning_rate": 1e-05, "loss": 0.9937, "step": 13740 }, { "epoch": 12.174490699734278, "grad_norm": 0.24751757085323334, "learning_rate": 1e-05, "loss": 1.0343, "step": 13745 }, { "epoch": 12.178919397697078, "grad_norm": 0.29482147097587585, "learning_rate": 1e-05, "loss": 0.9466, "step": 13750 }, { "epoch": 12.183348095659875, "grad_norm": 0.2824556231498718, "learning_rate": 1e-05, "loss": 1.0231, "step": 13755 }, { "epoch": 12.187776793622675, "grad_norm": 0.25332191586494446, "learning_rate": 1e-05, "loss": 0.992, "step": 13760 }, { "epoch": 12.192205491585474, "grad_norm": 0.28341910243034363, "learning_rate": 1e-05, "loss": 1.0237, "step": 13765 }, { "epoch": 12.196634189548273, "grad_norm": 0.25882938504219055, "learning_rate": 1e-05, "loss": 1.0313, "step": 13770 }, { "epoch": 12.201062887511071, "grad_norm": 0.29963967204093933, "learning_rate": 1e-05, "loss": 0.9997, "step": 13775 }, { "epoch": 12.20549158547387, "grad_norm": 0.35134243965148926, "learning_rate": 1e-05, "loss": 1.0217, "step": 13780 }, { "epoch": 12.20992028343667, "grad_norm": 0.26065540313720703, "learning_rate": 1e-05, "loss": 0.968, "step": 13785 }, { "epoch": 12.214348981399468, "grad_norm": 0.26940080523490906, "learning_rate": 1e-05, "loss": 0.995, "step": 13790 }, { "epoch": 12.218777679362267, "grad_norm": 0.3097437024116516, "learning_rate": 1e-05, "loss": 1.0479, "step": 13795 }, { "epoch": 12.223206377325067, "grad_norm": 0.2877049446105957, "learning_rate": 1e-05, "loss": 1.0317, "step": 13800 }, { "epoch": 12.227635075287866, "grad_norm": 0.2793222963809967, "learning_rate": 1e-05, "loss": 0.9547, "step": 13805 }, { "epoch": 12.232063773250664, "grad_norm": 0.3042561411857605, "learning_rate": 1e-05, "loss": 1.0205, "step": 13810 }, { "epoch": 12.236492471213463, "grad_norm": 0.36875292658805847, "learning_rate": 1e-05, "loss": 1.0145, "step": 13815 }, { "epoch": 12.240921169176262, "grad_norm": 0.27395907044410706, "learning_rate": 1e-05, "loss": 0.9697, "step": 13820 }, { "epoch": 12.245349867139062, "grad_norm": 0.23993048071861267, "learning_rate": 1e-05, "loss": 0.9858, "step": 13825 }, { "epoch": 12.24977856510186, "grad_norm": 0.3032304644584656, "learning_rate": 1e-05, "loss": 1.0223, "step": 13830 }, { "epoch": 12.254207263064659, "grad_norm": 0.29428017139434814, "learning_rate": 1e-05, "loss": 0.9668, "step": 13835 }, { "epoch": 12.258635961027458, "grad_norm": 0.32754456996917725, "learning_rate": 1e-05, "loss": 0.9798, "step": 13840 }, { "epoch": 12.263064658990256, "grad_norm": 0.29243528842926025, "learning_rate": 1e-05, "loss": 0.9694, "step": 13845 }, { "epoch": 12.267493356953056, "grad_norm": 0.2800409197807312, "learning_rate": 1e-05, "loss": 1.0019, "step": 13850 }, { "epoch": 12.271922054915855, "grad_norm": 0.22223135828971863, "learning_rate": 1e-05, "loss": 0.9682, "step": 13855 }, { "epoch": 12.276350752878654, "grad_norm": 0.2942938506603241, "learning_rate": 1e-05, "loss": 1.0251, "step": 13860 }, { "epoch": 12.280779450841452, "grad_norm": 0.27925005555152893, "learning_rate": 1e-05, "loss": 0.9572, "step": 13865 }, { "epoch": 12.285208148804251, "grad_norm": 0.2256144881248474, "learning_rate": 1e-05, "loss": 0.9939, "step": 13870 }, { "epoch": 12.289636846767051, "grad_norm": 0.26106294989585876, "learning_rate": 1e-05, "loss": 1.0165, "step": 13875 }, { "epoch": 12.294065544729849, "grad_norm": 0.23196075856685638, "learning_rate": 1e-05, "loss": 1.0313, "step": 13880 }, { "epoch": 12.298494242692648, "grad_norm": 0.24064002931118011, "learning_rate": 1e-05, "loss": 1.003, "step": 13885 }, { "epoch": 12.302922940655447, "grad_norm": 0.2666199207305908, "learning_rate": 1e-05, "loss": 1.0571, "step": 13890 }, { "epoch": 12.307351638618247, "grad_norm": 0.2753749489784241, "learning_rate": 1e-05, "loss": 0.9982, "step": 13895 }, { "epoch": 12.311780336581045, "grad_norm": 0.2881872355937958, "learning_rate": 1e-05, "loss": 0.9747, "step": 13900 }, { "epoch": 12.316209034543844, "grad_norm": 0.2434229552745819, "learning_rate": 1e-05, "loss": 1.0052, "step": 13905 }, { "epoch": 12.320637732506643, "grad_norm": 0.2382996827363968, "learning_rate": 1e-05, "loss": 0.9878, "step": 13910 }, { "epoch": 12.325066430469443, "grad_norm": 0.27602484822273254, "learning_rate": 1e-05, "loss": 0.981, "step": 13915 }, { "epoch": 12.32949512843224, "grad_norm": 0.28263893723487854, "learning_rate": 1e-05, "loss": 1.0079, "step": 13920 }, { "epoch": 12.33392382639504, "grad_norm": 0.3976885974407196, "learning_rate": 1e-05, "loss": 0.9746, "step": 13925 }, { "epoch": 12.33835252435784, "grad_norm": 0.2859059274196625, "learning_rate": 1e-05, "loss": 1.0181, "step": 13930 }, { "epoch": 12.342781222320637, "grad_norm": 0.35167238116264343, "learning_rate": 1e-05, "loss": 1.0321, "step": 13935 }, { "epoch": 12.347209920283436, "grad_norm": 0.28873613476753235, "learning_rate": 1e-05, "loss": 1.012, "step": 13940 }, { "epoch": 12.351638618246236, "grad_norm": 0.30664119124412537, "learning_rate": 1e-05, "loss": 1.0172, "step": 13945 }, { "epoch": 12.356067316209035, "grad_norm": 0.3501007854938507, "learning_rate": 1e-05, "loss": 0.953, "step": 13950 }, { "epoch": 12.360496014171833, "grad_norm": 0.31964924931526184, "learning_rate": 1e-05, "loss": 1.0058, "step": 13955 }, { "epoch": 12.364924712134632, "grad_norm": 0.39151525497436523, "learning_rate": 1e-05, "loss": 0.9524, "step": 13960 }, { "epoch": 12.369353410097432, "grad_norm": 0.36976611614227295, "learning_rate": 1e-05, "loss": 1.0249, "step": 13965 }, { "epoch": 12.37378210806023, "grad_norm": 0.3437179923057556, "learning_rate": 1e-05, "loss": 1.0485, "step": 13970 }, { "epoch": 12.378210806023029, "grad_norm": 0.2536727786064148, "learning_rate": 1e-05, "loss": 1.0469, "step": 13975 }, { "epoch": 12.382639503985828, "grad_norm": 0.2405978888273239, "learning_rate": 1e-05, "loss": 1.0144, "step": 13980 }, { "epoch": 12.387068201948628, "grad_norm": 0.31488922238349915, "learning_rate": 1e-05, "loss": 1.0236, "step": 13985 }, { "epoch": 12.391496899911425, "grad_norm": 0.25583669543266296, "learning_rate": 1e-05, "loss": 0.9939, "step": 13990 }, { "epoch": 12.395925597874225, "grad_norm": 0.35331764817237854, "learning_rate": 1e-05, "loss": 0.99, "step": 13995 }, { "epoch": 12.400354295837024, "grad_norm": 0.29949167370796204, "learning_rate": 1e-05, "loss": 1.0232, "step": 14000 }, { "epoch": 12.404782993799824, "grad_norm": 0.3051189184188843, "learning_rate": 1e-05, "loss": 0.9952, "step": 14005 }, { "epoch": 12.409211691762621, "grad_norm": 0.29661527276039124, "learning_rate": 1e-05, "loss": 1.025, "step": 14010 }, { "epoch": 12.41364038972542, "grad_norm": 0.3046285808086395, "learning_rate": 1e-05, "loss": 1.0063, "step": 14015 }, { "epoch": 12.41806908768822, "grad_norm": 0.2921496331691742, "learning_rate": 1e-05, "loss": 1.0285, "step": 14020 }, { "epoch": 12.422497785651018, "grad_norm": 0.28083136677742004, "learning_rate": 1e-05, "loss": 0.9702, "step": 14025 }, { "epoch": 12.426926483613817, "grad_norm": 0.27116382122039795, "learning_rate": 1e-05, "loss": 1.0071, "step": 14030 }, { "epoch": 12.431355181576617, "grad_norm": 0.32082560658454895, "learning_rate": 1e-05, "loss": 0.9675, "step": 14035 }, { "epoch": 12.435783879539416, "grad_norm": 0.30781230330467224, "learning_rate": 1e-05, "loss": 0.9844, "step": 14040 }, { "epoch": 12.440212577502214, "grad_norm": 0.26844415068626404, "learning_rate": 1e-05, "loss": 1.0211, "step": 14045 }, { "epoch": 12.444641275465013, "grad_norm": 0.2612963020801544, "learning_rate": 1e-05, "loss": 0.9527, "step": 14050 }, { "epoch": 12.449069973427813, "grad_norm": 0.2543257772922516, "learning_rate": 1e-05, "loss": 0.9507, "step": 14055 }, { "epoch": 12.45349867139061, "grad_norm": 0.2535167336463928, "learning_rate": 1e-05, "loss": 0.9665, "step": 14060 }, { "epoch": 12.45792736935341, "grad_norm": 0.30882173776626587, "learning_rate": 1e-05, "loss": 1.007, "step": 14065 }, { "epoch": 12.46235606731621, "grad_norm": 0.32208821177482605, "learning_rate": 1e-05, "loss": 1.034, "step": 14070 }, { "epoch": 12.466784765279009, "grad_norm": 0.35279417037963867, "learning_rate": 1e-05, "loss": 1.0018, "step": 14075 }, { "epoch": 12.471213463241806, "grad_norm": 0.37035006284713745, "learning_rate": 1e-05, "loss": 0.9956, "step": 14080 }, { "epoch": 12.475642161204606, "grad_norm": 0.33337658643722534, "learning_rate": 1e-05, "loss": 1.0125, "step": 14085 }, { "epoch": 12.480070859167405, "grad_norm": 0.28878286480903625, "learning_rate": 1e-05, "loss": 1.0084, "step": 14090 }, { "epoch": 12.484499557130203, "grad_norm": 0.2391795814037323, "learning_rate": 1e-05, "loss": 0.9822, "step": 14095 }, { "epoch": 12.488928255093002, "grad_norm": 0.3022184669971466, "learning_rate": 1e-05, "loss": 0.9688, "step": 14100 }, { "epoch": 12.493356953055802, "grad_norm": 0.27281835675239563, "learning_rate": 1e-05, "loss": 0.9955, "step": 14105 }, { "epoch": 12.497785651018601, "grad_norm": 0.26640427112579346, "learning_rate": 1e-05, "loss": 0.9926, "step": 14110 }, { "epoch": 12.502214348981399, "grad_norm": 0.26127880811691284, "learning_rate": 1e-05, "loss": 1.0375, "step": 14115 }, { "epoch": 12.506643046944198, "grad_norm": 0.2945282459259033, "learning_rate": 1e-05, "loss": 1.0102, "step": 14120 }, { "epoch": 12.511071744906998, "grad_norm": 0.2432110607624054, "learning_rate": 1e-05, "loss": 1.0182, "step": 14125 }, { "epoch": 12.515500442869797, "grad_norm": 0.24952492117881775, "learning_rate": 1e-05, "loss": 0.966, "step": 14130 }, { "epoch": 12.519929140832595, "grad_norm": 0.26578807830810547, "learning_rate": 1e-05, "loss": 0.9981, "step": 14135 }, { "epoch": 12.524357838795394, "grad_norm": 0.2699507772922516, "learning_rate": 1e-05, "loss": 1.0095, "step": 14140 }, { "epoch": 12.528786536758194, "grad_norm": 0.30454644560813904, "learning_rate": 1e-05, "loss": 0.9595, "step": 14145 }, { "epoch": 12.533215234720991, "grad_norm": 0.38640955090522766, "learning_rate": 1e-05, "loss": 0.9935, "step": 14150 }, { "epoch": 12.53764393268379, "grad_norm": 0.33499661087989807, "learning_rate": 1e-05, "loss": 1.016, "step": 14155 }, { "epoch": 12.54207263064659, "grad_norm": 0.32563361525535583, "learning_rate": 1e-05, "loss": 0.9587, "step": 14160 }, { "epoch": 12.54650132860939, "grad_norm": 0.30377498269081116, "learning_rate": 1e-05, "loss": 1.0553, "step": 14165 }, { "epoch": 12.550930026572187, "grad_norm": 0.29143550992012024, "learning_rate": 1e-05, "loss": 0.9854, "step": 14170 }, { "epoch": 12.555358724534987, "grad_norm": 0.24909621477127075, "learning_rate": 1e-05, "loss": 1.024, "step": 14175 }, { "epoch": 12.559787422497786, "grad_norm": 0.2271912693977356, "learning_rate": 1e-05, "loss": 0.9619, "step": 14180 }, { "epoch": 12.564216120460584, "grad_norm": 0.2555599510669708, "learning_rate": 1e-05, "loss": 1.0473, "step": 14185 }, { "epoch": 12.568644818423383, "grad_norm": 0.23209033906459808, "learning_rate": 1e-05, "loss": 0.9964, "step": 14190 }, { "epoch": 12.573073516386183, "grad_norm": 0.2560659348964691, "learning_rate": 1e-05, "loss": 0.9807, "step": 14195 }, { "epoch": 12.577502214348982, "grad_norm": 0.3476274907588959, "learning_rate": 1e-05, "loss": 1.0273, "step": 14200 }, { "epoch": 12.58193091231178, "grad_norm": 0.2799437940120697, "learning_rate": 1e-05, "loss": 1.0095, "step": 14205 }, { "epoch": 12.58635961027458, "grad_norm": 0.30695584416389465, "learning_rate": 1e-05, "loss": 0.9815, "step": 14210 }, { "epoch": 12.590788308237379, "grad_norm": 0.2842298746109009, "learning_rate": 1e-05, "loss": 1.0303, "step": 14215 }, { "epoch": 12.595217006200176, "grad_norm": 0.26325708627700806, "learning_rate": 1e-05, "loss": 0.9819, "step": 14220 }, { "epoch": 12.599645704162976, "grad_norm": 0.36153414845466614, "learning_rate": 1e-05, "loss": 0.9661, "step": 14225 }, { "epoch": 12.604074402125775, "grad_norm": 0.2362048476934433, "learning_rate": 1e-05, "loss": 1.0034, "step": 14230 }, { "epoch": 12.608503100088575, "grad_norm": 0.22038434445858002, "learning_rate": 1e-05, "loss": 0.9788, "step": 14235 }, { "epoch": 12.612931798051372, "grad_norm": 0.29843786358833313, "learning_rate": 1e-05, "loss": 1.0414, "step": 14240 }, { "epoch": 12.617360496014172, "grad_norm": 0.3027302026748657, "learning_rate": 1e-05, "loss": 0.9951, "step": 14245 }, { "epoch": 12.621789193976971, "grad_norm": 0.301037460565567, "learning_rate": 1e-05, "loss": 1.013, "step": 14250 }, { "epoch": 12.62621789193977, "grad_norm": 0.2412690669298172, "learning_rate": 1e-05, "loss": 0.9818, "step": 14255 }, { "epoch": 12.630646589902568, "grad_norm": 0.2833200693130493, "learning_rate": 1e-05, "loss": 1.0354, "step": 14260 }, { "epoch": 12.635075287865368, "grad_norm": 0.31504499912261963, "learning_rate": 1e-05, "loss": 0.9919, "step": 14265 }, { "epoch": 12.639503985828167, "grad_norm": 0.32337433099746704, "learning_rate": 1e-05, "loss": 0.9391, "step": 14270 }, { "epoch": 12.643932683790965, "grad_norm": 0.2757129967212677, "learning_rate": 1e-05, "loss": 0.9736, "step": 14275 }, { "epoch": 12.648361381753764, "grad_norm": 0.21794089674949646, "learning_rate": 1e-05, "loss": 1.0298, "step": 14280 }, { "epoch": 12.652790079716564, "grad_norm": 0.31632256507873535, "learning_rate": 1e-05, "loss": 1.0105, "step": 14285 }, { "epoch": 12.657218777679363, "grad_norm": 0.28485068678855896, "learning_rate": 1e-05, "loss": 1.0257, "step": 14290 }, { "epoch": 12.66164747564216, "grad_norm": 0.28050705790519714, "learning_rate": 1e-05, "loss": 1.0317, "step": 14295 }, { "epoch": 12.66607617360496, "grad_norm": 0.31393176317214966, "learning_rate": 1e-05, "loss": 0.9718, "step": 14300 }, { "epoch": 12.67050487156776, "grad_norm": 0.3161180317401886, "learning_rate": 1e-05, "loss": 1.0295, "step": 14305 }, { "epoch": 12.674933569530559, "grad_norm": 0.3357236683368683, "learning_rate": 1e-05, "loss": 0.9681, "step": 14310 }, { "epoch": 12.679362267493357, "grad_norm": 0.27490562200546265, "learning_rate": 1e-05, "loss": 1.0383, "step": 14315 }, { "epoch": 12.683790965456156, "grad_norm": 0.29009804129600525, "learning_rate": 1e-05, "loss": 0.98, "step": 14320 }, { "epoch": 12.688219663418955, "grad_norm": 0.34791311621665955, "learning_rate": 1e-05, "loss": 1.0633, "step": 14325 }, { "epoch": 12.692648361381753, "grad_norm": 0.27698037028312683, "learning_rate": 1e-05, "loss": 1.0068, "step": 14330 }, { "epoch": 12.697077059344553, "grad_norm": 0.23974473774433136, "learning_rate": 1e-05, "loss": 0.9799, "step": 14335 }, { "epoch": 12.701505757307352, "grad_norm": 0.25287118554115295, "learning_rate": 1e-05, "loss": 1.0173, "step": 14340 }, { "epoch": 12.70593445527015, "grad_norm": 0.2530422806739807, "learning_rate": 1e-05, "loss": 0.9891, "step": 14345 }, { "epoch": 12.710363153232949, "grad_norm": 0.26218244433403015, "learning_rate": 1e-05, "loss": 1.0155, "step": 14350 }, { "epoch": 12.714791851195749, "grad_norm": 0.26334822177886963, "learning_rate": 1e-05, "loss": 1.0397, "step": 14355 }, { "epoch": 12.719220549158548, "grad_norm": 0.24559754133224487, "learning_rate": 1e-05, "loss": 1.0172, "step": 14360 }, { "epoch": 12.723649247121346, "grad_norm": 0.24789783358573914, "learning_rate": 1e-05, "loss": 0.991, "step": 14365 }, { "epoch": 12.728077945084145, "grad_norm": 0.25581347942352295, "learning_rate": 1e-05, "loss": 0.9946, "step": 14370 }, { "epoch": 12.732506643046944, "grad_norm": 0.33454272150993347, "learning_rate": 1e-05, "loss": 1.0023, "step": 14375 }, { "epoch": 12.736935341009744, "grad_norm": 0.2632225751876831, "learning_rate": 1e-05, "loss": 0.989, "step": 14380 }, { "epoch": 12.741364038972542, "grad_norm": 0.26623839139938354, "learning_rate": 1e-05, "loss": 1.0027, "step": 14385 }, { "epoch": 12.745792736935341, "grad_norm": 0.2904810905456543, "learning_rate": 1e-05, "loss": 0.9863, "step": 14390 }, { "epoch": 12.75022143489814, "grad_norm": 0.3001648485660553, "learning_rate": 1e-05, "loss": 1.0017, "step": 14395 }, { "epoch": 12.754650132860938, "grad_norm": 0.27608737349510193, "learning_rate": 1e-05, "loss": 0.9827, "step": 14400 }, { "epoch": 12.759078830823738, "grad_norm": 0.2190631926059723, "learning_rate": 1e-05, "loss": 1.0343, "step": 14405 }, { "epoch": 12.763507528786537, "grad_norm": 0.23140758275985718, "learning_rate": 1e-05, "loss": 1.02, "step": 14410 }, { "epoch": 12.767936226749336, "grad_norm": 0.24402092397212982, "learning_rate": 1e-05, "loss": 1.0005, "step": 14415 }, { "epoch": 12.772364924712134, "grad_norm": 0.26170435547828674, "learning_rate": 1e-05, "loss": 1.0025, "step": 14420 }, { "epoch": 12.776793622674933, "grad_norm": 0.2686377763748169, "learning_rate": 1e-05, "loss": 0.9909, "step": 14425 }, { "epoch": 12.781222320637733, "grad_norm": 0.36198344826698303, "learning_rate": 1e-05, "loss": 1.0035, "step": 14430 }, { "epoch": 12.785651018600532, "grad_norm": 0.3081618547439575, "learning_rate": 1e-05, "loss": 1.0016, "step": 14435 }, { "epoch": 12.79007971656333, "grad_norm": 0.2812959849834442, "learning_rate": 1e-05, "loss": 1.0045, "step": 14440 }, { "epoch": 12.79450841452613, "grad_norm": 0.42400217056274414, "learning_rate": 1e-05, "loss": 1.0262, "step": 14445 }, { "epoch": 12.798937112488929, "grad_norm": 0.30481165647506714, "learning_rate": 1e-05, "loss": 0.9733, "step": 14450 }, { "epoch": 12.803365810451727, "grad_norm": 0.28901466727256775, "learning_rate": 1e-05, "loss": 0.9569, "step": 14455 }, { "epoch": 12.807794508414526, "grad_norm": 0.2570507228374481, "learning_rate": 1e-05, "loss": 0.9652, "step": 14460 }, { "epoch": 12.812223206377325, "grad_norm": 0.2827906012535095, "learning_rate": 1e-05, "loss": 0.9853, "step": 14465 }, { "epoch": 12.816651904340125, "grad_norm": 0.2440817803144455, "learning_rate": 1e-05, "loss": 1.031, "step": 14470 }, { "epoch": 12.821080602302922, "grad_norm": 0.3164633810520172, "learning_rate": 1e-05, "loss": 1.0056, "step": 14475 }, { "epoch": 12.825509300265722, "grad_norm": 0.25699707865715027, "learning_rate": 1e-05, "loss": 0.9964, "step": 14480 }, { "epoch": 12.829937998228521, "grad_norm": 0.2710092067718506, "learning_rate": 1e-05, "loss": 1.0095, "step": 14485 }, { "epoch": 12.834366696191319, "grad_norm": 0.2854827344417572, "learning_rate": 1e-05, "loss": 1.0136, "step": 14490 }, { "epoch": 12.838795394154118, "grad_norm": 0.24846535921096802, "learning_rate": 1e-05, "loss": 0.997, "step": 14495 }, { "epoch": 12.843224092116918, "grad_norm": 0.29199254512786865, "learning_rate": 1e-05, "loss": 1.0151, "step": 14500 }, { "epoch": 12.847652790079717, "grad_norm": 0.24273204803466797, "learning_rate": 1e-05, "loss": 0.9911, "step": 14505 }, { "epoch": 12.852081488042515, "grad_norm": 0.30488133430480957, "learning_rate": 1e-05, "loss": 1.0016, "step": 14510 }, { "epoch": 12.856510186005314, "grad_norm": 0.27750492095947266, "learning_rate": 1e-05, "loss": 1.0134, "step": 14515 }, { "epoch": 12.860938883968114, "grad_norm": 0.2598741054534912, "learning_rate": 1e-05, "loss": 1.0818, "step": 14520 }, { "epoch": 12.865367581930911, "grad_norm": 0.26312053203582764, "learning_rate": 1e-05, "loss": 1.012, "step": 14525 }, { "epoch": 12.869796279893711, "grad_norm": 0.3174665868282318, "learning_rate": 1e-05, "loss": 0.9947, "step": 14530 }, { "epoch": 12.87422497785651, "grad_norm": 0.2978462278842926, "learning_rate": 1e-05, "loss": 1.001, "step": 14535 }, { "epoch": 12.87865367581931, "grad_norm": 0.26777568459510803, "learning_rate": 1e-05, "loss": 1.0483, "step": 14540 }, { "epoch": 12.883082373782107, "grad_norm": 0.32283392548561096, "learning_rate": 1e-05, "loss": 1.0055, "step": 14545 }, { "epoch": 12.887511071744907, "grad_norm": 0.2876666784286499, "learning_rate": 1e-05, "loss": 0.9545, "step": 14550 }, { "epoch": 12.891939769707706, "grad_norm": 0.2791752219200134, "learning_rate": 1e-05, "loss": 1.0175, "step": 14555 }, { "epoch": 12.896368467670506, "grad_norm": 0.26223334670066833, "learning_rate": 1e-05, "loss": 0.9869, "step": 14560 }, { "epoch": 12.900797165633303, "grad_norm": 0.29784226417541504, "learning_rate": 1e-05, "loss": 0.9979, "step": 14565 }, { "epoch": 12.905225863596103, "grad_norm": 0.2935771644115448, "learning_rate": 1e-05, "loss": 0.9683, "step": 14570 }, { "epoch": 12.909654561558902, "grad_norm": 0.3291126787662506, "learning_rate": 1e-05, "loss": 0.9746, "step": 14575 }, { "epoch": 12.9140832595217, "grad_norm": 0.2663058042526245, "learning_rate": 1e-05, "loss": 0.9938, "step": 14580 }, { "epoch": 12.9185119574845, "grad_norm": 0.321157842874527, "learning_rate": 1e-05, "loss": 1.0313, "step": 14585 }, { "epoch": 12.922940655447299, "grad_norm": 0.28205373883247375, "learning_rate": 1e-05, "loss": 0.9953, "step": 14590 }, { "epoch": 12.927369353410098, "grad_norm": 0.3134406805038452, "learning_rate": 1e-05, "loss": 1.0195, "step": 14595 }, { "epoch": 12.931798051372896, "grad_norm": 0.3107638359069824, "learning_rate": 1e-05, "loss": 0.9517, "step": 14600 }, { "epoch": 12.936226749335695, "grad_norm": 0.3422958552837372, "learning_rate": 1e-05, "loss": 0.9251, "step": 14605 }, { "epoch": 12.940655447298495, "grad_norm": 0.2904197573661804, "learning_rate": 1e-05, "loss": 0.971, "step": 14610 }, { "epoch": 12.945084145261292, "grad_norm": 0.25404492020606995, "learning_rate": 1e-05, "loss": 0.9639, "step": 14615 }, { "epoch": 12.949512843224092, "grad_norm": 0.2975197732448578, "learning_rate": 1e-05, "loss": 1.0371, "step": 14620 }, { "epoch": 12.953941541186891, "grad_norm": 0.28089869022369385, "learning_rate": 1e-05, "loss": 0.9969, "step": 14625 }, { "epoch": 12.95837023914969, "grad_norm": 0.2694743871688843, "learning_rate": 1e-05, "loss": 0.9496, "step": 14630 }, { "epoch": 12.962798937112488, "grad_norm": 0.24171431362628937, "learning_rate": 1e-05, "loss": 0.9711, "step": 14635 }, { "epoch": 12.967227635075288, "grad_norm": 0.3107065260410309, "learning_rate": 1e-05, "loss": 0.9565, "step": 14640 }, { "epoch": 12.971656333038087, "grad_norm": 0.24199900031089783, "learning_rate": 1e-05, "loss": 0.9802, "step": 14645 }, { "epoch": 12.976085031000885, "grad_norm": 0.28428131341934204, "learning_rate": 1e-05, "loss": 0.9283, "step": 14650 }, { "epoch": 12.980513728963684, "grad_norm": 0.3267878293991089, "learning_rate": 1e-05, "loss": 0.9429, "step": 14655 }, { "epoch": 12.984942426926484, "grad_norm": 0.27152273058891296, "learning_rate": 1e-05, "loss": 0.9825, "step": 14660 }, { "epoch": 12.989371124889283, "grad_norm": 0.26075512170791626, "learning_rate": 1e-05, "loss": 1.038, "step": 14665 }, { "epoch": 12.99379982285208, "grad_norm": 0.25247660279273987, "learning_rate": 1e-05, "loss": 1.0845, "step": 14670 }, { "epoch": 12.99822852081488, "grad_norm": 0.2509274482727051, "learning_rate": 1e-05, "loss": 0.9729, "step": 14675 }, { "epoch": 13.00265721877768, "grad_norm": 0.2458716183900833, "learning_rate": 1e-05, "loss": 1.0127, "step": 14680 }, { "epoch": 13.007085916740479, "grad_norm": 0.29686760902404785, "learning_rate": 1e-05, "loss": 1.0091, "step": 14685 }, { "epoch": 13.011514614703277, "grad_norm": 0.23911021649837494, "learning_rate": 1e-05, "loss": 0.945, "step": 14690 }, { "epoch": 13.015943312666076, "grad_norm": 0.32030215859413147, "learning_rate": 1e-05, "loss": 0.9358, "step": 14695 }, { "epoch": 13.020372010628876, "grad_norm": 0.3203235864639282, "learning_rate": 1e-05, "loss": 1.0365, "step": 14700 }, { "epoch": 13.024800708591673, "grad_norm": 0.2907693386077881, "learning_rate": 1e-05, "loss": 0.9804, "step": 14705 }, { "epoch": 13.029229406554473, "grad_norm": 0.26345759630203247, "learning_rate": 1e-05, "loss": 0.945, "step": 14710 }, { "epoch": 13.033658104517272, "grad_norm": 0.2642922103404999, "learning_rate": 1e-05, "loss": 0.9941, "step": 14715 }, { "epoch": 13.038086802480072, "grad_norm": 0.25867316126823425, "learning_rate": 1e-05, "loss": 0.9611, "step": 14720 }, { "epoch": 13.04251550044287, "grad_norm": 0.28513777256011963, "learning_rate": 1e-05, "loss": 0.9982, "step": 14725 }, { "epoch": 13.046944198405669, "grad_norm": 0.24561457335948944, "learning_rate": 1e-05, "loss": 1.0285, "step": 14730 }, { "epoch": 13.051372896368468, "grad_norm": 0.27027419209480286, "learning_rate": 1e-05, "loss": 0.9804, "step": 14735 }, { "epoch": 13.055801594331266, "grad_norm": 0.28715813159942627, "learning_rate": 1e-05, "loss": 1.0003, "step": 14740 }, { "epoch": 13.060230292294065, "grad_norm": 0.2848362624645233, "learning_rate": 1e-05, "loss": 1.0021, "step": 14745 }, { "epoch": 13.064658990256865, "grad_norm": 0.2359524369239807, "learning_rate": 1e-05, "loss": 1.0312, "step": 14750 }, { "epoch": 13.069087688219664, "grad_norm": 0.2665725648403168, "learning_rate": 1e-05, "loss": 1.0897, "step": 14755 }, { "epoch": 13.073516386182462, "grad_norm": 0.2580292224884033, "learning_rate": 1e-05, "loss": 1.0357, "step": 14760 }, { "epoch": 13.077945084145261, "grad_norm": 0.2622484564781189, "learning_rate": 1e-05, "loss": 0.9944, "step": 14765 }, { "epoch": 13.08237378210806, "grad_norm": 0.2700832784175873, "learning_rate": 1e-05, "loss": 0.9963, "step": 14770 }, { "epoch": 13.08680248007086, "grad_norm": 0.285881906747818, "learning_rate": 1e-05, "loss": 1.057, "step": 14775 }, { "epoch": 13.091231178033658, "grad_norm": 0.33594414591789246, "learning_rate": 1e-05, "loss": 0.9121, "step": 14780 }, { "epoch": 13.095659875996457, "grad_norm": 0.23105424642562866, "learning_rate": 1e-05, "loss": 0.94, "step": 14785 }, { "epoch": 13.100088573959257, "grad_norm": 0.2254118025302887, "learning_rate": 1e-05, "loss": 1.019, "step": 14790 }, { "epoch": 13.104517271922054, "grad_norm": 0.29767879843711853, "learning_rate": 1e-05, "loss": 1.0289, "step": 14795 }, { "epoch": 13.108945969884854, "grad_norm": 0.2523473799228668, "learning_rate": 1e-05, "loss": 1.0359, "step": 14800 }, { "epoch": 13.113374667847653, "grad_norm": 0.2674669623374939, "learning_rate": 1e-05, "loss": 0.9397, "step": 14805 }, { "epoch": 13.117803365810452, "grad_norm": 0.2697570025920868, "learning_rate": 1e-05, "loss": 1.0114, "step": 14810 }, { "epoch": 13.12223206377325, "grad_norm": 0.2735148072242737, "learning_rate": 1e-05, "loss": 1.039, "step": 14815 }, { "epoch": 13.12666076173605, "grad_norm": 0.28674471378326416, "learning_rate": 1e-05, "loss": 0.9934, "step": 14820 }, { "epoch": 13.131089459698849, "grad_norm": 0.290833055973053, "learning_rate": 1e-05, "loss": 1.0179, "step": 14825 }, { "epoch": 13.135518157661647, "grad_norm": 0.29840123653411865, "learning_rate": 1e-05, "loss": 0.9705, "step": 14830 }, { "epoch": 13.139946855624446, "grad_norm": 0.2405647486448288, "learning_rate": 1e-05, "loss": 0.9857, "step": 14835 }, { "epoch": 13.144375553587246, "grad_norm": 0.3389081060886383, "learning_rate": 1e-05, "loss": 0.9703, "step": 14840 }, { "epoch": 13.148804251550045, "grad_norm": 0.30209752917289734, "learning_rate": 1e-05, "loss": 0.9833, "step": 14845 }, { "epoch": 13.153232949512843, "grad_norm": 0.25667551159858704, "learning_rate": 1e-05, "loss": 1.0045, "step": 14850 }, { "epoch": 13.157661647475642, "grad_norm": 0.2599428594112396, "learning_rate": 1e-05, "loss": 0.9485, "step": 14855 }, { "epoch": 13.162090345438441, "grad_norm": 0.2966095507144928, "learning_rate": 1e-05, "loss": 0.9792, "step": 14860 }, { "epoch": 13.166519043401241, "grad_norm": 0.2184838205575943, "learning_rate": 1e-05, "loss": 1.0457, "step": 14865 }, { "epoch": 13.170947741364039, "grad_norm": 0.2505369782447815, "learning_rate": 1e-05, "loss": 0.9993, "step": 14870 }, { "epoch": 13.175376439326838, "grad_norm": 0.2525484561920166, "learning_rate": 1e-05, "loss": 1.0025, "step": 14875 }, { "epoch": 13.179805137289637, "grad_norm": 0.2752380967140198, "learning_rate": 1e-05, "loss": 0.9764, "step": 14880 }, { "epoch": 13.184233835252435, "grad_norm": 0.2934929430484772, "learning_rate": 1e-05, "loss": 0.9742, "step": 14885 }, { "epoch": 13.188662533215235, "grad_norm": 0.29155921936035156, "learning_rate": 1e-05, "loss": 0.9785, "step": 14890 }, { "epoch": 13.193091231178034, "grad_norm": 0.278621643781662, "learning_rate": 1e-05, "loss": 1.0109, "step": 14895 }, { "epoch": 13.197519929140833, "grad_norm": 0.2534531354904175, "learning_rate": 1e-05, "loss": 0.9504, "step": 14900 }, { "epoch": 13.201948627103631, "grad_norm": 0.2333713173866272, "learning_rate": 1e-05, "loss": 1.0073, "step": 14905 }, { "epoch": 13.20637732506643, "grad_norm": 0.2530066967010498, "learning_rate": 1e-05, "loss": 1.0505, "step": 14910 }, { "epoch": 13.21080602302923, "grad_norm": 0.24350881576538086, "learning_rate": 1e-05, "loss": 0.9785, "step": 14915 }, { "epoch": 13.215234720992028, "grad_norm": 0.2508932054042816, "learning_rate": 1e-05, "loss": 1.0025, "step": 14920 }, { "epoch": 13.219663418954827, "grad_norm": 0.25571197271347046, "learning_rate": 1e-05, "loss": 1.0282, "step": 14925 }, { "epoch": 13.224092116917626, "grad_norm": 0.2576509416103363, "learning_rate": 1e-05, "loss": 0.9728, "step": 14930 }, { "epoch": 13.228520814880426, "grad_norm": 0.2673387825489044, "learning_rate": 1e-05, "loss": 0.968, "step": 14935 }, { "epoch": 13.232949512843224, "grad_norm": 0.29158371686935425, "learning_rate": 1e-05, "loss": 0.98, "step": 14940 }, { "epoch": 13.237378210806023, "grad_norm": 0.26073262095451355, "learning_rate": 1e-05, "loss": 0.9418, "step": 14945 }, { "epoch": 13.241806908768822, "grad_norm": 0.28596410155296326, "learning_rate": 1e-05, "loss": 0.9739, "step": 14950 }, { "epoch": 13.24623560673162, "grad_norm": 0.28583839535713196, "learning_rate": 1e-05, "loss": 1.0221, "step": 14955 }, { "epoch": 13.25066430469442, "grad_norm": 0.3165747821331024, "learning_rate": 1e-05, "loss": 0.9681, "step": 14960 }, { "epoch": 13.255093002657219, "grad_norm": 0.3266614079475403, "learning_rate": 1e-05, "loss": 0.9622, "step": 14965 }, { "epoch": 13.259521700620018, "grad_norm": 0.25403618812561035, "learning_rate": 1e-05, "loss": 0.9813, "step": 14970 }, { "epoch": 13.263950398582816, "grad_norm": 0.3080897331237793, "learning_rate": 1e-05, "loss": 0.9779, "step": 14975 }, { "epoch": 13.268379096545615, "grad_norm": 0.253658264875412, "learning_rate": 1e-05, "loss": 0.994, "step": 14980 }, { "epoch": 13.272807794508415, "grad_norm": 0.26744991540908813, "learning_rate": 1e-05, "loss": 1.0078, "step": 14985 }, { "epoch": 13.277236492471214, "grad_norm": 0.3480512201786041, "learning_rate": 1e-05, "loss": 0.9449, "step": 14990 }, { "epoch": 13.281665190434012, "grad_norm": 0.4289158880710602, "learning_rate": 1e-05, "loss": 1.0122, "step": 14995 }, { "epoch": 13.286093888396811, "grad_norm": 0.28872525691986084, "learning_rate": 1e-05, "loss": 0.984, "step": 15000 }, { "epoch": 13.29052258635961, "grad_norm": 0.3190418481826782, "learning_rate": 1e-05, "loss": 1.0054, "step": 15005 }, { "epoch": 13.294951284322408, "grad_norm": 0.3040004074573517, "learning_rate": 1e-05, "loss": 1.0298, "step": 15010 }, { "epoch": 13.299379982285208, "grad_norm": 0.32927224040031433, "learning_rate": 1e-05, "loss": 0.9695, "step": 15015 }, { "epoch": 13.303808680248007, "grad_norm": 0.26025819778442383, "learning_rate": 1e-05, "loss": 0.9537, "step": 15020 }, { "epoch": 13.308237378210807, "grad_norm": 0.24589228630065918, "learning_rate": 1e-05, "loss": 1.0233, "step": 15025 }, { "epoch": 13.312666076173604, "grad_norm": 0.24863485991954803, "learning_rate": 1e-05, "loss": 1.01, "step": 15030 }, { "epoch": 13.317094774136404, "grad_norm": 0.2284671813249588, "learning_rate": 1e-05, "loss": 0.9901, "step": 15035 }, { "epoch": 13.321523472099203, "grad_norm": 0.32172033190727234, "learning_rate": 1e-05, "loss": 0.9884, "step": 15040 }, { "epoch": 13.325952170062001, "grad_norm": 0.2671842575073242, "learning_rate": 1e-05, "loss": 0.9827, "step": 15045 }, { "epoch": 13.3303808680248, "grad_norm": 0.2727336585521698, "learning_rate": 1e-05, "loss": 1.0107, "step": 15050 }, { "epoch": 13.3348095659876, "grad_norm": 0.2501024901866913, "learning_rate": 1e-05, "loss": 1.0398, "step": 15055 }, { "epoch": 13.3392382639504, "grad_norm": 0.29526761174201965, "learning_rate": 1e-05, "loss": 0.9378, "step": 15060 }, { "epoch": 13.343666961913197, "grad_norm": 0.27258872985839844, "learning_rate": 1e-05, "loss": 0.9929, "step": 15065 }, { "epoch": 13.348095659875996, "grad_norm": 0.2876748740673065, "learning_rate": 1e-05, "loss": 0.9839, "step": 15070 }, { "epoch": 13.352524357838796, "grad_norm": 0.29867053031921387, "learning_rate": 1e-05, "loss": 0.979, "step": 15075 }, { "epoch": 13.356953055801593, "grad_norm": 0.33151668310165405, "learning_rate": 1e-05, "loss": 1.015, "step": 15080 }, { "epoch": 13.361381753764393, "grad_norm": 0.2648833692073822, "learning_rate": 1e-05, "loss": 1.0728, "step": 15085 }, { "epoch": 13.365810451727192, "grad_norm": 0.2855778932571411, "learning_rate": 1e-05, "loss": 1.01, "step": 15090 }, { "epoch": 13.370239149689992, "grad_norm": 0.268180787563324, "learning_rate": 1e-05, "loss": 1.0241, "step": 15095 }, { "epoch": 13.37466784765279, "grad_norm": 0.23653945326805115, "learning_rate": 1e-05, "loss": 1.0189, "step": 15100 }, { "epoch": 13.379096545615589, "grad_norm": 0.2820050120353699, "learning_rate": 1e-05, "loss": 0.9839, "step": 15105 }, { "epoch": 13.383525243578388, "grad_norm": 0.2409396767616272, "learning_rate": 1e-05, "loss": 0.9866, "step": 15110 }, { "epoch": 13.387953941541188, "grad_norm": 0.3344702422618866, "learning_rate": 1e-05, "loss": 1.0461, "step": 15115 }, { "epoch": 13.392382639503985, "grad_norm": 0.2607822120189667, "learning_rate": 1e-05, "loss": 1.0144, "step": 15120 }, { "epoch": 13.396811337466785, "grad_norm": 0.24322141706943512, "learning_rate": 1e-05, "loss": 1.0225, "step": 15125 }, { "epoch": 13.401240035429584, "grad_norm": 0.27175086736679077, "learning_rate": 1e-05, "loss": 0.9583, "step": 15130 }, { "epoch": 13.405668733392382, "grad_norm": 0.3235015869140625, "learning_rate": 1e-05, "loss": 1.0584, "step": 15135 }, { "epoch": 13.410097431355181, "grad_norm": 0.2846199572086334, "learning_rate": 1e-05, "loss": 1.0427, "step": 15140 }, { "epoch": 13.41452612931798, "grad_norm": 0.2673349976539612, "learning_rate": 1e-05, "loss": 1.0138, "step": 15145 }, { "epoch": 13.41895482728078, "grad_norm": 0.22861461341381073, "learning_rate": 1e-05, "loss": 1.0271, "step": 15150 }, { "epoch": 13.423383525243578, "grad_norm": 0.22525490820407867, "learning_rate": 1e-05, "loss": 0.9943, "step": 15155 }, { "epoch": 13.427812223206377, "grad_norm": 0.2689531147480011, "learning_rate": 1e-05, "loss": 1.0252, "step": 15160 }, { "epoch": 13.432240921169177, "grad_norm": 0.2856645882129669, "learning_rate": 1e-05, "loss": 0.9808, "step": 15165 }, { "epoch": 13.436669619131976, "grad_norm": 0.23056024312973022, "learning_rate": 1e-05, "loss": 0.9979, "step": 15170 }, { "epoch": 13.441098317094774, "grad_norm": 0.24246810376644135, "learning_rate": 1e-05, "loss": 1.0106, "step": 15175 }, { "epoch": 13.445527015057573, "grad_norm": 0.23817594349384308, "learning_rate": 1e-05, "loss": 1.0057, "step": 15180 }, { "epoch": 13.449955713020373, "grad_norm": 0.23851200938224792, "learning_rate": 1e-05, "loss": 1.0051, "step": 15185 }, { "epoch": 13.45438441098317, "grad_norm": 0.2606596350669861, "learning_rate": 1e-05, "loss": 1.0776, "step": 15190 }, { "epoch": 13.45881310894597, "grad_norm": 0.2664880156517029, "learning_rate": 1e-05, "loss": 1.0118, "step": 15195 }, { "epoch": 13.46324180690877, "grad_norm": 0.32686474919319153, "learning_rate": 1e-05, "loss": 0.9924, "step": 15200 }, { "epoch": 13.467670504871569, "grad_norm": 0.2740435004234314, "learning_rate": 1e-05, "loss": 1.0068, "step": 15205 }, { "epoch": 13.472099202834366, "grad_norm": 0.2651437819004059, "learning_rate": 1e-05, "loss": 1.0167, "step": 15210 }, { "epoch": 13.476527900797166, "grad_norm": 0.2624920904636383, "learning_rate": 1e-05, "loss": 1.0268, "step": 15215 }, { "epoch": 13.480956598759965, "grad_norm": 0.29028382897377014, "learning_rate": 1e-05, "loss": 1.0031, "step": 15220 }, { "epoch": 13.485385296722763, "grad_norm": 0.2924567461013794, "learning_rate": 1e-05, "loss": 1.0102, "step": 15225 }, { "epoch": 13.489813994685562, "grad_norm": 0.29711809754371643, "learning_rate": 1e-05, "loss": 0.9957, "step": 15230 }, { "epoch": 13.494242692648362, "grad_norm": 0.2705939710140228, "learning_rate": 1e-05, "loss": 0.9918, "step": 15235 }, { "epoch": 13.498671390611161, "grad_norm": 0.240801140666008, "learning_rate": 1e-05, "loss": 1.0061, "step": 15240 }, { "epoch": 13.503100088573959, "grad_norm": 0.30542322993278503, "learning_rate": 1e-05, "loss": 1.0561, "step": 15245 }, { "epoch": 13.507528786536758, "grad_norm": 0.33220988512039185, "learning_rate": 1e-05, "loss": 1.0063, "step": 15250 }, { "epoch": 13.511957484499558, "grad_norm": 0.2919374406337738, "learning_rate": 1e-05, "loss": 1.0353, "step": 15255 }, { "epoch": 13.516386182462355, "grad_norm": 0.30203157663345337, "learning_rate": 1e-05, "loss": 0.9545, "step": 15260 }, { "epoch": 13.520814880425155, "grad_norm": 0.2783576250076294, "learning_rate": 1e-05, "loss": 0.9475, "step": 15265 }, { "epoch": 13.525243578387954, "grad_norm": 0.26149386167526245, "learning_rate": 1e-05, "loss": 0.9751, "step": 15270 }, { "epoch": 13.529672276350754, "grad_norm": 0.3352258503437042, "learning_rate": 1e-05, "loss": 1.0392, "step": 15275 }, { "epoch": 13.534100974313551, "grad_norm": 0.25567755103111267, "learning_rate": 1e-05, "loss": 0.9992, "step": 15280 }, { "epoch": 13.53852967227635, "grad_norm": 0.24535495042800903, "learning_rate": 1e-05, "loss": 0.9804, "step": 15285 }, { "epoch": 13.54295837023915, "grad_norm": 0.3483872413635254, "learning_rate": 1e-05, "loss": 1.0367, "step": 15290 }, { "epoch": 13.54738706820195, "grad_norm": 0.27215635776519775, "learning_rate": 1e-05, "loss": 1.0862, "step": 15295 }, { "epoch": 13.551815766164747, "grad_norm": 0.24170774221420288, "learning_rate": 1e-05, "loss": 0.9805, "step": 15300 }, { "epoch": 13.556244464127547, "grad_norm": 0.22384804487228394, "learning_rate": 1e-05, "loss": 1.0302, "step": 15305 }, { "epoch": 13.560673162090346, "grad_norm": 0.3383476734161377, "learning_rate": 1e-05, "loss": 0.9782, "step": 15310 }, { "epoch": 13.565101860053144, "grad_norm": 0.2526433765888214, "learning_rate": 1e-05, "loss": 1.0103, "step": 15315 }, { "epoch": 13.569530558015943, "grad_norm": 0.26605209708213806, "learning_rate": 1e-05, "loss": 0.9819, "step": 15320 }, { "epoch": 13.573959255978743, "grad_norm": 0.2773497998714447, "learning_rate": 1e-05, "loss": 0.9337, "step": 15325 }, { "epoch": 13.578387953941542, "grad_norm": 0.28566789627075195, "learning_rate": 1e-05, "loss": 0.9854, "step": 15330 }, { "epoch": 13.58281665190434, "grad_norm": 0.25376608967781067, "learning_rate": 1e-05, "loss": 1.0461, "step": 15335 }, { "epoch": 13.587245349867139, "grad_norm": 0.3573301136493683, "learning_rate": 1e-05, "loss": 1.0166, "step": 15340 }, { "epoch": 13.591674047829938, "grad_norm": 0.2915681004524231, "learning_rate": 1e-05, "loss": 1.0186, "step": 15345 }, { "epoch": 13.596102745792736, "grad_norm": 0.30945998430252075, "learning_rate": 1e-05, "loss": 1.0011, "step": 15350 }, { "epoch": 13.600531443755536, "grad_norm": 0.35102200508117676, "learning_rate": 1e-05, "loss": 1.0017, "step": 15355 }, { "epoch": 13.604960141718335, "grad_norm": 0.25039467215538025, "learning_rate": 1e-05, "loss": 0.9554, "step": 15360 }, { "epoch": 13.609388839681134, "grad_norm": 0.23250414431095123, "learning_rate": 1e-05, "loss": 0.9637, "step": 15365 }, { "epoch": 13.613817537643932, "grad_norm": 0.29039010405540466, "learning_rate": 1e-05, "loss": 0.9759, "step": 15370 }, { "epoch": 13.618246235606732, "grad_norm": 0.28464218974113464, "learning_rate": 1e-05, "loss": 0.9982, "step": 15375 }, { "epoch": 13.622674933569531, "grad_norm": 0.2606031894683838, "learning_rate": 1e-05, "loss": 0.9272, "step": 15380 }, { "epoch": 13.627103631532329, "grad_norm": 0.27778005599975586, "learning_rate": 1e-05, "loss": 1.0419, "step": 15385 }, { "epoch": 13.631532329495128, "grad_norm": 0.2646855115890503, "learning_rate": 1e-05, "loss": 0.953, "step": 15390 }, { "epoch": 13.635961027457927, "grad_norm": 0.25512513518333435, "learning_rate": 1e-05, "loss": 0.9166, "step": 15395 }, { "epoch": 13.640389725420727, "grad_norm": 0.2692898213863373, "learning_rate": 1e-05, "loss": 1.0, "step": 15400 }, { "epoch": 13.644818423383525, "grad_norm": 0.21172656118869781, "learning_rate": 1e-05, "loss": 1.0305, "step": 15405 }, { "epoch": 13.649247121346324, "grad_norm": 0.23341864347457886, "learning_rate": 1e-05, "loss": 1.0236, "step": 15410 }, { "epoch": 13.653675819309123, "grad_norm": 0.2582659125328064, "learning_rate": 1e-05, "loss": 0.9949, "step": 15415 }, { "epoch": 13.658104517271923, "grad_norm": 0.2700680196285248, "learning_rate": 1e-05, "loss": 0.9915, "step": 15420 }, { "epoch": 13.66253321523472, "grad_norm": 0.22885730862617493, "learning_rate": 1e-05, "loss": 0.9629, "step": 15425 }, { "epoch": 13.66696191319752, "grad_norm": 0.36526116728782654, "learning_rate": 1e-05, "loss": 0.9791, "step": 15430 }, { "epoch": 13.67139061116032, "grad_norm": 0.2604011595249176, "learning_rate": 1e-05, "loss": 1.0284, "step": 15435 }, { "epoch": 13.675819309123117, "grad_norm": 0.29298102855682373, "learning_rate": 1e-05, "loss": 1.0024, "step": 15440 }, { "epoch": 13.680248007085916, "grad_norm": 0.23797187209129333, "learning_rate": 1e-05, "loss": 1.0333, "step": 15445 }, { "epoch": 13.684676705048716, "grad_norm": 0.25061294436454773, "learning_rate": 1e-05, "loss": 0.9483, "step": 15450 }, { "epoch": 13.689105403011515, "grad_norm": 0.3172397017478943, "learning_rate": 1e-05, "loss": 0.9584, "step": 15455 }, { "epoch": 13.693534100974313, "grad_norm": 0.2732192575931549, "learning_rate": 1e-05, "loss": 0.9538, "step": 15460 }, { "epoch": 13.697962798937112, "grad_norm": 0.3379781246185303, "learning_rate": 1e-05, "loss": 1.0113, "step": 15465 }, { "epoch": 13.702391496899912, "grad_norm": 0.29626280069351196, "learning_rate": 1e-05, "loss": 1.026, "step": 15470 }, { "epoch": 13.706820194862711, "grad_norm": 0.25145697593688965, "learning_rate": 1e-05, "loss": 0.9972, "step": 15475 }, { "epoch": 13.711248892825509, "grad_norm": 0.28558701276779175, "learning_rate": 1e-05, "loss": 1.0352, "step": 15480 }, { "epoch": 13.715677590788308, "grad_norm": 0.2868648171424866, "learning_rate": 1e-05, "loss": 0.975, "step": 15485 }, { "epoch": 13.720106288751108, "grad_norm": 0.22914022207260132, "learning_rate": 1e-05, "loss": 0.9817, "step": 15490 }, { "epoch": 13.724534986713905, "grad_norm": 0.2646102011203766, "learning_rate": 1e-05, "loss": 0.9717, "step": 15495 }, { "epoch": 13.728963684676705, "grad_norm": 0.29670560359954834, "learning_rate": 1e-05, "loss": 1.0009, "step": 15500 }, { "epoch": 13.733392382639504, "grad_norm": 0.2894361913204193, "learning_rate": 1e-05, "loss": 0.9859, "step": 15505 }, { "epoch": 13.737821080602302, "grad_norm": 0.32102397084236145, "learning_rate": 1e-05, "loss": 1.0388, "step": 15510 }, { "epoch": 13.742249778565101, "grad_norm": 0.3547017574310303, "learning_rate": 1e-05, "loss": 1.0293, "step": 15515 }, { "epoch": 13.7466784765279, "grad_norm": 0.2745082974433899, "learning_rate": 1e-05, "loss": 0.9862, "step": 15520 }, { "epoch": 13.7511071744907, "grad_norm": 0.2461821287870407, "learning_rate": 1e-05, "loss": 0.9745, "step": 15525 }, { "epoch": 13.755535872453498, "grad_norm": 0.30014970898628235, "learning_rate": 1e-05, "loss": 0.9915, "step": 15530 }, { "epoch": 13.759964570416297, "grad_norm": 0.28270408511161804, "learning_rate": 1e-05, "loss": 1.0204, "step": 15535 }, { "epoch": 13.764393268379097, "grad_norm": 0.2666551470756531, "learning_rate": 1e-05, "loss": 0.9972, "step": 15540 }, { "epoch": 13.768821966341896, "grad_norm": 0.2758060693740845, "learning_rate": 1e-05, "loss": 1.0317, "step": 15545 }, { "epoch": 13.773250664304694, "grad_norm": 0.28551164269447327, "learning_rate": 1e-05, "loss": 1.0177, "step": 15550 }, { "epoch": 13.777679362267493, "grad_norm": 0.28669363260269165, "learning_rate": 1e-05, "loss": 0.9949, "step": 15555 }, { "epoch": 13.782108060230293, "grad_norm": 0.27511075139045715, "learning_rate": 1e-05, "loss": 0.9606, "step": 15560 }, { "epoch": 13.78653675819309, "grad_norm": 0.28586411476135254, "learning_rate": 1e-05, "loss": 0.9366, "step": 15565 }, { "epoch": 13.79096545615589, "grad_norm": 0.2807426452636719, "learning_rate": 1e-05, "loss": 0.9796, "step": 15570 }, { "epoch": 13.79539415411869, "grad_norm": 0.25462839007377625, "learning_rate": 1e-05, "loss": 0.9944, "step": 15575 }, { "epoch": 13.799822852081489, "grad_norm": 0.2450139969587326, "learning_rate": 1e-05, "loss": 1.0083, "step": 15580 }, { "epoch": 13.804251550044286, "grad_norm": 0.25302863121032715, "learning_rate": 1e-05, "loss": 1.0281, "step": 15585 }, { "epoch": 13.808680248007086, "grad_norm": 0.2476828545331955, "learning_rate": 1e-05, "loss": 0.9648, "step": 15590 }, { "epoch": 13.813108945969885, "grad_norm": 0.23024851083755493, "learning_rate": 1e-05, "loss": 1.0201, "step": 15595 }, { "epoch": 13.817537643932685, "grad_norm": 0.24292601644992828, "learning_rate": 1e-05, "loss": 0.9932, "step": 15600 }, { "epoch": 13.821966341895482, "grad_norm": 0.2939973473548889, "learning_rate": 1e-05, "loss": 0.959, "step": 15605 }, { "epoch": 13.826395039858282, "grad_norm": 0.25731831789016724, "learning_rate": 1e-05, "loss": 0.9863, "step": 15610 }, { "epoch": 13.830823737821081, "grad_norm": 0.26878997683525085, "learning_rate": 1e-05, "loss": 1.0011, "step": 15615 }, { "epoch": 13.835252435783879, "grad_norm": 0.3293323218822479, "learning_rate": 1e-05, "loss": 0.9385, "step": 15620 }, { "epoch": 13.839681133746678, "grad_norm": 0.25292903184890747, "learning_rate": 1e-05, "loss": 0.9618, "step": 15625 }, { "epoch": 13.844109831709478, "grad_norm": 0.24890176951885223, "learning_rate": 1e-05, "loss": 1.0377, "step": 15630 }, { "epoch": 13.848538529672277, "grad_norm": 0.28238019347190857, "learning_rate": 1e-05, "loss": 0.9491, "step": 15635 }, { "epoch": 13.852967227635075, "grad_norm": 0.28742435574531555, "learning_rate": 1e-05, "loss": 0.9654, "step": 15640 }, { "epoch": 13.857395925597874, "grad_norm": 0.27353227138519287, "learning_rate": 1e-05, "loss": 1.0555, "step": 15645 }, { "epoch": 13.861824623560674, "grad_norm": 0.24027560651302338, "learning_rate": 1e-05, "loss": 0.9973, "step": 15650 }, { "epoch": 13.866253321523471, "grad_norm": 0.2460683435201645, "learning_rate": 1e-05, "loss": 1.029, "step": 15655 }, { "epoch": 13.87068201948627, "grad_norm": 0.29546400904655457, "learning_rate": 1e-05, "loss": 0.9625, "step": 15660 }, { "epoch": 13.87511071744907, "grad_norm": 0.23563817143440247, "learning_rate": 1e-05, "loss": 1.0281, "step": 15665 }, { "epoch": 13.87953941541187, "grad_norm": 0.2567593455314636, "learning_rate": 1e-05, "loss": 0.9981, "step": 15670 }, { "epoch": 13.883968113374667, "grad_norm": 0.2727123498916626, "learning_rate": 1e-05, "loss": 1.0415, "step": 15675 }, { "epoch": 13.888396811337467, "grad_norm": 0.31490030884742737, "learning_rate": 1e-05, "loss": 1.0059, "step": 15680 }, { "epoch": 13.892825509300266, "grad_norm": 0.25928354263305664, "learning_rate": 1e-05, "loss": 0.9547, "step": 15685 }, { "epoch": 13.897254207263064, "grad_norm": 0.24376000463962555, "learning_rate": 1e-05, "loss": 0.9727, "step": 15690 }, { "epoch": 13.901682905225863, "grad_norm": 0.266998291015625, "learning_rate": 1e-05, "loss": 0.9553, "step": 15695 }, { "epoch": 13.906111603188663, "grad_norm": 0.23325850069522858, "learning_rate": 1e-05, "loss": 0.9879, "step": 15700 }, { "epoch": 13.910540301151462, "grad_norm": 0.2657238841056824, "learning_rate": 1e-05, "loss": 0.9308, "step": 15705 }, { "epoch": 13.91496899911426, "grad_norm": 0.28277379274368286, "learning_rate": 1e-05, "loss": 1.0249, "step": 15710 }, { "epoch": 13.91939769707706, "grad_norm": 0.2881607413291931, "learning_rate": 1e-05, "loss": 1.0031, "step": 15715 }, { "epoch": 13.923826395039859, "grad_norm": 0.30713793635368347, "learning_rate": 1e-05, "loss": 1.0064, "step": 15720 }, { "epoch": 13.928255093002658, "grad_norm": 0.2747843265533447, "learning_rate": 1e-05, "loss": 0.9452, "step": 15725 }, { "epoch": 13.932683790965456, "grad_norm": 0.3049401044845581, "learning_rate": 1e-05, "loss": 1.0305, "step": 15730 }, { "epoch": 13.937112488928255, "grad_norm": 0.2870447635650635, "learning_rate": 1e-05, "loss": 1.015, "step": 15735 }, { "epoch": 13.941541186891055, "grad_norm": 0.23038822412490845, "learning_rate": 1e-05, "loss": 0.997, "step": 15740 }, { "epoch": 13.945969884853852, "grad_norm": 0.22686836123466492, "learning_rate": 1e-05, "loss": 1.0305, "step": 15745 }, { "epoch": 13.950398582816652, "grad_norm": 0.2563081681728363, "learning_rate": 1e-05, "loss": 1.0248, "step": 15750 }, { "epoch": 13.954827280779451, "grad_norm": 0.2691231966018677, "learning_rate": 1e-05, "loss": 0.9471, "step": 15755 }, { "epoch": 13.95925597874225, "grad_norm": 0.2593192756175995, "learning_rate": 1e-05, "loss": 1.0345, "step": 15760 }, { "epoch": 13.963684676705048, "grad_norm": 0.2814178466796875, "learning_rate": 1e-05, "loss": 0.9683, "step": 15765 }, { "epoch": 13.968113374667848, "grad_norm": 0.3045186400413513, "learning_rate": 1e-05, "loss": 1.0113, "step": 15770 }, { "epoch": 13.972542072630647, "grad_norm": 0.2753313481807709, "learning_rate": 1e-05, "loss": 1.0216, "step": 15775 }, { "epoch": 13.976970770593445, "grad_norm": 0.2835383415222168, "learning_rate": 1e-05, "loss": 0.9403, "step": 15780 }, { "epoch": 13.981399468556244, "grad_norm": 0.2729502022266388, "learning_rate": 1e-05, "loss": 1.0389, "step": 15785 }, { "epoch": 13.985828166519044, "grad_norm": 0.325484037399292, "learning_rate": 1e-05, "loss": 0.9882, "step": 15790 }, { "epoch": 13.990256864481843, "grad_norm": 0.28955528140068054, "learning_rate": 1e-05, "loss": 1.0341, "step": 15795 }, { "epoch": 13.99468556244464, "grad_norm": 0.22102151811122894, "learning_rate": 1e-05, "loss": 0.9887, "step": 15800 }, { "epoch": 13.99911426040744, "grad_norm": 0.23810943961143494, "learning_rate": 1e-05, "loss": 0.9985, "step": 15805 }, { "epoch": 14.00354295837024, "grad_norm": 0.2610328495502472, "learning_rate": 1e-05, "loss": 0.9846, "step": 15810 }, { "epoch": 14.007971656333037, "grad_norm": 0.2607215940952301, "learning_rate": 1e-05, "loss": 1.0094, "step": 15815 }, { "epoch": 14.012400354295837, "grad_norm": 0.28001588582992554, "learning_rate": 1e-05, "loss": 1.0011, "step": 15820 }, { "epoch": 14.016829052258636, "grad_norm": 0.32271939516067505, "learning_rate": 1e-05, "loss": 0.9703, "step": 15825 }, { "epoch": 14.021257750221436, "grad_norm": 0.26282185316085815, "learning_rate": 1e-05, "loss": 1.0151, "step": 15830 }, { "epoch": 14.025686448184233, "grad_norm": 0.2943328619003296, "learning_rate": 1e-05, "loss": 0.945, "step": 15835 }, { "epoch": 14.030115146147033, "grad_norm": 0.31595247983932495, "learning_rate": 1e-05, "loss": 0.9668, "step": 15840 }, { "epoch": 14.034543844109832, "grad_norm": 0.2925513982772827, "learning_rate": 1e-05, "loss": 0.9814, "step": 15845 }, { "epoch": 14.038972542072631, "grad_norm": 0.2770253121852875, "learning_rate": 1e-05, "loss": 0.9752, "step": 15850 }, { "epoch": 14.043401240035429, "grad_norm": 0.34785792231559753, "learning_rate": 1e-05, "loss": 0.9695, "step": 15855 }, { "epoch": 14.047829937998229, "grad_norm": 0.25001445412635803, "learning_rate": 1e-05, "loss": 1.001, "step": 15860 }, { "epoch": 14.052258635961028, "grad_norm": 0.28387728333473206, "learning_rate": 1e-05, "loss": 0.9935, "step": 15865 }, { "epoch": 14.056687333923826, "grad_norm": 0.2730706036090851, "learning_rate": 1e-05, "loss": 1.026, "step": 15870 }, { "epoch": 14.061116031886625, "grad_norm": 0.2684321105480194, "learning_rate": 1e-05, "loss": 1.0011, "step": 15875 }, { "epoch": 14.065544729849424, "grad_norm": 0.25972771644592285, "learning_rate": 1e-05, "loss": 0.9836, "step": 15880 }, { "epoch": 14.069973427812224, "grad_norm": 0.26421937346458435, "learning_rate": 1e-05, "loss": 1.0089, "step": 15885 }, { "epoch": 14.074402125775022, "grad_norm": 0.31100571155548096, "learning_rate": 1e-05, "loss": 1.0172, "step": 15890 }, { "epoch": 14.078830823737821, "grad_norm": 0.26712560653686523, "learning_rate": 1e-05, "loss": 1.0526, "step": 15895 }, { "epoch": 14.08325952170062, "grad_norm": 0.27068427205085754, "learning_rate": 1e-05, "loss": 0.9839, "step": 15900 }, { "epoch": 14.087688219663418, "grad_norm": 0.3587118983268738, "learning_rate": 1e-05, "loss": 0.9973, "step": 15905 }, { "epoch": 14.092116917626218, "grad_norm": 0.38153502345085144, "learning_rate": 1e-05, "loss": 0.9977, "step": 15910 }, { "epoch": 14.096545615589017, "grad_norm": 0.25527530908584595, "learning_rate": 1e-05, "loss": 1.0246, "step": 15915 }, { "epoch": 14.100974313551816, "grad_norm": 0.2993118166923523, "learning_rate": 1e-05, "loss": 1.01, "step": 15920 }, { "epoch": 14.105403011514614, "grad_norm": 0.2507144510746002, "learning_rate": 1e-05, "loss": 1.035, "step": 15925 }, { "epoch": 14.109831709477413, "grad_norm": 0.3291673958301544, "learning_rate": 1e-05, "loss": 0.9852, "step": 15930 }, { "epoch": 14.114260407440213, "grad_norm": 0.22528529167175293, "learning_rate": 1e-05, "loss": 1.0087, "step": 15935 }, { "epoch": 14.118689105403012, "grad_norm": 0.24888576567173004, "learning_rate": 1e-05, "loss": 1.0312, "step": 15940 }, { "epoch": 14.12311780336581, "grad_norm": 0.24165961146354675, "learning_rate": 1e-05, "loss": 1.0081, "step": 15945 }, { "epoch": 14.12754650132861, "grad_norm": 0.28978678584098816, "learning_rate": 1e-05, "loss": 1.0264, "step": 15950 }, { "epoch": 14.131975199291409, "grad_norm": 0.2681102752685547, "learning_rate": 1e-05, "loss": 1.0319, "step": 15955 }, { "epoch": 14.136403897254207, "grad_norm": 0.24786819517612457, "learning_rate": 1e-05, "loss": 1.0259, "step": 15960 }, { "epoch": 14.140832595217006, "grad_norm": 0.2648155987262726, "learning_rate": 1e-05, "loss": 0.9889, "step": 15965 }, { "epoch": 14.145261293179805, "grad_norm": 0.25509393215179443, "learning_rate": 1e-05, "loss": 0.9482, "step": 15970 }, { "epoch": 14.149689991142605, "grad_norm": 0.33331871032714844, "learning_rate": 1e-05, "loss": 1.0318, "step": 15975 }, { "epoch": 14.154118689105402, "grad_norm": 0.2945652902126312, "learning_rate": 1e-05, "loss": 1.0115, "step": 15980 }, { "epoch": 14.158547387068202, "grad_norm": 0.41670629382133484, "learning_rate": 1e-05, "loss": 0.9921, "step": 15985 }, { "epoch": 14.162976085031001, "grad_norm": 0.2906208336353302, "learning_rate": 1e-05, "loss": 0.9904, "step": 15990 }, { "epoch": 14.167404782993799, "grad_norm": 0.2625887095928192, "learning_rate": 1e-05, "loss": 1.0215, "step": 15995 }, { "epoch": 14.171833480956598, "grad_norm": 0.2908119559288025, "learning_rate": 1e-05, "loss": 0.9561, "step": 16000 }, { "epoch": 14.176262178919398, "grad_norm": 0.2747705578804016, "learning_rate": 1e-05, "loss": 1.0255, "step": 16005 }, { "epoch": 14.180690876882197, "grad_norm": 0.3522818386554718, "learning_rate": 1e-05, "loss": 1.0089, "step": 16010 }, { "epoch": 14.185119574844995, "grad_norm": 0.27590030431747437, "learning_rate": 1e-05, "loss": 0.9899, "step": 16015 }, { "epoch": 14.189548272807794, "grad_norm": 0.24589639902114868, "learning_rate": 1e-05, "loss": 0.9772, "step": 16020 }, { "epoch": 14.193976970770594, "grad_norm": 0.28107768297195435, "learning_rate": 1e-05, "loss": 0.9196, "step": 16025 }, { "epoch": 14.198405668733393, "grad_norm": 0.26836106181144714, "learning_rate": 1e-05, "loss": 1.004, "step": 16030 }, { "epoch": 14.202834366696191, "grad_norm": 0.2444487065076828, "learning_rate": 1e-05, "loss": 0.9892, "step": 16035 }, { "epoch": 14.20726306465899, "grad_norm": 0.26155325770378113, "learning_rate": 1e-05, "loss": 0.9592, "step": 16040 }, { "epoch": 14.21169176262179, "grad_norm": 0.2920330762863159, "learning_rate": 1e-05, "loss": 0.9875, "step": 16045 }, { "epoch": 14.216120460584587, "grad_norm": 0.3188605010509491, "learning_rate": 1e-05, "loss": 1.0132, "step": 16050 }, { "epoch": 14.220549158547387, "grad_norm": 0.27870675921440125, "learning_rate": 1e-05, "loss": 1.0632, "step": 16055 }, { "epoch": 14.224977856510186, "grad_norm": 0.24375474452972412, "learning_rate": 1e-05, "loss": 0.9836, "step": 16060 }, { "epoch": 14.229406554472986, "grad_norm": 0.28569549322128296, "learning_rate": 1e-05, "loss": 1.0486, "step": 16065 }, { "epoch": 14.233835252435783, "grad_norm": 0.3168725371360779, "learning_rate": 1e-05, "loss": 0.9994, "step": 16070 }, { "epoch": 14.238263950398583, "grad_norm": 0.2294890433549881, "learning_rate": 1e-05, "loss": 1.0174, "step": 16075 }, { "epoch": 14.242692648361382, "grad_norm": 0.30843108892440796, "learning_rate": 1e-05, "loss": 0.9991, "step": 16080 }, { "epoch": 14.24712134632418, "grad_norm": 0.23705920577049255, "learning_rate": 1e-05, "loss": 1.019, "step": 16085 }, { "epoch": 14.25155004428698, "grad_norm": 0.22615958750247955, "learning_rate": 1e-05, "loss": 0.9734, "step": 16090 }, { "epoch": 14.255978742249779, "grad_norm": 0.27387434244155884, "learning_rate": 1e-05, "loss": 0.9439, "step": 16095 }, { "epoch": 14.260407440212578, "grad_norm": 0.31500357389450073, "learning_rate": 1e-05, "loss": 1.0383, "step": 16100 }, { "epoch": 14.264836138175376, "grad_norm": 0.27954399585723877, "learning_rate": 1e-05, "loss": 0.9618, "step": 16105 }, { "epoch": 14.269264836138175, "grad_norm": 0.26311737298965454, "learning_rate": 1e-05, "loss": 0.9774, "step": 16110 }, { "epoch": 14.273693534100975, "grad_norm": 0.2789212763309479, "learning_rate": 1e-05, "loss": 0.9483, "step": 16115 }, { "epoch": 14.278122232063772, "grad_norm": 0.2928401529788971, "learning_rate": 1e-05, "loss": 0.9756, "step": 16120 }, { "epoch": 14.282550930026572, "grad_norm": 0.2525635361671448, "learning_rate": 1e-05, "loss": 0.982, "step": 16125 }, { "epoch": 14.286979627989371, "grad_norm": 0.2728481888771057, "learning_rate": 1e-05, "loss": 1.0276, "step": 16130 }, { "epoch": 14.29140832595217, "grad_norm": 0.2740583121776581, "learning_rate": 1e-05, "loss": 1.0115, "step": 16135 }, { "epoch": 14.295837023914968, "grad_norm": 0.24192453920841217, "learning_rate": 1e-05, "loss": 0.9422, "step": 16140 }, { "epoch": 14.300265721877768, "grad_norm": 0.36275580525398254, "learning_rate": 1e-05, "loss": 1.0312, "step": 16145 }, { "epoch": 14.304694419840567, "grad_norm": 0.2272033393383026, "learning_rate": 1e-05, "loss": 1.0274, "step": 16150 }, { "epoch": 14.309123117803367, "grad_norm": 0.22436775267124176, "learning_rate": 1e-05, "loss": 1.0248, "step": 16155 }, { "epoch": 14.313551815766164, "grad_norm": 0.246480792760849, "learning_rate": 1e-05, "loss": 0.9687, "step": 16160 }, { "epoch": 14.317980513728964, "grad_norm": 0.21877822279930115, "learning_rate": 1e-05, "loss": 0.9321, "step": 16165 }, { "epoch": 14.322409211691763, "grad_norm": 0.23440659046173096, "learning_rate": 1e-05, "loss": 1.0475, "step": 16170 }, { "epoch": 14.32683790965456, "grad_norm": 0.2668089270591736, "learning_rate": 1e-05, "loss": 0.985, "step": 16175 }, { "epoch": 14.33126660761736, "grad_norm": 0.2701428532600403, "learning_rate": 1e-05, "loss": 0.9854, "step": 16180 }, { "epoch": 14.33569530558016, "grad_norm": 0.32622238993644714, "learning_rate": 1e-05, "loss": 1.0302, "step": 16185 }, { "epoch": 14.34012400354296, "grad_norm": 0.23200897872447968, "learning_rate": 1e-05, "loss": 1.0193, "step": 16190 }, { "epoch": 14.344552701505757, "grad_norm": 0.3101765215396881, "learning_rate": 1e-05, "loss": 0.8924, "step": 16195 }, { "epoch": 14.348981399468556, "grad_norm": 0.26896390318870544, "learning_rate": 1e-05, "loss": 0.9637, "step": 16200 }, { "epoch": 14.353410097431356, "grad_norm": 0.277150958776474, "learning_rate": 1e-05, "loss": 0.9821, "step": 16205 }, { "epoch": 14.357838795394153, "grad_norm": 0.23689205944538116, "learning_rate": 1e-05, "loss": 0.9819, "step": 16210 }, { "epoch": 14.362267493356953, "grad_norm": 0.25146445631980896, "learning_rate": 1e-05, "loss": 0.9921, "step": 16215 }, { "epoch": 14.366696191319752, "grad_norm": 0.2717793583869934, "learning_rate": 1e-05, "loss": 0.9691, "step": 16220 }, { "epoch": 14.371124889282552, "grad_norm": 0.2160385698080063, "learning_rate": 1e-05, "loss": 0.972, "step": 16225 }, { "epoch": 14.37555358724535, "grad_norm": 0.226392924785614, "learning_rate": 1e-05, "loss": 0.9955, "step": 16230 }, { "epoch": 14.379982285208149, "grad_norm": 0.21924401819705963, "learning_rate": 1e-05, "loss": 0.9404, "step": 16235 }, { "epoch": 14.384410983170948, "grad_norm": 0.23507100343704224, "learning_rate": 1e-05, "loss": 0.9791, "step": 16240 }, { "epoch": 14.388839681133746, "grad_norm": 0.2684524655342102, "learning_rate": 1e-05, "loss": 1.0065, "step": 16245 }, { "epoch": 14.393268379096545, "grad_norm": 0.2606136202812195, "learning_rate": 1e-05, "loss": 1.0114, "step": 16250 }, { "epoch": 14.397697077059345, "grad_norm": 0.27165958285331726, "learning_rate": 1e-05, "loss": 0.9858, "step": 16255 }, { "epoch": 14.402125775022144, "grad_norm": 0.27855613827705383, "learning_rate": 1e-05, "loss": 1.0446, "step": 16260 }, { "epoch": 14.406554472984942, "grad_norm": 0.28144940733909607, "learning_rate": 1e-05, "loss": 0.9526, "step": 16265 }, { "epoch": 14.410983170947741, "grad_norm": 0.285361111164093, "learning_rate": 1e-05, "loss": 0.9941, "step": 16270 }, { "epoch": 14.41541186891054, "grad_norm": 0.2869277596473694, "learning_rate": 1e-05, "loss": 0.9758, "step": 16275 }, { "epoch": 14.41984056687334, "grad_norm": 0.28857702016830444, "learning_rate": 1e-05, "loss": 0.9898, "step": 16280 }, { "epoch": 14.424269264836138, "grad_norm": 0.25510743260383606, "learning_rate": 1e-05, "loss": 0.9083, "step": 16285 }, { "epoch": 14.428697962798937, "grad_norm": 0.2594030201435089, "learning_rate": 1e-05, "loss": 1.0083, "step": 16290 }, { "epoch": 14.433126660761737, "grad_norm": 0.26197192072868347, "learning_rate": 1e-05, "loss": 0.9856, "step": 16295 }, { "epoch": 14.437555358724534, "grad_norm": 0.25105172395706177, "learning_rate": 1e-05, "loss": 1.009, "step": 16300 }, { "epoch": 14.441984056687334, "grad_norm": 0.26715198159217834, "learning_rate": 1e-05, "loss": 0.9935, "step": 16305 }, { "epoch": 14.446412754650133, "grad_norm": 0.2774592638015747, "learning_rate": 1e-05, "loss": 1.0382, "step": 16310 }, { "epoch": 14.450841452612933, "grad_norm": 0.2662948668003082, "learning_rate": 1e-05, "loss": 1.0201, "step": 16315 }, { "epoch": 14.45527015057573, "grad_norm": 0.35703012347221375, "learning_rate": 1e-05, "loss": 1.0323, "step": 16320 }, { "epoch": 14.45969884853853, "grad_norm": 0.29315707087516785, "learning_rate": 1e-05, "loss": 0.9761, "step": 16325 }, { "epoch": 14.464127546501329, "grad_norm": 0.2548542022705078, "learning_rate": 1e-05, "loss": 1.0042, "step": 16330 }, { "epoch": 14.468556244464127, "grad_norm": 0.2775784432888031, "learning_rate": 1e-05, "loss": 1.0043, "step": 16335 }, { "epoch": 14.472984942426926, "grad_norm": 0.24884721636772156, "learning_rate": 1e-05, "loss": 1.0493, "step": 16340 }, { "epoch": 14.477413640389726, "grad_norm": 0.2941446602344513, "learning_rate": 1e-05, "loss": 0.9651, "step": 16345 }, { "epoch": 14.481842338352525, "grad_norm": 0.2529769837856293, "learning_rate": 1e-05, "loss": 0.9439, "step": 16350 }, { "epoch": 14.486271036315323, "grad_norm": 0.2483808547258377, "learning_rate": 1e-05, "loss": 1.0153, "step": 16355 }, { "epoch": 14.490699734278122, "grad_norm": 0.29735067486763, "learning_rate": 1e-05, "loss": 0.9795, "step": 16360 }, { "epoch": 14.495128432240922, "grad_norm": 0.2199345976114273, "learning_rate": 1e-05, "loss": 1.009, "step": 16365 }, { "epoch": 14.499557130203721, "grad_norm": 0.25083208084106445, "learning_rate": 1e-05, "loss": 0.9812, "step": 16370 }, { "epoch": 14.503985828166519, "grad_norm": 0.2711193263530731, "learning_rate": 1e-05, "loss": 1.0041, "step": 16375 }, { "epoch": 14.508414526129318, "grad_norm": 0.2512422800064087, "learning_rate": 1e-05, "loss": 0.949, "step": 16380 }, { "epoch": 14.512843224092117, "grad_norm": 0.277103453874588, "learning_rate": 1e-05, "loss": 0.9979, "step": 16385 }, { "epoch": 14.517271922054915, "grad_norm": 0.27260157465934753, "learning_rate": 1e-05, "loss": 0.9509, "step": 16390 }, { "epoch": 14.521700620017715, "grad_norm": 0.2731979191303253, "learning_rate": 1e-05, "loss": 0.9749, "step": 16395 }, { "epoch": 14.526129317980514, "grad_norm": 0.27902352809906006, "learning_rate": 1e-05, "loss": 1.0092, "step": 16400 }, { "epoch": 14.530558015943313, "grad_norm": 0.28178659081459045, "learning_rate": 1e-05, "loss": 1.0216, "step": 16405 }, { "epoch": 14.534986713906111, "grad_norm": 0.2443675696849823, "learning_rate": 1e-05, "loss": 0.961, "step": 16410 }, { "epoch": 14.53941541186891, "grad_norm": 0.2959047555923462, "learning_rate": 1e-05, "loss": 0.9637, "step": 16415 }, { "epoch": 14.54384410983171, "grad_norm": 0.3143279254436493, "learning_rate": 1e-05, "loss": 0.9699, "step": 16420 }, { "epoch": 14.548272807794508, "grad_norm": 0.281217098236084, "learning_rate": 1e-05, "loss": 1.0133, "step": 16425 }, { "epoch": 14.552701505757307, "grad_norm": 0.31636083126068115, "learning_rate": 1e-05, "loss": 1.0368, "step": 16430 }, { "epoch": 14.557130203720106, "grad_norm": 0.2782772183418274, "learning_rate": 1e-05, "loss": 0.9515, "step": 16435 }, { "epoch": 14.561558901682906, "grad_norm": 0.28139472007751465, "learning_rate": 1e-05, "loss": 0.9774, "step": 16440 }, { "epoch": 14.565987599645704, "grad_norm": 0.2928841710090637, "learning_rate": 1e-05, "loss": 1.0301, "step": 16445 }, { "epoch": 14.570416297608503, "grad_norm": 0.26062625646591187, "learning_rate": 1e-05, "loss": 1.0515, "step": 16450 }, { "epoch": 14.574844995571302, "grad_norm": 0.29303669929504395, "learning_rate": 1e-05, "loss": 1.0352, "step": 16455 }, { "epoch": 14.579273693534102, "grad_norm": 0.2500944435596466, "learning_rate": 1e-05, "loss": 1.0297, "step": 16460 }, { "epoch": 14.5837023914969, "grad_norm": 0.26715782284736633, "learning_rate": 1e-05, "loss": 0.9407, "step": 16465 }, { "epoch": 14.588131089459699, "grad_norm": 0.2846585512161255, "learning_rate": 1e-05, "loss": 1.0171, "step": 16470 }, { "epoch": 14.592559787422498, "grad_norm": 0.26248589158058167, "learning_rate": 1e-05, "loss": 1.0135, "step": 16475 }, { "epoch": 14.596988485385296, "grad_norm": 0.3085353374481201, "learning_rate": 1e-05, "loss": 1.0399, "step": 16480 }, { "epoch": 14.601417183348095, "grad_norm": 0.37500736117362976, "learning_rate": 1e-05, "loss": 0.9784, "step": 16485 }, { "epoch": 14.605845881310895, "grad_norm": 0.27004265785217285, "learning_rate": 1e-05, "loss": 1.0709, "step": 16490 }, { "epoch": 14.610274579273694, "grad_norm": 0.30227288603782654, "learning_rate": 1e-05, "loss": 0.9987, "step": 16495 }, { "epoch": 14.614703277236492, "grad_norm": 0.2669404447078705, "learning_rate": 1e-05, "loss": 0.9948, "step": 16500 }, { "epoch": 14.619131975199291, "grad_norm": 0.2740556001663208, "learning_rate": 1e-05, "loss": 0.9784, "step": 16505 }, { "epoch": 14.62356067316209, "grad_norm": 0.2449372559785843, "learning_rate": 1e-05, "loss": 1.0109, "step": 16510 }, { "epoch": 14.627989371124889, "grad_norm": 0.30259495973587036, "learning_rate": 1e-05, "loss": 0.978, "step": 16515 }, { "epoch": 14.632418069087688, "grad_norm": 0.2543676197528839, "learning_rate": 1e-05, "loss": 1.0349, "step": 16520 }, { "epoch": 14.636846767050487, "grad_norm": 0.2868063747882843, "learning_rate": 1e-05, "loss": 1.0244, "step": 16525 }, { "epoch": 14.641275465013287, "grad_norm": 0.24118371307849884, "learning_rate": 1e-05, "loss": 0.9197, "step": 16530 }, { "epoch": 14.645704162976084, "grad_norm": 0.2835771143436432, "learning_rate": 1e-05, "loss": 1.0282, "step": 16535 }, { "epoch": 14.650132860938884, "grad_norm": 0.2593700587749481, "learning_rate": 1e-05, "loss": 1.0104, "step": 16540 }, { "epoch": 14.654561558901683, "grad_norm": 0.21953213214874268, "learning_rate": 1e-05, "loss": 0.9596, "step": 16545 }, { "epoch": 14.658990256864481, "grad_norm": 0.26301050186157227, "learning_rate": 1e-05, "loss": 1.0542, "step": 16550 }, { "epoch": 14.66341895482728, "grad_norm": 0.27383026480674744, "learning_rate": 1e-05, "loss": 0.9527, "step": 16555 }, { "epoch": 14.66784765279008, "grad_norm": 0.2769801914691925, "learning_rate": 1e-05, "loss": 0.9736, "step": 16560 }, { "epoch": 14.67227635075288, "grad_norm": 0.29033446311950684, "learning_rate": 1e-05, "loss": 0.991, "step": 16565 }, { "epoch": 14.676705048715677, "grad_norm": 0.2810085415840149, "learning_rate": 1e-05, "loss": 1.0159, "step": 16570 }, { "epoch": 14.681133746678476, "grad_norm": 0.30164459347724915, "learning_rate": 1e-05, "loss": 1.0192, "step": 16575 }, { "epoch": 14.685562444641276, "grad_norm": 0.2507741451263428, "learning_rate": 1e-05, "loss": 0.985, "step": 16580 }, { "epoch": 14.689991142604075, "grad_norm": 0.296539843082428, "learning_rate": 1e-05, "loss": 1.0318, "step": 16585 }, { "epoch": 14.694419840566873, "grad_norm": 0.28560638427734375, "learning_rate": 1e-05, "loss": 0.9748, "step": 16590 }, { "epoch": 14.698848538529672, "grad_norm": 0.2627999782562256, "learning_rate": 1e-05, "loss": 1.0426, "step": 16595 }, { "epoch": 14.703277236492472, "grad_norm": 0.26439911127090454, "learning_rate": 1e-05, "loss": 1.079, "step": 16600 }, { "epoch": 14.70770593445527, "grad_norm": 0.25219106674194336, "learning_rate": 1e-05, "loss": 1.0303, "step": 16605 }, { "epoch": 14.712134632418069, "grad_norm": 0.23781970143318176, "learning_rate": 1e-05, "loss": 0.9782, "step": 16610 }, { "epoch": 14.716563330380868, "grad_norm": 0.27697667479515076, "learning_rate": 1e-05, "loss": 1.029, "step": 16615 }, { "epoch": 14.720992028343668, "grad_norm": 0.26851117610931396, "learning_rate": 1e-05, "loss": 1.0184, "step": 16620 }, { "epoch": 14.725420726306465, "grad_norm": 0.2835143804550171, "learning_rate": 1e-05, "loss": 0.9737, "step": 16625 }, { "epoch": 14.729849424269265, "grad_norm": 0.27118510007858276, "learning_rate": 1e-05, "loss": 0.9771, "step": 16630 }, { "epoch": 14.734278122232064, "grad_norm": 0.28629037737846375, "learning_rate": 1e-05, "loss": 1.0154, "step": 16635 }, { "epoch": 14.738706820194862, "grad_norm": 0.3202347159385681, "learning_rate": 1e-05, "loss": 1.0425, "step": 16640 }, { "epoch": 14.743135518157661, "grad_norm": 0.24710731208324432, "learning_rate": 1e-05, "loss": 1.009, "step": 16645 }, { "epoch": 14.74756421612046, "grad_norm": 0.25110748410224915, "learning_rate": 1e-05, "loss": 0.9701, "step": 16650 }, { "epoch": 14.75199291408326, "grad_norm": 0.271636039018631, "learning_rate": 1e-05, "loss": 0.96, "step": 16655 }, { "epoch": 14.756421612046058, "grad_norm": 0.2625683546066284, "learning_rate": 1e-05, "loss": 1.0064, "step": 16660 }, { "epoch": 14.760850310008857, "grad_norm": 0.26619279384613037, "learning_rate": 1e-05, "loss": 1.0247, "step": 16665 }, { "epoch": 14.765279007971657, "grad_norm": 0.3262878656387329, "learning_rate": 1e-05, "loss": 0.9899, "step": 16670 }, { "epoch": 14.769707705934454, "grad_norm": 0.2604900896549225, "learning_rate": 1e-05, "loss": 0.9843, "step": 16675 }, { "epoch": 14.774136403897254, "grad_norm": 0.26651960611343384, "learning_rate": 1e-05, "loss": 0.9871, "step": 16680 }, { "epoch": 14.778565101860053, "grad_norm": 0.2893853485584259, "learning_rate": 1e-05, "loss": 1.0151, "step": 16685 }, { "epoch": 14.782993799822853, "grad_norm": 0.24873779714107513, "learning_rate": 1e-05, "loss": 0.9602, "step": 16690 }, { "epoch": 14.78742249778565, "grad_norm": 0.2951345443725586, "learning_rate": 1e-05, "loss": 1.019, "step": 16695 }, { "epoch": 14.79185119574845, "grad_norm": 0.25161123275756836, "learning_rate": 1e-05, "loss": 1.0316, "step": 16700 }, { "epoch": 14.79627989371125, "grad_norm": 0.27895843982696533, "learning_rate": 1e-05, "loss": 0.9714, "step": 16705 }, { "epoch": 14.800708591674049, "grad_norm": 0.2510391175746918, "learning_rate": 1e-05, "loss": 0.9698, "step": 16710 }, { "epoch": 14.805137289636846, "grad_norm": 0.29707637429237366, "learning_rate": 1e-05, "loss": 0.9642, "step": 16715 }, { "epoch": 14.809565987599646, "grad_norm": 0.27752935886383057, "learning_rate": 1e-05, "loss": 1.025, "step": 16720 }, { "epoch": 14.813994685562445, "grad_norm": 0.24980412423610687, "learning_rate": 1e-05, "loss": 1.0286, "step": 16725 }, { "epoch": 14.818423383525243, "grad_norm": 0.24784047901630402, "learning_rate": 1e-05, "loss": 0.9786, "step": 16730 }, { "epoch": 14.822852081488042, "grad_norm": 0.27620649337768555, "learning_rate": 1e-05, "loss": 0.9716, "step": 16735 }, { "epoch": 14.827280779450842, "grad_norm": 0.25357839465141296, "learning_rate": 1e-05, "loss": 0.9642, "step": 16740 }, { "epoch": 14.831709477413641, "grad_norm": 0.2991798520088196, "learning_rate": 1e-05, "loss": 1.0519, "step": 16745 }, { "epoch": 14.836138175376439, "grad_norm": 0.25299325585365295, "learning_rate": 1e-05, "loss": 0.9933, "step": 16750 }, { "epoch": 14.840566873339238, "grad_norm": 0.24106444418430328, "learning_rate": 1e-05, "loss": 1.0152, "step": 16755 }, { "epoch": 14.844995571302038, "grad_norm": 0.29380327463150024, "learning_rate": 1e-05, "loss": 0.9977, "step": 16760 }, { "epoch": 14.849424269264837, "grad_norm": 0.23624031245708466, "learning_rate": 1e-05, "loss": 0.9864, "step": 16765 }, { "epoch": 14.853852967227635, "grad_norm": 0.26048368215560913, "learning_rate": 1e-05, "loss": 0.988, "step": 16770 }, { "epoch": 14.858281665190434, "grad_norm": 0.22700917720794678, "learning_rate": 1e-05, "loss": 1.061, "step": 16775 }, { "epoch": 14.862710363153234, "grad_norm": 0.28412002325057983, "learning_rate": 1e-05, "loss": 1.0024, "step": 16780 }, { "epoch": 14.867139061116031, "grad_norm": 0.2695987820625305, "learning_rate": 1e-05, "loss": 0.9959, "step": 16785 }, { "epoch": 14.87156775907883, "grad_norm": 0.23892877995967865, "learning_rate": 1e-05, "loss": 0.9807, "step": 16790 }, { "epoch": 14.87599645704163, "grad_norm": 0.21462759375572205, "learning_rate": 1e-05, "loss": 1.0327, "step": 16795 }, { "epoch": 14.88042515500443, "grad_norm": 0.3129950761795044, "learning_rate": 1e-05, "loss": 1.0001, "step": 16800 }, { "epoch": 14.884853852967227, "grad_norm": 0.26655280590057373, "learning_rate": 1e-05, "loss": 1.0066, "step": 16805 }, { "epoch": 14.889282550930027, "grad_norm": 0.2924897074699402, "learning_rate": 1e-05, "loss": 0.9505, "step": 16810 }, { "epoch": 14.893711248892826, "grad_norm": 0.2594379186630249, "learning_rate": 1e-05, "loss": 1.0318, "step": 16815 }, { "epoch": 14.898139946855624, "grad_norm": 0.3770524263381958, "learning_rate": 1e-05, "loss": 1.023, "step": 16820 }, { "epoch": 14.902568644818423, "grad_norm": 0.32294076681137085, "learning_rate": 1e-05, "loss": 1.0065, "step": 16825 }, { "epoch": 14.906997342781223, "grad_norm": 0.35033518075942993, "learning_rate": 1e-05, "loss": 0.9878, "step": 16830 }, { "epoch": 14.911426040744022, "grad_norm": 0.2783731520175934, "learning_rate": 1e-05, "loss": 1.0506, "step": 16835 }, { "epoch": 14.91585473870682, "grad_norm": 0.30879294872283936, "learning_rate": 1e-05, "loss": 0.9899, "step": 16840 }, { "epoch": 14.920283436669619, "grad_norm": 0.262994647026062, "learning_rate": 1e-05, "loss": 0.9655, "step": 16845 }, { "epoch": 14.924712134632419, "grad_norm": 0.30566802620887756, "learning_rate": 1e-05, "loss": 1.0238, "step": 16850 }, { "epoch": 14.929140832595216, "grad_norm": 0.2475215494632721, "learning_rate": 1e-05, "loss": 1.0132, "step": 16855 }, { "epoch": 14.933569530558016, "grad_norm": 0.2576778829097748, "learning_rate": 1e-05, "loss": 0.9585, "step": 16860 }, { "epoch": 14.937998228520815, "grad_norm": 0.22945937514305115, "learning_rate": 1e-05, "loss": 1.0148, "step": 16865 }, { "epoch": 14.942426926483614, "grad_norm": 0.2262829840183258, "learning_rate": 1e-05, "loss": 1.0227, "step": 16870 }, { "epoch": 14.946855624446412, "grad_norm": 0.2827739119529724, "learning_rate": 1e-05, "loss": 0.9778, "step": 16875 }, { "epoch": 14.951284322409212, "grad_norm": 0.20690865814685822, "learning_rate": 1e-05, "loss": 1.0391, "step": 16880 }, { "epoch": 14.955713020372011, "grad_norm": 0.2357817441225052, "learning_rate": 1e-05, "loss": 1.0043, "step": 16885 }, { "epoch": 14.96014171833481, "grad_norm": 0.2403680831193924, "learning_rate": 1e-05, "loss": 0.9739, "step": 16890 }, { "epoch": 14.964570416297608, "grad_norm": 0.24647489190101624, "learning_rate": 1e-05, "loss": 0.9696, "step": 16895 }, { "epoch": 14.968999114260408, "grad_norm": 0.26419657468795776, "learning_rate": 1e-05, "loss": 1.0052, "step": 16900 }, { "epoch": 14.973427812223207, "grad_norm": 0.25355908274650574, "learning_rate": 1e-05, "loss": 0.9508, "step": 16905 }, { "epoch": 14.977856510186005, "grad_norm": 0.2276563048362732, "learning_rate": 1e-05, "loss": 0.9967, "step": 16910 }, { "epoch": 14.982285208148804, "grad_norm": 0.253778338432312, "learning_rate": 1e-05, "loss": 0.9327, "step": 16915 }, { "epoch": 14.986713906111603, "grad_norm": 0.2397511899471283, "learning_rate": 1e-05, "loss": 0.9977, "step": 16920 }, { "epoch": 14.991142604074403, "grad_norm": 0.22408337891101837, "learning_rate": 1e-05, "loss": 1.0343, "step": 16925 }, { "epoch": 14.9955713020372, "grad_norm": 0.25532180070877075, "learning_rate": 1e-05, "loss": 0.9581, "step": 16930 }, { "epoch": 15.0, "grad_norm": 0.32064101099967957, "learning_rate": 1e-05, "loss": 0.9966, "step": 16935 }, { "epoch": 15.0044286979628, "grad_norm": 0.25387218594551086, "learning_rate": 1e-05, "loss": 0.9911, "step": 16940 }, { "epoch": 15.008857395925597, "grad_norm": 0.29483988881111145, "learning_rate": 1e-05, "loss": 0.981, "step": 16945 }, { "epoch": 15.013286093888397, "grad_norm": 0.28784963488578796, "learning_rate": 1e-05, "loss": 1.0123, "step": 16950 }, { "epoch": 15.017714791851196, "grad_norm": 0.2789418697357178, "learning_rate": 1e-05, "loss": 0.9993, "step": 16955 }, { "epoch": 15.022143489813995, "grad_norm": 0.2883960008621216, "learning_rate": 1e-05, "loss": 0.965, "step": 16960 }, { "epoch": 15.026572187776793, "grad_norm": 0.27577394247055054, "learning_rate": 1e-05, "loss": 0.9987, "step": 16965 }, { "epoch": 15.031000885739592, "grad_norm": 0.23802579939365387, "learning_rate": 1e-05, "loss": 1.0158, "step": 16970 }, { "epoch": 15.035429583702392, "grad_norm": 0.3305317163467407, "learning_rate": 1e-05, "loss": 0.9931, "step": 16975 }, { "epoch": 15.03985828166519, "grad_norm": 0.23631387948989868, "learning_rate": 1e-05, "loss": 0.9973, "step": 16980 }, { "epoch": 15.044286979627989, "grad_norm": 0.31919947266578674, "learning_rate": 1e-05, "loss": 1.0233, "step": 16985 }, { "epoch": 15.048715677590788, "grad_norm": 0.2521390914916992, "learning_rate": 1e-05, "loss": 0.954, "step": 16990 }, { "epoch": 15.053144375553588, "grad_norm": 0.34347400069236755, "learning_rate": 1e-05, "loss": 1.0387, "step": 16995 }, { "epoch": 15.057573073516386, "grad_norm": 0.21043822169303894, "learning_rate": 1e-05, "loss": 1.0191, "step": 17000 }, { "epoch": 15.062001771479185, "grad_norm": 0.22088706493377686, "learning_rate": 1e-05, "loss": 0.9336, "step": 17005 }, { "epoch": 15.066430469441984, "grad_norm": 0.2586420476436615, "learning_rate": 1e-05, "loss": 1.0483, "step": 17010 }, { "epoch": 15.070859167404784, "grad_norm": 0.27061590552330017, "learning_rate": 1e-05, "loss": 0.915, "step": 17015 }, { "epoch": 15.075287865367581, "grad_norm": 0.2675482928752899, "learning_rate": 1e-05, "loss": 0.9746, "step": 17020 }, { "epoch": 15.079716563330381, "grad_norm": 0.21281589567661285, "learning_rate": 1e-05, "loss": 1.0022, "step": 17025 }, { "epoch": 15.08414526129318, "grad_norm": 0.3287443220615387, "learning_rate": 1e-05, "loss": 1.0194, "step": 17030 }, { "epoch": 15.088573959255978, "grad_norm": 0.2601924538612366, "learning_rate": 1e-05, "loss": 1.0005, "step": 17035 }, { "epoch": 15.093002657218777, "grad_norm": 0.27621152997016907, "learning_rate": 1e-05, "loss": 1.0465, "step": 17040 }, { "epoch": 15.097431355181577, "grad_norm": 0.24964499473571777, "learning_rate": 1e-05, "loss": 1.0006, "step": 17045 }, { "epoch": 15.101860053144376, "grad_norm": 0.31866148114204407, "learning_rate": 1e-05, "loss": 0.9819, "step": 17050 }, { "epoch": 15.106288751107174, "grad_norm": 0.26239389181137085, "learning_rate": 1e-05, "loss": 1.0817, "step": 17055 }, { "epoch": 15.110717449069973, "grad_norm": 0.2661646604537964, "learning_rate": 1e-05, "loss": 1.0093, "step": 17060 }, { "epoch": 15.115146147032773, "grad_norm": 0.2909359037876129, "learning_rate": 1e-05, "loss": 0.9882, "step": 17065 }, { "epoch": 15.11957484499557, "grad_norm": 0.25504469871520996, "learning_rate": 1e-05, "loss": 1.0014, "step": 17070 }, { "epoch": 15.12400354295837, "grad_norm": 0.27992212772369385, "learning_rate": 1e-05, "loss": 1.0038, "step": 17075 }, { "epoch": 15.12843224092117, "grad_norm": 0.2818259596824646, "learning_rate": 1e-05, "loss": 0.9831, "step": 17080 }, { "epoch": 15.132860938883969, "grad_norm": 0.31795716285705566, "learning_rate": 1e-05, "loss": 1.071, "step": 17085 }, { "epoch": 15.137289636846766, "grad_norm": 0.2748221457004547, "learning_rate": 1e-05, "loss": 0.9867, "step": 17090 }, { "epoch": 15.141718334809566, "grad_norm": 0.3239392936229706, "learning_rate": 1e-05, "loss": 0.9644, "step": 17095 }, { "epoch": 15.146147032772365, "grad_norm": 0.2508675754070282, "learning_rate": 1e-05, "loss": 1.0222, "step": 17100 }, { "epoch": 15.150575730735165, "grad_norm": 0.2542046308517456, "learning_rate": 1e-05, "loss": 1.0091, "step": 17105 }, { "epoch": 15.155004428697962, "grad_norm": 0.2578985393047333, "learning_rate": 1e-05, "loss": 1.0, "step": 17110 }, { "epoch": 15.159433126660762, "grad_norm": 0.3133172392845154, "learning_rate": 1e-05, "loss": 1.0004, "step": 17115 }, { "epoch": 15.163861824623561, "grad_norm": 0.26482972502708435, "learning_rate": 1e-05, "loss": 0.9695, "step": 17120 }, { "epoch": 15.168290522586359, "grad_norm": 0.304808109998703, "learning_rate": 1e-05, "loss": 0.9573, "step": 17125 }, { "epoch": 15.172719220549158, "grad_norm": 0.28081241250038147, "learning_rate": 1e-05, "loss": 1.0218, "step": 17130 }, { "epoch": 15.177147918511958, "grad_norm": 0.25393274426460266, "learning_rate": 1e-05, "loss": 1.0319, "step": 17135 }, { "epoch": 15.181576616474757, "grad_norm": 0.25129199028015137, "learning_rate": 1e-05, "loss": 1.0346, "step": 17140 }, { "epoch": 15.186005314437555, "grad_norm": 0.26994654536247253, "learning_rate": 1e-05, "loss": 0.971, "step": 17145 }, { "epoch": 15.190434012400354, "grad_norm": 0.3445732891559601, "learning_rate": 1e-05, "loss": 0.979, "step": 17150 }, { "epoch": 15.194862710363154, "grad_norm": 0.2910650372505188, "learning_rate": 1e-05, "loss": 0.946, "step": 17155 }, { "epoch": 15.199291408325951, "grad_norm": 0.2666941285133362, "learning_rate": 1e-05, "loss": 0.9817, "step": 17160 }, { "epoch": 15.20372010628875, "grad_norm": 0.3099244236946106, "learning_rate": 1e-05, "loss": 1.0169, "step": 17165 }, { "epoch": 15.20814880425155, "grad_norm": 0.2541021406650543, "learning_rate": 1e-05, "loss": 0.982, "step": 17170 }, { "epoch": 15.21257750221435, "grad_norm": 0.2885095179080963, "learning_rate": 1e-05, "loss": 1.0234, "step": 17175 }, { "epoch": 15.217006200177147, "grad_norm": 0.2297290563583374, "learning_rate": 1e-05, "loss": 0.9376, "step": 17180 }, { "epoch": 15.221434898139947, "grad_norm": 0.26323774456977844, "learning_rate": 1e-05, "loss": 0.9861, "step": 17185 }, { "epoch": 15.225863596102746, "grad_norm": 0.23744550347328186, "learning_rate": 1e-05, "loss": 1.039, "step": 17190 }, { "epoch": 15.230292294065544, "grad_norm": 0.22292733192443848, "learning_rate": 1e-05, "loss": 1.0062, "step": 17195 }, { "epoch": 15.234720992028343, "grad_norm": 0.23988264799118042, "learning_rate": 1e-05, "loss": 0.9543, "step": 17200 }, { "epoch": 15.239149689991143, "grad_norm": 0.22324340045452118, "learning_rate": 1e-05, "loss": 0.9839, "step": 17205 }, { "epoch": 15.243578387953942, "grad_norm": 0.2578936815261841, "learning_rate": 1e-05, "loss": 1.0426, "step": 17210 }, { "epoch": 15.24800708591674, "grad_norm": 0.24282030761241913, "learning_rate": 1e-05, "loss": 0.9638, "step": 17215 }, { "epoch": 15.25243578387954, "grad_norm": 0.25213000178337097, "learning_rate": 1e-05, "loss": 0.9689, "step": 17220 }, { "epoch": 15.256864481842339, "grad_norm": 0.23575039207935333, "learning_rate": 1e-05, "loss": 1.0118, "step": 17225 }, { "epoch": 15.261293179805138, "grad_norm": 0.38254740834236145, "learning_rate": 1e-05, "loss": 0.9768, "step": 17230 }, { "epoch": 15.265721877767936, "grad_norm": 0.2365749627351761, "learning_rate": 1e-05, "loss": 0.9874, "step": 17235 }, { "epoch": 15.270150575730735, "grad_norm": 0.26967090368270874, "learning_rate": 1e-05, "loss": 0.9891, "step": 17240 }, { "epoch": 15.274579273693535, "grad_norm": 0.26564309000968933, "learning_rate": 1e-05, "loss": 1.0176, "step": 17245 }, { "epoch": 15.279007971656332, "grad_norm": 0.31534746289253235, "learning_rate": 1e-05, "loss": 1.0384, "step": 17250 }, { "epoch": 15.283436669619132, "grad_norm": 0.294215589761734, "learning_rate": 1e-05, "loss": 0.966, "step": 17255 }, { "epoch": 15.287865367581931, "grad_norm": 0.26330846548080444, "learning_rate": 1e-05, "loss": 0.999, "step": 17260 }, { "epoch": 15.29229406554473, "grad_norm": 0.24788245558738708, "learning_rate": 1e-05, "loss": 1.0338, "step": 17265 }, { "epoch": 15.296722763507528, "grad_norm": 0.3743017911911011, "learning_rate": 1e-05, "loss": 1.0004, "step": 17270 }, { "epoch": 15.301151461470328, "grad_norm": 0.27872133255004883, "learning_rate": 1e-05, "loss": 0.9995, "step": 17275 }, { "epoch": 15.305580159433127, "grad_norm": 0.2927137017250061, "learning_rate": 1e-05, "loss": 0.99, "step": 17280 }, { "epoch": 15.310008857395925, "grad_norm": 0.2726747989654541, "learning_rate": 1e-05, "loss": 0.9888, "step": 17285 }, { "epoch": 15.314437555358724, "grad_norm": 0.31740278005599976, "learning_rate": 1e-05, "loss": 1.0144, "step": 17290 }, { "epoch": 15.318866253321524, "grad_norm": 0.2880260646343231, "learning_rate": 1e-05, "loss": 1.0157, "step": 17295 }, { "epoch": 15.323294951284323, "grad_norm": 0.26660725474357605, "learning_rate": 1e-05, "loss": 0.9814, "step": 17300 }, { "epoch": 15.32772364924712, "grad_norm": 0.26563549041748047, "learning_rate": 1e-05, "loss": 0.9806, "step": 17305 }, { "epoch": 15.33215234720992, "grad_norm": 0.2138364464044571, "learning_rate": 1e-05, "loss": 0.9767, "step": 17310 }, { "epoch": 15.33658104517272, "grad_norm": 0.288858562707901, "learning_rate": 1e-05, "loss": 0.9986, "step": 17315 }, { "epoch": 15.341009743135519, "grad_norm": 0.3685668408870697, "learning_rate": 1e-05, "loss": 1.0084, "step": 17320 }, { "epoch": 15.345438441098317, "grad_norm": 0.31687313318252563, "learning_rate": 1e-05, "loss": 1.0166, "step": 17325 }, { "epoch": 15.349867139061116, "grad_norm": 0.2954452633857727, "learning_rate": 1e-05, "loss": 1.0448, "step": 17330 }, { "epoch": 15.354295837023916, "grad_norm": 0.2854871451854706, "learning_rate": 1e-05, "loss": 0.9674, "step": 17335 }, { "epoch": 15.358724534986713, "grad_norm": 0.3266577422618866, "learning_rate": 1e-05, "loss": 1.017, "step": 17340 }, { "epoch": 15.363153232949513, "grad_norm": 0.25883886218070984, "learning_rate": 1e-05, "loss": 1.0164, "step": 17345 }, { "epoch": 15.367581930912312, "grad_norm": 0.21190109848976135, "learning_rate": 1e-05, "loss": 0.9837, "step": 17350 }, { "epoch": 15.372010628875111, "grad_norm": 0.26922836899757385, "learning_rate": 1e-05, "loss": 0.9663, "step": 17355 }, { "epoch": 15.37643932683791, "grad_norm": 0.254291832447052, "learning_rate": 1e-05, "loss": 0.9826, "step": 17360 }, { "epoch": 15.380868024800709, "grad_norm": 0.28723376989364624, "learning_rate": 1e-05, "loss": 1.0055, "step": 17365 }, { "epoch": 15.385296722763508, "grad_norm": 0.2999916970729828, "learning_rate": 1e-05, "loss": 1.0119, "step": 17370 }, { "epoch": 15.389725420726306, "grad_norm": 0.23715093731880188, "learning_rate": 1e-05, "loss": 1.0356, "step": 17375 }, { "epoch": 15.394154118689105, "grad_norm": 0.2447013556957245, "learning_rate": 1e-05, "loss": 1.0049, "step": 17380 }, { "epoch": 15.398582816651905, "grad_norm": 0.24983754754066467, "learning_rate": 1e-05, "loss": 1.0191, "step": 17385 }, { "epoch": 15.403011514614704, "grad_norm": 0.27488842606544495, "learning_rate": 1e-05, "loss": 0.9962, "step": 17390 }, { "epoch": 15.407440212577502, "grad_norm": 0.3296554386615753, "learning_rate": 1e-05, "loss": 1.0328, "step": 17395 }, { "epoch": 15.411868910540301, "grad_norm": 0.2354331761598587, "learning_rate": 1e-05, "loss": 1.0264, "step": 17400 }, { "epoch": 15.4162976085031, "grad_norm": 0.27824997901916504, "learning_rate": 1e-05, "loss": 0.9305, "step": 17405 }, { "epoch": 15.420726306465898, "grad_norm": 0.2649856507778168, "learning_rate": 1e-05, "loss": 0.9844, "step": 17410 }, { "epoch": 15.425155004428698, "grad_norm": 0.2622207701206207, "learning_rate": 1e-05, "loss": 0.9818, "step": 17415 }, { "epoch": 15.429583702391497, "grad_norm": 0.24964140355587006, "learning_rate": 1e-05, "loss": 0.9879, "step": 17420 }, { "epoch": 15.434012400354296, "grad_norm": 0.22974726557731628, "learning_rate": 1e-05, "loss": 0.9825, "step": 17425 }, { "epoch": 15.438441098317094, "grad_norm": 0.2717423737049103, "learning_rate": 1e-05, "loss": 1.0163, "step": 17430 }, { "epoch": 15.442869796279894, "grad_norm": 0.2755749821662903, "learning_rate": 1e-05, "loss": 1.0178, "step": 17435 }, { "epoch": 15.447298494242693, "grad_norm": 0.22714024782180786, "learning_rate": 1e-05, "loss": 1.0091, "step": 17440 }, { "epoch": 15.451727192205492, "grad_norm": 0.322359174489975, "learning_rate": 1e-05, "loss": 0.9874, "step": 17445 }, { "epoch": 15.45615589016829, "grad_norm": 0.250095933675766, "learning_rate": 1e-05, "loss": 0.9937, "step": 17450 }, { "epoch": 15.46058458813109, "grad_norm": 0.28568190336227417, "learning_rate": 1e-05, "loss": 0.9952, "step": 17455 }, { "epoch": 15.465013286093889, "grad_norm": 0.2286706417798996, "learning_rate": 1e-05, "loss": 0.991, "step": 17460 }, { "epoch": 15.469441984056687, "grad_norm": 0.22113493084907532, "learning_rate": 1e-05, "loss": 1.0014, "step": 17465 }, { "epoch": 15.473870682019486, "grad_norm": 0.2511354982852936, "learning_rate": 1e-05, "loss": 0.9428, "step": 17470 }, { "epoch": 15.478299379982285, "grad_norm": 0.26192641258239746, "learning_rate": 1e-05, "loss": 1.0074, "step": 17475 }, { "epoch": 15.482728077945085, "grad_norm": 0.31223440170288086, "learning_rate": 1e-05, "loss": 0.9881, "step": 17480 }, { "epoch": 15.487156775907883, "grad_norm": 0.34236565232276917, "learning_rate": 1e-05, "loss": 1.0297, "step": 17485 }, { "epoch": 15.491585473870682, "grad_norm": 0.2933385372161865, "learning_rate": 1e-05, "loss": 0.9932, "step": 17490 }, { "epoch": 15.496014171833481, "grad_norm": 0.24082890152931213, "learning_rate": 1e-05, "loss": 1.0077, "step": 17495 }, { "epoch": 15.50044286979628, "grad_norm": 0.2509610056877136, "learning_rate": 1e-05, "loss": 1.0352, "step": 17500 }, { "epoch": 15.504871567759078, "grad_norm": 0.34095117449760437, "learning_rate": 1e-05, "loss": 1.0432, "step": 17505 }, { "epoch": 15.509300265721878, "grad_norm": 0.26015010476112366, "learning_rate": 1e-05, "loss": 1.0049, "step": 17510 }, { "epoch": 15.513728963684677, "grad_norm": 0.2837755084037781, "learning_rate": 1e-05, "loss": 0.9571, "step": 17515 }, { "epoch": 15.518157661647475, "grad_norm": 0.2888769805431366, "learning_rate": 1e-05, "loss": 0.9871, "step": 17520 }, { "epoch": 15.522586359610274, "grad_norm": 0.2791120111942291, "learning_rate": 1e-05, "loss": 1.0497, "step": 17525 }, { "epoch": 15.527015057573074, "grad_norm": 0.2765829563140869, "learning_rate": 1e-05, "loss": 1.0621, "step": 17530 }, { "epoch": 15.531443755535872, "grad_norm": 0.22506879270076752, "learning_rate": 1e-05, "loss": 1.0229, "step": 17535 }, { "epoch": 15.535872453498671, "grad_norm": 0.288679301738739, "learning_rate": 1e-05, "loss": 0.997, "step": 17540 }, { "epoch": 15.54030115146147, "grad_norm": 0.27534568309783936, "learning_rate": 1e-05, "loss": 0.9547, "step": 17545 }, { "epoch": 15.54472984942427, "grad_norm": 0.2309505194425583, "learning_rate": 1e-05, "loss": 0.9651, "step": 17550 }, { "epoch": 15.549158547387067, "grad_norm": 0.2653706967830658, "learning_rate": 1e-05, "loss": 0.9499, "step": 17555 }, { "epoch": 15.553587245349867, "grad_norm": 0.2603389322757721, "learning_rate": 1e-05, "loss": 1.0055, "step": 17560 }, { "epoch": 15.558015943312666, "grad_norm": 0.24747580289840698, "learning_rate": 1e-05, "loss": 0.9417, "step": 17565 }, { "epoch": 15.562444641275466, "grad_norm": 0.299694299697876, "learning_rate": 1e-05, "loss": 0.9676, "step": 17570 }, { "epoch": 15.566873339238263, "grad_norm": 0.2646327018737793, "learning_rate": 1e-05, "loss": 0.972, "step": 17575 }, { "epoch": 15.571302037201063, "grad_norm": 0.297442227602005, "learning_rate": 1e-05, "loss": 0.9215, "step": 17580 }, { "epoch": 15.575730735163862, "grad_norm": 0.24240759015083313, "learning_rate": 1e-05, "loss": 0.9837, "step": 17585 }, { "epoch": 15.58015943312666, "grad_norm": 0.25861620903015137, "learning_rate": 1e-05, "loss": 1.0223, "step": 17590 }, { "epoch": 15.58458813108946, "grad_norm": 0.36966672539711, "learning_rate": 1e-05, "loss": 0.9539, "step": 17595 }, { "epoch": 15.589016829052259, "grad_norm": 0.2738770544528961, "learning_rate": 1e-05, "loss": 0.9542, "step": 17600 }, { "epoch": 15.593445527015058, "grad_norm": 0.2926274240016937, "learning_rate": 1e-05, "loss": 1.0309, "step": 17605 }, { "epoch": 15.597874224977856, "grad_norm": 0.40111541748046875, "learning_rate": 1e-05, "loss": 1.011, "step": 17610 }, { "epoch": 15.602302922940655, "grad_norm": 0.3129487633705139, "learning_rate": 1e-05, "loss": 0.9798, "step": 17615 }, { "epoch": 15.606731620903455, "grad_norm": 0.3202768564224243, "learning_rate": 1e-05, "loss": 1.0215, "step": 17620 }, { "epoch": 15.611160318866254, "grad_norm": 0.27919039130210876, "learning_rate": 1e-05, "loss": 0.99, "step": 17625 }, { "epoch": 15.615589016829052, "grad_norm": 0.25771623849868774, "learning_rate": 1e-05, "loss": 0.9654, "step": 17630 }, { "epoch": 15.620017714791851, "grad_norm": 0.22565802931785583, "learning_rate": 1e-05, "loss": 1.0371, "step": 17635 }, { "epoch": 15.62444641275465, "grad_norm": 0.3081500232219696, "learning_rate": 1e-05, "loss": 1.0682, "step": 17640 }, { "epoch": 15.628875110717448, "grad_norm": 0.22506597638130188, "learning_rate": 1e-05, "loss": 1.0102, "step": 17645 }, { "epoch": 15.633303808680248, "grad_norm": 0.24768738448619843, "learning_rate": 1e-05, "loss": 0.9721, "step": 17650 }, { "epoch": 15.637732506643047, "grad_norm": 0.22748365998268127, "learning_rate": 1e-05, "loss": 0.9862, "step": 17655 }, { "epoch": 15.642161204605847, "grad_norm": 0.2742569148540497, "learning_rate": 1e-05, "loss": 0.9894, "step": 17660 }, { "epoch": 15.646589902568644, "grad_norm": 0.2463819533586502, "learning_rate": 1e-05, "loss": 0.9792, "step": 17665 }, { "epoch": 15.651018600531444, "grad_norm": 0.25562024116516113, "learning_rate": 1e-05, "loss": 0.9965, "step": 17670 }, { "epoch": 15.655447298494243, "grad_norm": 0.240044966340065, "learning_rate": 1e-05, "loss": 1.0127, "step": 17675 }, { "epoch": 15.65987599645704, "grad_norm": 0.29254019260406494, "learning_rate": 1e-05, "loss": 0.9259, "step": 17680 }, { "epoch": 15.66430469441984, "grad_norm": 0.3491929769515991, "learning_rate": 1e-05, "loss": 1.0331, "step": 17685 }, { "epoch": 15.66873339238264, "grad_norm": 0.22034764289855957, "learning_rate": 1e-05, "loss": 1.0106, "step": 17690 }, { "epoch": 15.67316209034544, "grad_norm": 0.21789421141147614, "learning_rate": 1e-05, "loss": 0.9326, "step": 17695 }, { "epoch": 15.677590788308237, "grad_norm": 0.2813005745410919, "learning_rate": 1e-05, "loss": 1.0044, "step": 17700 }, { "epoch": 15.682019486271036, "grad_norm": 0.2813015282154083, "learning_rate": 1e-05, "loss": 0.9706, "step": 17705 }, { "epoch": 15.686448184233836, "grad_norm": 0.22260402143001556, "learning_rate": 1e-05, "loss": 1.028, "step": 17710 }, { "epoch": 15.690876882196633, "grad_norm": 0.24874848127365112, "learning_rate": 1e-05, "loss": 1.0218, "step": 17715 }, { "epoch": 15.695305580159433, "grad_norm": 0.24613982439041138, "learning_rate": 1e-05, "loss": 1.0398, "step": 17720 }, { "epoch": 15.699734278122232, "grad_norm": 0.3024621605873108, "learning_rate": 1e-05, "loss": 1.0123, "step": 17725 }, { "epoch": 15.704162976085032, "grad_norm": 0.33095231652259827, "learning_rate": 1e-05, "loss": 0.9253, "step": 17730 }, { "epoch": 15.70859167404783, "grad_norm": 0.2548953890800476, "learning_rate": 1e-05, "loss": 0.9942, "step": 17735 }, { "epoch": 15.713020372010629, "grad_norm": 0.24033677577972412, "learning_rate": 1e-05, "loss": 1.0228, "step": 17740 }, { "epoch": 15.717449069973428, "grad_norm": 0.27227887511253357, "learning_rate": 1e-05, "loss": 0.9692, "step": 17745 }, { "epoch": 15.721877767936228, "grad_norm": 0.2598018944263458, "learning_rate": 1e-05, "loss": 0.9777, "step": 17750 }, { "epoch": 15.726306465899025, "grad_norm": 0.24490581452846527, "learning_rate": 1e-05, "loss": 0.9748, "step": 17755 }, { "epoch": 15.730735163861825, "grad_norm": 0.2886039614677429, "learning_rate": 1e-05, "loss": 0.9592, "step": 17760 }, { "epoch": 15.735163861824624, "grad_norm": 0.26948562264442444, "learning_rate": 1e-05, "loss": 0.9757, "step": 17765 }, { "epoch": 15.739592559787422, "grad_norm": 0.24950982630252838, "learning_rate": 1e-05, "loss": 0.9737, "step": 17770 }, { "epoch": 15.744021257750221, "grad_norm": 0.3059339225292206, "learning_rate": 1e-05, "loss": 0.9747, "step": 17775 }, { "epoch": 15.74844995571302, "grad_norm": 0.22925111651420593, "learning_rate": 1e-05, "loss": 0.9994, "step": 17780 }, { "epoch": 15.75287865367582, "grad_norm": 0.3430158495903015, "learning_rate": 1e-05, "loss": 0.9855, "step": 17785 }, { "epoch": 15.757307351638618, "grad_norm": 0.28006812930107117, "learning_rate": 1e-05, "loss": 1.0362, "step": 17790 }, { "epoch": 15.761736049601417, "grad_norm": 0.2532021105289459, "learning_rate": 1e-05, "loss": 1.0096, "step": 17795 }, { "epoch": 15.766164747564217, "grad_norm": 0.2364649623632431, "learning_rate": 1e-05, "loss": 1.0297, "step": 17800 }, { "epoch": 15.770593445527014, "grad_norm": 0.290894091129303, "learning_rate": 1e-05, "loss": 0.9639, "step": 17805 }, { "epoch": 15.775022143489814, "grad_norm": 0.29338327050209045, "learning_rate": 1e-05, "loss": 0.9676, "step": 17810 }, { "epoch": 15.779450841452613, "grad_norm": 0.2634062170982361, "learning_rate": 1e-05, "loss": 1.0205, "step": 17815 }, { "epoch": 15.783879539415413, "grad_norm": 0.25008612871170044, "learning_rate": 1e-05, "loss": 1.0432, "step": 17820 }, { "epoch": 15.78830823737821, "grad_norm": 0.21823376417160034, "learning_rate": 1e-05, "loss": 0.9933, "step": 17825 }, { "epoch": 15.79273693534101, "grad_norm": 0.24414171278476715, "learning_rate": 1e-05, "loss": 0.998, "step": 17830 }, { "epoch": 15.797165633303809, "grad_norm": 0.2700551152229309, "learning_rate": 1e-05, "loss": 1.0561, "step": 17835 }, { "epoch": 15.801594331266607, "grad_norm": 0.28298768401145935, "learning_rate": 1e-05, "loss": 0.9525, "step": 17840 }, { "epoch": 15.806023029229406, "grad_norm": 0.31701624393463135, "learning_rate": 1e-05, "loss": 1.0004, "step": 17845 }, { "epoch": 15.810451727192206, "grad_norm": 0.3518561124801636, "learning_rate": 1e-05, "loss": 0.9877, "step": 17850 }, { "epoch": 15.814880425155005, "grad_norm": 0.27521559596061707, "learning_rate": 1e-05, "loss": 0.9606, "step": 17855 }, { "epoch": 15.819309123117803, "grad_norm": 0.24579580128192902, "learning_rate": 1e-05, "loss": 0.9506, "step": 17860 }, { "epoch": 15.823737821080602, "grad_norm": 0.2322177141904831, "learning_rate": 1e-05, "loss": 1.0088, "step": 17865 }, { "epoch": 15.828166519043402, "grad_norm": 0.2954336106777191, "learning_rate": 1e-05, "loss": 0.997, "step": 17870 }, { "epoch": 15.832595217006201, "grad_norm": 0.24369779229164124, "learning_rate": 1e-05, "loss": 1.0155, "step": 17875 }, { "epoch": 15.837023914968999, "grad_norm": 0.26766467094421387, "learning_rate": 1e-05, "loss": 1.0079, "step": 17880 }, { "epoch": 15.841452612931798, "grad_norm": 0.3284904360771179, "learning_rate": 1e-05, "loss": 1.0274, "step": 17885 }, { "epoch": 15.845881310894598, "grad_norm": 0.3282492160797119, "learning_rate": 1e-05, "loss": 1.0249, "step": 17890 }, { "epoch": 15.850310008857395, "grad_norm": 0.31012794375419617, "learning_rate": 1e-05, "loss": 1.0046, "step": 17895 }, { "epoch": 15.854738706820195, "grad_norm": 0.23211978375911713, "learning_rate": 1e-05, "loss": 0.9563, "step": 17900 }, { "epoch": 15.859167404782994, "grad_norm": 0.30380022525787354, "learning_rate": 1e-05, "loss": 0.9925, "step": 17905 }, { "epoch": 15.863596102745793, "grad_norm": 0.25331732630729675, "learning_rate": 1e-05, "loss": 0.9953, "step": 17910 }, { "epoch": 15.868024800708591, "grad_norm": 0.2511480152606964, "learning_rate": 1e-05, "loss": 0.9998, "step": 17915 }, { "epoch": 15.87245349867139, "grad_norm": 0.2963160276412964, "learning_rate": 1e-05, "loss": 1.002, "step": 17920 }, { "epoch": 15.87688219663419, "grad_norm": 0.254862517118454, "learning_rate": 1e-05, "loss": 1.0235, "step": 17925 }, { "epoch": 15.88131089459699, "grad_norm": 0.2827042043209076, "learning_rate": 1e-05, "loss": 0.9846, "step": 17930 }, { "epoch": 15.885739592559787, "grad_norm": 0.2421710193157196, "learning_rate": 1e-05, "loss": 1.0139, "step": 17935 }, { "epoch": 15.890168290522587, "grad_norm": 0.2819615602493286, "learning_rate": 1e-05, "loss": 1.0099, "step": 17940 }, { "epoch": 15.894596988485386, "grad_norm": 0.3103281855583191, "learning_rate": 1e-05, "loss": 1.0219, "step": 17945 }, { "epoch": 15.899025686448184, "grad_norm": 0.25584855675697327, "learning_rate": 1e-05, "loss": 1.0027, "step": 17950 }, { "epoch": 15.903454384410983, "grad_norm": 0.24295596778392792, "learning_rate": 1e-05, "loss": 1.0319, "step": 17955 }, { "epoch": 15.907883082373782, "grad_norm": 0.2826685309410095, "learning_rate": 1e-05, "loss": 0.9682, "step": 17960 }, { "epoch": 15.91231178033658, "grad_norm": 0.29204365611076355, "learning_rate": 1e-05, "loss": 1.0179, "step": 17965 }, { "epoch": 15.91674047829938, "grad_norm": 0.2577894628047943, "learning_rate": 1e-05, "loss": 1.0475, "step": 17970 }, { "epoch": 15.921169176262179, "grad_norm": 0.24750283360481262, "learning_rate": 1e-05, "loss": 0.9679, "step": 17975 }, { "epoch": 15.925597874224978, "grad_norm": 0.2631934881210327, "learning_rate": 1e-05, "loss": 0.9769, "step": 17980 }, { "epoch": 15.930026572187776, "grad_norm": 0.3149290680885315, "learning_rate": 1e-05, "loss": 0.9981, "step": 17985 }, { "epoch": 15.934455270150576, "grad_norm": 0.3718342185020447, "learning_rate": 1e-05, "loss": 0.9984, "step": 17990 }, { "epoch": 15.938883968113375, "grad_norm": 0.2407686859369278, "learning_rate": 1e-05, "loss": 0.9805, "step": 17995 }, { "epoch": 15.943312666076174, "grad_norm": 0.34004491567611694, "learning_rate": 1e-05, "loss": 0.9418, "step": 18000 }, { "epoch": 15.947741364038972, "grad_norm": 0.25165820121765137, "learning_rate": 1e-05, "loss": 0.9425, "step": 18005 }, { "epoch": 15.952170062001771, "grad_norm": 0.25692176818847656, "learning_rate": 1e-05, "loss": 1.0023, "step": 18010 }, { "epoch": 15.956598759964571, "grad_norm": 0.26404044032096863, "learning_rate": 1e-05, "loss": 0.9928, "step": 18015 }, { "epoch": 15.961027457927369, "grad_norm": 0.2877795398235321, "learning_rate": 1e-05, "loss": 1.001, "step": 18020 }, { "epoch": 15.965456155890168, "grad_norm": 0.2511865198612213, "learning_rate": 1e-05, "loss": 1.0153, "step": 18025 }, { "epoch": 15.969884853852967, "grad_norm": 0.24652396142482758, "learning_rate": 1e-05, "loss": 1.0372, "step": 18030 }, { "epoch": 15.974313551815767, "grad_norm": 0.2523021697998047, "learning_rate": 1e-05, "loss": 1.0665, "step": 18035 }, { "epoch": 15.978742249778564, "grad_norm": 0.23684607446193695, "learning_rate": 1e-05, "loss": 0.9888, "step": 18040 }, { "epoch": 15.983170947741364, "grad_norm": 0.25142917037010193, "learning_rate": 1e-05, "loss": 0.9956, "step": 18045 }, { "epoch": 15.987599645704163, "grad_norm": 0.2796723246574402, "learning_rate": 1e-05, "loss": 0.9819, "step": 18050 }, { "epoch": 15.992028343666963, "grad_norm": 0.26551374793052673, "learning_rate": 1e-05, "loss": 1.0363, "step": 18055 }, { "epoch": 15.99645704162976, "grad_norm": 0.2703036963939667, "learning_rate": 1e-05, "loss": 0.9751, "step": 18060 }, { "epoch": 16.000885739592558, "grad_norm": 0.24998635053634644, "learning_rate": 1e-05, "loss": 0.9603, "step": 18065 }, { "epoch": 16.00531443755536, "grad_norm": 0.24102988839149475, "learning_rate": 1e-05, "loss": 1.0682, "step": 18070 }, { "epoch": 16.009743135518157, "grad_norm": 0.23604339361190796, "learning_rate": 1e-05, "loss": 0.9984, "step": 18075 }, { "epoch": 16.014171833480958, "grad_norm": 0.23942531645298004, "learning_rate": 1e-05, "loss": 0.9698, "step": 18080 }, { "epoch": 16.018600531443756, "grad_norm": 0.2519570291042328, "learning_rate": 1e-05, "loss": 0.9731, "step": 18085 }, { "epoch": 16.023029229406553, "grad_norm": 0.30462661385536194, "learning_rate": 1e-05, "loss": 1.0506, "step": 18090 }, { "epoch": 16.027457927369355, "grad_norm": 0.24723510444164276, "learning_rate": 1e-05, "loss": 1.0329, "step": 18095 }, { "epoch": 16.031886625332152, "grad_norm": 0.23702853918075562, "learning_rate": 1e-05, "loss": 1.0005, "step": 18100 }, { "epoch": 16.03631532329495, "grad_norm": 0.24404092133045197, "learning_rate": 1e-05, "loss": 1.0329, "step": 18105 }, { "epoch": 16.04074402125775, "grad_norm": 0.2478272020816803, "learning_rate": 1e-05, "loss": 1.0034, "step": 18110 }, { "epoch": 16.04517271922055, "grad_norm": 0.32819488644599915, "learning_rate": 1e-05, "loss": 0.9999, "step": 18115 }, { "epoch": 16.049601417183347, "grad_norm": 0.31817978620529175, "learning_rate": 1e-05, "loss": 0.9495, "step": 18120 }, { "epoch": 16.054030115146148, "grad_norm": 0.31144028902053833, "learning_rate": 1e-05, "loss": 0.9926, "step": 18125 }, { "epoch": 16.058458813108945, "grad_norm": 0.23166677355766296, "learning_rate": 1e-05, "loss": 1.0166, "step": 18130 }, { "epoch": 16.062887511071747, "grad_norm": 0.26017239689826965, "learning_rate": 1e-05, "loss": 1.0076, "step": 18135 }, { "epoch": 16.067316209034544, "grad_norm": 0.2920660078525543, "learning_rate": 1e-05, "loss": 0.9957, "step": 18140 }, { "epoch": 16.071744906997342, "grad_norm": 0.2695963382720947, "learning_rate": 1e-05, "loss": 1.0052, "step": 18145 }, { "epoch": 16.076173604960143, "grad_norm": 0.27189207077026367, "learning_rate": 1e-05, "loss": 0.9935, "step": 18150 }, { "epoch": 16.08060230292294, "grad_norm": 0.257854700088501, "learning_rate": 1e-05, "loss": 0.9786, "step": 18155 }, { "epoch": 16.08503100088574, "grad_norm": 0.31605926156044006, "learning_rate": 1e-05, "loss": 0.9979, "step": 18160 }, { "epoch": 16.08945969884854, "grad_norm": 0.2787530720233917, "learning_rate": 1e-05, "loss": 1.0505, "step": 18165 }, { "epoch": 16.093888396811337, "grad_norm": 0.33719149231910706, "learning_rate": 1e-05, "loss": 0.9692, "step": 18170 }, { "epoch": 16.098317094774135, "grad_norm": 0.32978686690330505, "learning_rate": 1e-05, "loss": 0.987, "step": 18175 }, { "epoch": 16.102745792736936, "grad_norm": 0.3089555501937866, "learning_rate": 1e-05, "loss": 0.9882, "step": 18180 }, { "epoch": 16.107174490699734, "grad_norm": 0.23977087438106537, "learning_rate": 1e-05, "loss": 0.9714, "step": 18185 }, { "epoch": 16.11160318866253, "grad_norm": 0.265188068151474, "learning_rate": 1e-05, "loss": 0.9376, "step": 18190 }, { "epoch": 16.116031886625333, "grad_norm": 0.2618181109428406, "learning_rate": 1e-05, "loss": 1.0382, "step": 18195 }, { "epoch": 16.12046058458813, "grad_norm": 0.21465323865413666, "learning_rate": 1e-05, "loss": 0.9823, "step": 18200 }, { "epoch": 16.12488928255093, "grad_norm": 0.2547129690647125, "learning_rate": 1e-05, "loss": 1.0187, "step": 18205 }, { "epoch": 16.12931798051373, "grad_norm": 0.2909153997898102, "learning_rate": 1e-05, "loss": 0.9749, "step": 18210 }, { "epoch": 16.133746678476527, "grad_norm": 0.2824973464012146, "learning_rate": 1e-05, "loss": 1.005, "step": 18215 }, { "epoch": 16.138175376439328, "grad_norm": 0.2601968050003052, "learning_rate": 1e-05, "loss": 0.9758, "step": 18220 }, { "epoch": 16.142604074402126, "grad_norm": 0.30577826499938965, "learning_rate": 1e-05, "loss": 1.0494, "step": 18225 }, { "epoch": 16.147032772364923, "grad_norm": 0.25143304467201233, "learning_rate": 1e-05, "loss": 0.9515, "step": 18230 }, { "epoch": 16.151461470327725, "grad_norm": 0.2998024523258209, "learning_rate": 1e-05, "loss": 1.0091, "step": 18235 }, { "epoch": 16.155890168290522, "grad_norm": 0.2602280080318451, "learning_rate": 1e-05, "loss": 1.0139, "step": 18240 }, { "epoch": 16.16031886625332, "grad_norm": 0.31770411133766174, "learning_rate": 1e-05, "loss": 1.0739, "step": 18245 }, { "epoch": 16.16474756421612, "grad_norm": 0.22414326667785645, "learning_rate": 1e-05, "loss": 1.0169, "step": 18250 }, { "epoch": 16.16917626217892, "grad_norm": 0.24530647695064545, "learning_rate": 1e-05, "loss": 0.9874, "step": 18255 }, { "epoch": 16.17360496014172, "grad_norm": 0.2370666265487671, "learning_rate": 1e-05, "loss": 0.9708, "step": 18260 }, { "epoch": 16.178033658104518, "grad_norm": 0.27960169315338135, "learning_rate": 1e-05, "loss": 1.0178, "step": 18265 }, { "epoch": 16.182462356067315, "grad_norm": 0.25410163402557373, "learning_rate": 1e-05, "loss": 1.0115, "step": 18270 }, { "epoch": 16.186891054030117, "grad_norm": 0.2481650412082672, "learning_rate": 1e-05, "loss": 0.9151, "step": 18275 }, { "epoch": 16.191319751992914, "grad_norm": 0.25486287474632263, "learning_rate": 1e-05, "loss": 0.9834, "step": 18280 }, { "epoch": 16.195748449955712, "grad_norm": 0.2724369764328003, "learning_rate": 1e-05, "loss": 0.958, "step": 18285 }, { "epoch": 16.200177147918513, "grad_norm": 0.34363308548927307, "learning_rate": 1e-05, "loss": 0.9681, "step": 18290 }, { "epoch": 16.20460584588131, "grad_norm": 0.3404398262500763, "learning_rate": 1e-05, "loss": 0.9804, "step": 18295 }, { "epoch": 16.20903454384411, "grad_norm": 0.2944444715976715, "learning_rate": 1e-05, "loss": 1.042, "step": 18300 }, { "epoch": 16.21346324180691, "grad_norm": 0.23392118513584137, "learning_rate": 1e-05, "loss": 1.0087, "step": 18305 }, { "epoch": 16.217891939769707, "grad_norm": 0.2643710970878601, "learning_rate": 1e-05, "loss": 1.0166, "step": 18310 }, { "epoch": 16.22232063773251, "grad_norm": 0.3132951855659485, "learning_rate": 1e-05, "loss": 1.0001, "step": 18315 }, { "epoch": 16.226749335695306, "grad_norm": 0.2576334476470947, "learning_rate": 1e-05, "loss": 0.9885, "step": 18320 }, { "epoch": 16.231178033658104, "grad_norm": 0.2385687530040741, "learning_rate": 1e-05, "loss": 0.9624, "step": 18325 }, { "epoch": 16.235606731620905, "grad_norm": 0.2157694697380066, "learning_rate": 1e-05, "loss": 1.0194, "step": 18330 }, { "epoch": 16.240035429583703, "grad_norm": 0.28696709871292114, "learning_rate": 1e-05, "loss": 0.9673, "step": 18335 }, { "epoch": 16.2444641275465, "grad_norm": 0.25031882524490356, "learning_rate": 1e-05, "loss": 1.052, "step": 18340 }, { "epoch": 16.2488928255093, "grad_norm": 0.2438434660434723, "learning_rate": 1e-05, "loss": 0.9669, "step": 18345 }, { "epoch": 16.2533215234721, "grad_norm": 0.2602662146091461, "learning_rate": 1e-05, "loss": 0.9846, "step": 18350 }, { "epoch": 16.257750221434897, "grad_norm": 0.24205753207206726, "learning_rate": 1e-05, "loss": 1.0015, "step": 18355 }, { "epoch": 16.262178919397698, "grad_norm": 0.30614742636680603, "learning_rate": 1e-05, "loss": 0.9978, "step": 18360 }, { "epoch": 16.266607617360496, "grad_norm": 0.2930641770362854, "learning_rate": 1e-05, "loss": 1.0013, "step": 18365 }, { "epoch": 16.271036315323293, "grad_norm": 0.2607276439666748, "learning_rate": 1e-05, "loss": 0.918, "step": 18370 }, { "epoch": 16.275465013286095, "grad_norm": 0.32311829924583435, "learning_rate": 1e-05, "loss": 1.031, "step": 18375 }, { "epoch": 16.279893711248892, "grad_norm": 0.2691332697868347, "learning_rate": 1e-05, "loss": 1.004, "step": 18380 }, { "epoch": 16.284322409211693, "grad_norm": 0.2581843435764313, "learning_rate": 1e-05, "loss": 1.0011, "step": 18385 }, { "epoch": 16.28875110717449, "grad_norm": 0.23530881106853485, "learning_rate": 1e-05, "loss": 0.9892, "step": 18390 }, { "epoch": 16.29317980513729, "grad_norm": 0.2343352735042572, "learning_rate": 1e-05, "loss": 0.9862, "step": 18395 }, { "epoch": 16.29760850310009, "grad_norm": 0.236292764544487, "learning_rate": 1e-05, "loss": 1.0618, "step": 18400 }, { "epoch": 16.302037201062888, "grad_norm": 0.2484918087720871, "learning_rate": 1e-05, "loss": 0.9735, "step": 18405 }, { "epoch": 16.306465899025685, "grad_norm": 0.2452331930398941, "learning_rate": 1e-05, "loss": 0.9398, "step": 18410 }, { "epoch": 16.310894596988486, "grad_norm": 0.23057523369789124, "learning_rate": 1e-05, "loss": 0.9716, "step": 18415 }, { "epoch": 16.315323294951284, "grad_norm": 0.27240073680877686, "learning_rate": 1e-05, "loss": 1.0333, "step": 18420 }, { "epoch": 16.31975199291408, "grad_norm": 0.24729584157466888, "learning_rate": 1e-05, "loss": 0.9728, "step": 18425 }, { "epoch": 16.324180690876883, "grad_norm": 0.2590065598487854, "learning_rate": 1e-05, "loss": 1.0326, "step": 18430 }, { "epoch": 16.32860938883968, "grad_norm": 0.21345050632953644, "learning_rate": 1e-05, "loss": 0.953, "step": 18435 }, { "epoch": 16.333038086802482, "grad_norm": 0.23559211194515228, "learning_rate": 1e-05, "loss": 1.0345, "step": 18440 }, { "epoch": 16.33746678476528, "grad_norm": 0.23431992530822754, "learning_rate": 1e-05, "loss": 0.9887, "step": 18445 }, { "epoch": 16.341895482728077, "grad_norm": 0.22062960267066956, "learning_rate": 1e-05, "loss": 1.082, "step": 18450 }, { "epoch": 16.34632418069088, "grad_norm": 0.2790276110172272, "learning_rate": 1e-05, "loss": 1.0059, "step": 18455 }, { "epoch": 16.350752878653676, "grad_norm": 0.2819046974182129, "learning_rate": 1e-05, "loss": 1.0389, "step": 18460 }, { "epoch": 16.355181576616474, "grad_norm": 0.25946441292762756, "learning_rate": 1e-05, "loss": 0.9684, "step": 18465 }, { "epoch": 16.359610274579275, "grad_norm": 0.3128257691860199, "learning_rate": 1e-05, "loss": 1.0208, "step": 18470 }, { "epoch": 16.364038972542073, "grad_norm": 0.28413891792297363, "learning_rate": 1e-05, "loss": 0.9461, "step": 18475 }, { "epoch": 16.36846767050487, "grad_norm": 0.27322763204574585, "learning_rate": 1e-05, "loss": 0.9638, "step": 18480 }, { "epoch": 16.37289636846767, "grad_norm": 0.28998783230781555, "learning_rate": 1e-05, "loss": 0.9748, "step": 18485 }, { "epoch": 16.37732506643047, "grad_norm": 0.2913134694099426, "learning_rate": 1e-05, "loss": 0.9783, "step": 18490 }, { "epoch": 16.381753764393267, "grad_norm": 0.226438969373703, "learning_rate": 1e-05, "loss": 1.0021, "step": 18495 }, { "epoch": 16.386182462356068, "grad_norm": 0.2568718194961548, "learning_rate": 1e-05, "loss": 0.9601, "step": 18500 }, { "epoch": 16.390611160318866, "grad_norm": 0.24007783830165863, "learning_rate": 1e-05, "loss": 0.9353, "step": 18505 }, { "epoch": 16.395039858281667, "grad_norm": 0.2564709782600403, "learning_rate": 1e-05, "loss": 0.9958, "step": 18510 }, { "epoch": 16.399468556244464, "grad_norm": 0.23964142799377441, "learning_rate": 1e-05, "loss": 0.9481, "step": 18515 }, { "epoch": 16.403897254207262, "grad_norm": 0.2868022322654724, "learning_rate": 1e-05, "loss": 1.0464, "step": 18520 }, { "epoch": 16.408325952170063, "grad_norm": 0.2683990001678467, "learning_rate": 1e-05, "loss": 0.986, "step": 18525 }, { "epoch": 16.41275465013286, "grad_norm": 0.27346548438072205, "learning_rate": 1e-05, "loss": 1.0497, "step": 18530 }, { "epoch": 16.41718334809566, "grad_norm": 0.2398541420698166, "learning_rate": 1e-05, "loss": 1.0379, "step": 18535 }, { "epoch": 16.42161204605846, "grad_norm": 0.2636696696281433, "learning_rate": 1e-05, "loss": 1.0247, "step": 18540 }, { "epoch": 16.426040744021257, "grad_norm": 0.2778176963329315, "learning_rate": 1e-05, "loss": 0.9807, "step": 18545 }, { "epoch": 16.430469441984055, "grad_norm": 0.24303869903087616, "learning_rate": 1e-05, "loss": 1.0015, "step": 18550 }, { "epoch": 16.434898139946856, "grad_norm": 0.21459044516086578, "learning_rate": 1e-05, "loss": 0.9856, "step": 18555 }, { "epoch": 16.439326837909654, "grad_norm": 0.2780698835849762, "learning_rate": 1e-05, "loss": 0.9955, "step": 18560 }, { "epoch": 16.443755535872455, "grad_norm": 0.211567685008049, "learning_rate": 1e-05, "loss": 0.9759, "step": 18565 }, { "epoch": 16.448184233835253, "grad_norm": 0.30449023842811584, "learning_rate": 1e-05, "loss": 1.0102, "step": 18570 }, { "epoch": 16.45261293179805, "grad_norm": 0.2632780373096466, "learning_rate": 1e-05, "loss": 1.0269, "step": 18575 }, { "epoch": 16.45704162976085, "grad_norm": 0.28514331579208374, "learning_rate": 1e-05, "loss": 0.9584, "step": 18580 }, { "epoch": 16.46147032772365, "grad_norm": 0.2687274217605591, "learning_rate": 1e-05, "loss": 0.9495, "step": 18585 }, { "epoch": 16.465899025686447, "grad_norm": 0.304848313331604, "learning_rate": 1e-05, "loss": 1.0013, "step": 18590 }, { "epoch": 16.47032772364925, "grad_norm": 0.25122809410095215, "learning_rate": 1e-05, "loss": 0.9672, "step": 18595 }, { "epoch": 16.474756421612046, "grad_norm": 0.22518500685691833, "learning_rate": 1e-05, "loss": 1.006, "step": 18600 }, { "epoch": 16.479185119574844, "grad_norm": 0.20703738927841187, "learning_rate": 1e-05, "loss": 1.026, "step": 18605 }, { "epoch": 16.483613817537645, "grad_norm": 0.2702414095401764, "learning_rate": 1e-05, "loss": 1.0283, "step": 18610 }, { "epoch": 16.488042515500442, "grad_norm": 0.2425641268491745, "learning_rate": 1e-05, "loss": 0.9814, "step": 18615 }, { "epoch": 16.49247121346324, "grad_norm": 0.2771373689174652, "learning_rate": 1e-05, "loss": 1.022, "step": 18620 }, { "epoch": 16.49689991142604, "grad_norm": 0.25770580768585205, "learning_rate": 1e-05, "loss": 1.0147, "step": 18625 }, { "epoch": 16.50132860938884, "grad_norm": 0.28298768401145935, "learning_rate": 1e-05, "loss": 0.9653, "step": 18630 }, { "epoch": 16.50575730735164, "grad_norm": 0.27143004536628723, "learning_rate": 1e-05, "loss": 0.973, "step": 18635 }, { "epoch": 16.510186005314438, "grad_norm": 0.25454238057136536, "learning_rate": 1e-05, "loss": 0.9518, "step": 18640 }, { "epoch": 16.514614703277235, "grad_norm": 0.2803010046482086, "learning_rate": 1e-05, "loss": 0.9668, "step": 18645 }, { "epoch": 16.519043401240037, "grad_norm": 0.27099743485450745, "learning_rate": 1e-05, "loss": 0.9923, "step": 18650 }, { "epoch": 16.523472099202834, "grad_norm": 0.3108830749988556, "learning_rate": 1e-05, "loss": 0.9792, "step": 18655 }, { "epoch": 16.527900797165632, "grad_norm": 0.24949656426906586, "learning_rate": 1e-05, "loss": 0.9778, "step": 18660 }, { "epoch": 16.532329495128433, "grad_norm": 0.28336501121520996, "learning_rate": 1e-05, "loss": 0.982, "step": 18665 }, { "epoch": 16.53675819309123, "grad_norm": 0.23413774371147156, "learning_rate": 1e-05, "loss": 1.0506, "step": 18670 }, { "epoch": 16.54118689105403, "grad_norm": 0.2716546952724457, "learning_rate": 1e-05, "loss": 1.0129, "step": 18675 }, { "epoch": 16.54561558901683, "grad_norm": 0.28943341970443726, "learning_rate": 1e-05, "loss": 0.9794, "step": 18680 }, { "epoch": 16.550044286979627, "grad_norm": 0.2586379945278168, "learning_rate": 1e-05, "loss": 0.9831, "step": 18685 }, { "epoch": 16.55447298494243, "grad_norm": 0.29518845677375793, "learning_rate": 1e-05, "loss": 1.0241, "step": 18690 }, { "epoch": 16.558901682905226, "grad_norm": 0.2944999039173126, "learning_rate": 1e-05, "loss": 0.9847, "step": 18695 }, { "epoch": 16.563330380868024, "grad_norm": 0.27595025300979614, "learning_rate": 1e-05, "loss": 1.0133, "step": 18700 }, { "epoch": 16.567759078830825, "grad_norm": 0.3008978068828583, "learning_rate": 1e-05, "loss": 1.0224, "step": 18705 }, { "epoch": 16.572187776793623, "grad_norm": 0.3106701672077179, "learning_rate": 1e-05, "loss": 1.0561, "step": 18710 }, { "epoch": 16.57661647475642, "grad_norm": 0.2804649770259857, "learning_rate": 1e-05, "loss": 0.9548, "step": 18715 }, { "epoch": 16.58104517271922, "grad_norm": 0.24391058087348938, "learning_rate": 1e-05, "loss": 1.0379, "step": 18720 }, { "epoch": 16.58547387068202, "grad_norm": 0.2532961368560791, "learning_rate": 1e-05, "loss": 1.0526, "step": 18725 }, { "epoch": 16.589902568644817, "grad_norm": 0.23720023036003113, "learning_rate": 1e-05, "loss": 0.9325, "step": 18730 }, { "epoch": 16.594331266607618, "grad_norm": 0.23242931067943573, "learning_rate": 1e-05, "loss": 1.006, "step": 18735 }, { "epoch": 16.598759964570416, "grad_norm": 0.27134084701538086, "learning_rate": 1e-05, "loss": 0.9619, "step": 18740 }, { "epoch": 16.603188662533213, "grad_norm": 0.26074808835983276, "learning_rate": 1e-05, "loss": 0.9432, "step": 18745 }, { "epoch": 16.607617360496015, "grad_norm": 0.23537185788154602, "learning_rate": 1e-05, "loss": 1.0086, "step": 18750 }, { "epoch": 16.612046058458812, "grad_norm": 0.27737313508987427, "learning_rate": 1e-05, "loss": 1.002, "step": 18755 }, { "epoch": 16.616474756421614, "grad_norm": 0.2559964656829834, "learning_rate": 1e-05, "loss": 0.9661, "step": 18760 }, { "epoch": 16.62090345438441, "grad_norm": 0.2800896167755127, "learning_rate": 1e-05, "loss": 0.9475, "step": 18765 }, { "epoch": 16.62533215234721, "grad_norm": 0.26467326283454895, "learning_rate": 1e-05, "loss": 0.9815, "step": 18770 }, { "epoch": 16.62976085031001, "grad_norm": 0.21582509577274323, "learning_rate": 1e-05, "loss": 0.9995, "step": 18775 }, { "epoch": 16.634189548272808, "grad_norm": 0.2690695822238922, "learning_rate": 1e-05, "loss": 1.0407, "step": 18780 }, { "epoch": 16.638618246235605, "grad_norm": 0.296164870262146, "learning_rate": 1e-05, "loss": 0.9292, "step": 18785 }, { "epoch": 16.643046944198407, "grad_norm": 0.2603996992111206, "learning_rate": 1e-05, "loss": 0.9651, "step": 18790 }, { "epoch": 16.647475642161204, "grad_norm": 0.2523553669452667, "learning_rate": 1e-05, "loss": 0.9699, "step": 18795 }, { "epoch": 16.651904340124002, "grad_norm": 0.221808522939682, "learning_rate": 1e-05, "loss": 0.9729, "step": 18800 }, { "epoch": 16.656333038086803, "grad_norm": 0.29286250472068787, "learning_rate": 1e-05, "loss": 0.9689, "step": 18805 }, { "epoch": 16.6607617360496, "grad_norm": 0.3606424033641815, "learning_rate": 1e-05, "loss": 1.0284, "step": 18810 }, { "epoch": 16.665190434012402, "grad_norm": 0.25356325507164, "learning_rate": 1e-05, "loss": 0.9757, "step": 18815 }, { "epoch": 16.6696191319752, "grad_norm": 0.2800666093826294, "learning_rate": 1e-05, "loss": 0.9713, "step": 18820 }, { "epoch": 16.674047829937997, "grad_norm": 0.23566178977489471, "learning_rate": 1e-05, "loss": 0.961, "step": 18825 }, { "epoch": 16.6784765279008, "grad_norm": 0.24341733753681183, "learning_rate": 1e-05, "loss": 0.9736, "step": 18830 }, { "epoch": 16.682905225863596, "grad_norm": 0.29197293519973755, "learning_rate": 1e-05, "loss": 0.9529, "step": 18835 }, { "epoch": 16.687333923826394, "grad_norm": 0.29102814197540283, "learning_rate": 1e-05, "loss": 1.0126, "step": 18840 }, { "epoch": 16.691762621789195, "grad_norm": 0.2886674404144287, "learning_rate": 1e-05, "loss": 1.0176, "step": 18845 }, { "epoch": 16.696191319751993, "grad_norm": 0.2126307338476181, "learning_rate": 1e-05, "loss": 0.9957, "step": 18850 }, { "epoch": 16.70062001771479, "grad_norm": 0.23335541784763336, "learning_rate": 1e-05, "loss": 1.0548, "step": 18855 }, { "epoch": 16.70504871567759, "grad_norm": 0.29025331139564514, "learning_rate": 1e-05, "loss": 0.9501, "step": 18860 }, { "epoch": 16.70947741364039, "grad_norm": 0.3079996407032013, "learning_rate": 1e-05, "loss": 1.0154, "step": 18865 }, { "epoch": 16.713906111603187, "grad_norm": 0.26167216897010803, "learning_rate": 1e-05, "loss": 1.0281, "step": 18870 }, { "epoch": 16.718334809565988, "grad_norm": 0.22602461278438568, "learning_rate": 1e-05, "loss": 1.0407, "step": 18875 }, { "epoch": 16.722763507528786, "grad_norm": 0.2575356364250183, "learning_rate": 1e-05, "loss": 0.9705, "step": 18880 }, { "epoch": 16.727192205491587, "grad_norm": 0.2534136474132538, "learning_rate": 1e-05, "loss": 1.0265, "step": 18885 }, { "epoch": 16.731620903454385, "grad_norm": 0.263756662607193, "learning_rate": 1e-05, "loss": 0.9564, "step": 18890 }, { "epoch": 16.736049601417182, "grad_norm": 0.2415543496608734, "learning_rate": 1e-05, "loss": 0.971, "step": 18895 }, { "epoch": 16.740478299379983, "grad_norm": 0.29941806197166443, "learning_rate": 1e-05, "loss": 0.9954, "step": 18900 }, { "epoch": 16.74490699734278, "grad_norm": 0.3705863356590271, "learning_rate": 1e-05, "loss": 0.9864, "step": 18905 }, { "epoch": 16.74933569530558, "grad_norm": 0.28699252009391785, "learning_rate": 1e-05, "loss": 1.0118, "step": 18910 }, { "epoch": 16.75376439326838, "grad_norm": 0.27103716135025024, "learning_rate": 1e-05, "loss": 1.0087, "step": 18915 }, { "epoch": 16.758193091231178, "grad_norm": 0.29779547452926636, "learning_rate": 1e-05, "loss": 0.9969, "step": 18920 }, { "epoch": 16.762621789193975, "grad_norm": 0.2174983024597168, "learning_rate": 1e-05, "loss": 1.0289, "step": 18925 }, { "epoch": 16.767050487156776, "grad_norm": 0.24685895442962646, "learning_rate": 1e-05, "loss": 0.9687, "step": 18930 }, { "epoch": 16.771479185119574, "grad_norm": 0.25331911444664, "learning_rate": 1e-05, "loss": 0.986, "step": 18935 }, { "epoch": 16.775907883082375, "grad_norm": 0.23541639745235443, "learning_rate": 1e-05, "loss": 0.9993, "step": 18940 }, { "epoch": 16.780336581045173, "grad_norm": 0.2841220796108246, "learning_rate": 1e-05, "loss": 1.0241, "step": 18945 }, { "epoch": 16.78476527900797, "grad_norm": 0.32827258110046387, "learning_rate": 1e-05, "loss": 0.9921, "step": 18950 }, { "epoch": 16.789193976970772, "grad_norm": 0.24203045666217804, "learning_rate": 1e-05, "loss": 0.9989, "step": 18955 }, { "epoch": 16.79362267493357, "grad_norm": 0.2676389217376709, "learning_rate": 1e-05, "loss": 1.007, "step": 18960 }, { "epoch": 16.798051372896367, "grad_norm": 0.24541980028152466, "learning_rate": 1e-05, "loss": 0.9976, "step": 18965 }, { "epoch": 16.80248007085917, "grad_norm": 0.24969732761383057, "learning_rate": 1e-05, "loss": 0.957, "step": 18970 }, { "epoch": 16.806908768821966, "grad_norm": 0.2652243375778198, "learning_rate": 1e-05, "loss": 0.9153, "step": 18975 }, { "epoch": 16.811337466784764, "grad_norm": 0.2739236056804657, "learning_rate": 1e-05, "loss": 1.0059, "step": 18980 }, { "epoch": 16.815766164747565, "grad_norm": 0.3239922523498535, "learning_rate": 1e-05, "loss": 1.0515, "step": 18985 }, { "epoch": 16.820194862710363, "grad_norm": 0.24756105244159698, "learning_rate": 1e-05, "loss": 0.9746, "step": 18990 }, { "epoch": 16.824623560673164, "grad_norm": 0.2689042091369629, "learning_rate": 1e-05, "loss": 0.981, "step": 18995 }, { "epoch": 16.82905225863596, "grad_norm": 0.2473742961883545, "learning_rate": 1e-05, "loss": 0.9806, "step": 19000 }, { "epoch": 16.83348095659876, "grad_norm": 0.2622603178024292, "learning_rate": 1e-05, "loss": 0.9211, "step": 19005 }, { "epoch": 16.83790965456156, "grad_norm": 0.3478242754936218, "learning_rate": 1e-05, "loss": 1.0095, "step": 19010 }, { "epoch": 16.842338352524358, "grad_norm": 0.2884719669818878, "learning_rate": 1e-05, "loss": 0.9558, "step": 19015 }, { "epoch": 16.846767050487156, "grad_norm": 0.23757636547088623, "learning_rate": 1e-05, "loss": 1.0145, "step": 19020 }, { "epoch": 16.851195748449957, "grad_norm": 0.25557246804237366, "learning_rate": 1e-05, "loss": 1.0049, "step": 19025 }, { "epoch": 16.855624446412754, "grad_norm": 0.24929285049438477, "learning_rate": 1e-05, "loss": 1.0502, "step": 19030 }, { "epoch": 16.860053144375552, "grad_norm": 0.2467043548822403, "learning_rate": 1e-05, "loss": 0.9823, "step": 19035 }, { "epoch": 16.864481842338353, "grad_norm": 0.24875454604625702, "learning_rate": 1e-05, "loss": 0.994, "step": 19040 }, { "epoch": 16.86891054030115, "grad_norm": 0.24771851301193237, "learning_rate": 1e-05, "loss": 1.0212, "step": 19045 }, { "epoch": 16.873339238263952, "grad_norm": 0.2596139907836914, "learning_rate": 1e-05, "loss": 1.036, "step": 19050 }, { "epoch": 16.87776793622675, "grad_norm": 0.2807680070400238, "learning_rate": 1e-05, "loss": 0.9942, "step": 19055 }, { "epoch": 16.882196634189548, "grad_norm": 0.25339943170547485, "learning_rate": 1e-05, "loss": 1.0032, "step": 19060 }, { "epoch": 16.88662533215235, "grad_norm": 0.29721444845199585, "learning_rate": 1e-05, "loss": 0.9834, "step": 19065 }, { "epoch": 16.891054030115146, "grad_norm": 0.2381032556295395, "learning_rate": 1e-05, "loss": 0.9478, "step": 19070 }, { "epoch": 16.895482728077944, "grad_norm": 0.2421633005142212, "learning_rate": 1e-05, "loss": 1.0129, "step": 19075 }, { "epoch": 16.899911426040745, "grad_norm": 0.3250364065170288, "learning_rate": 1e-05, "loss": 1.0199, "step": 19080 }, { "epoch": 16.904340124003543, "grad_norm": 0.2659304141998291, "learning_rate": 1e-05, "loss": 0.9871, "step": 19085 }, { "epoch": 16.90876882196634, "grad_norm": 0.3063162565231323, "learning_rate": 1e-05, "loss": 1.0085, "step": 19090 }, { "epoch": 16.913197519929142, "grad_norm": 0.38583600521087646, "learning_rate": 1e-05, "loss": 1.0124, "step": 19095 }, { "epoch": 16.91762621789194, "grad_norm": 0.2514832615852356, "learning_rate": 1e-05, "loss": 0.9695, "step": 19100 }, { "epoch": 16.922054915854737, "grad_norm": 0.2967970371246338, "learning_rate": 1e-05, "loss": 0.994, "step": 19105 }, { "epoch": 16.92648361381754, "grad_norm": 0.2786117494106293, "learning_rate": 1e-05, "loss": 0.9845, "step": 19110 }, { "epoch": 16.930912311780336, "grad_norm": 0.2897469103336334, "learning_rate": 1e-05, "loss": 0.9879, "step": 19115 }, { "epoch": 16.935341009743137, "grad_norm": 0.25544604659080505, "learning_rate": 1e-05, "loss": 0.996, "step": 19120 }, { "epoch": 16.939769707705935, "grad_norm": 0.2646866738796234, "learning_rate": 1e-05, "loss": 0.9794, "step": 19125 }, { "epoch": 16.944198405668732, "grad_norm": 0.28695422410964966, "learning_rate": 1e-05, "loss": 0.9766, "step": 19130 }, { "epoch": 16.948627103631534, "grad_norm": 0.22553794085979462, "learning_rate": 1e-05, "loss": 1.0261, "step": 19135 }, { "epoch": 16.95305580159433, "grad_norm": 0.2556069493293762, "learning_rate": 1e-05, "loss": 0.9643, "step": 19140 }, { "epoch": 16.95748449955713, "grad_norm": 0.24725207686424255, "learning_rate": 1e-05, "loss": 1.0017, "step": 19145 }, { "epoch": 16.96191319751993, "grad_norm": 0.2623295485973358, "learning_rate": 1e-05, "loss": 1.001, "step": 19150 }, { "epoch": 16.966341895482728, "grad_norm": 0.2691655158996582, "learning_rate": 1e-05, "loss": 0.9483, "step": 19155 }, { "epoch": 16.970770593445526, "grad_norm": 0.25209763646125793, "learning_rate": 1e-05, "loss": 1.0013, "step": 19160 }, { "epoch": 16.975199291408327, "grad_norm": 0.23078106343746185, "learning_rate": 1e-05, "loss": 0.9544, "step": 19165 }, { "epoch": 16.979627989371124, "grad_norm": 0.2285294383764267, "learning_rate": 1e-05, "loss": 0.954, "step": 19170 }, { "epoch": 16.984056687333926, "grad_norm": 0.2527830898761749, "learning_rate": 1e-05, "loss": 0.9796, "step": 19175 }, { "epoch": 16.988485385296723, "grad_norm": 0.26528918743133545, "learning_rate": 1e-05, "loss": 0.9862, "step": 19180 }, { "epoch": 16.99291408325952, "grad_norm": 0.27150407433509827, "learning_rate": 1e-05, "loss": 1.0044, "step": 19185 }, { "epoch": 16.997342781222322, "grad_norm": 0.2638486325740814, "learning_rate": 1e-05, "loss": 1.0138, "step": 19190 }, { "epoch": 17.00177147918512, "grad_norm": 0.27760282158851624, "learning_rate": 1e-05, "loss": 0.972, "step": 19195 }, { "epoch": 17.006200177147917, "grad_norm": 0.28149768710136414, "learning_rate": 1e-05, "loss": 0.9976, "step": 19200 }, { "epoch": 17.01062887511072, "grad_norm": 0.23505602777004242, "learning_rate": 1e-05, "loss": 0.9786, "step": 19205 }, { "epoch": 17.015057573073516, "grad_norm": 0.2850729823112488, "learning_rate": 1e-05, "loss": 1.0078, "step": 19210 }, { "epoch": 17.019486271036314, "grad_norm": 0.24557830393314362, "learning_rate": 1e-05, "loss": 1.0134, "step": 19215 }, { "epoch": 17.023914968999115, "grad_norm": 0.29252949357032776, "learning_rate": 1e-05, "loss": 1.0053, "step": 19220 }, { "epoch": 17.028343666961913, "grad_norm": 0.2909540832042694, "learning_rate": 1e-05, "loss": 0.9531, "step": 19225 }, { "epoch": 17.03277236492471, "grad_norm": 0.25772130489349365, "learning_rate": 1e-05, "loss": 0.9543, "step": 19230 }, { "epoch": 17.03720106288751, "grad_norm": 0.2648557424545288, "learning_rate": 1e-05, "loss": 1.0142, "step": 19235 }, { "epoch": 17.04162976085031, "grad_norm": 0.23941372334957123, "learning_rate": 1e-05, "loss": 1.0099, "step": 19240 }, { "epoch": 17.04605845881311, "grad_norm": 0.24207670986652374, "learning_rate": 1e-05, "loss": 0.9522, "step": 19245 }, { "epoch": 17.050487156775908, "grad_norm": 0.2905253469944, "learning_rate": 1e-05, "loss": 1.0249, "step": 19250 }, { "epoch": 17.054915854738706, "grad_norm": 0.257564514875412, "learning_rate": 1e-05, "loss": 0.9777, "step": 19255 }, { "epoch": 17.059344552701507, "grad_norm": 0.25863656401634216, "learning_rate": 1e-05, "loss": 1.0196, "step": 19260 }, { "epoch": 17.063773250664305, "grad_norm": 0.2568065822124481, "learning_rate": 1e-05, "loss": 1.0127, "step": 19265 }, { "epoch": 17.068201948627102, "grad_norm": 0.23939284682273865, "learning_rate": 1e-05, "loss": 0.9126, "step": 19270 }, { "epoch": 17.072630646589904, "grad_norm": 0.2636503577232361, "learning_rate": 1e-05, "loss": 0.9575, "step": 19275 }, { "epoch": 17.0770593445527, "grad_norm": 0.2507743239402771, "learning_rate": 1e-05, "loss": 0.9913, "step": 19280 }, { "epoch": 17.0814880425155, "grad_norm": 0.2822876572608948, "learning_rate": 1e-05, "loss": 0.9992, "step": 19285 }, { "epoch": 17.0859167404783, "grad_norm": 0.22202524542808533, "learning_rate": 1e-05, "loss": 1.0189, "step": 19290 }, { "epoch": 17.090345438441098, "grad_norm": 0.29415300488471985, "learning_rate": 1e-05, "loss": 1.0393, "step": 19295 }, { "epoch": 17.0947741364039, "grad_norm": 0.2532123923301697, "learning_rate": 1e-05, "loss": 1.0052, "step": 19300 }, { "epoch": 17.099202834366697, "grad_norm": 0.26051363348960876, "learning_rate": 1e-05, "loss": 1.0206, "step": 19305 }, { "epoch": 17.103631532329494, "grad_norm": 0.22066263854503632, "learning_rate": 1e-05, "loss": 0.993, "step": 19310 }, { "epoch": 17.108060230292296, "grad_norm": 0.2342502772808075, "learning_rate": 1e-05, "loss": 0.9691, "step": 19315 }, { "epoch": 17.112488928255093, "grad_norm": 0.24816593527793884, "learning_rate": 1e-05, "loss": 0.9463, "step": 19320 }, { "epoch": 17.11691762621789, "grad_norm": 0.24889181554317474, "learning_rate": 1e-05, "loss": 1.0043, "step": 19325 }, { "epoch": 17.121346324180692, "grad_norm": 0.2589760720729828, "learning_rate": 1e-05, "loss": 0.9937, "step": 19330 }, { "epoch": 17.12577502214349, "grad_norm": 0.27571389079093933, "learning_rate": 1e-05, "loss": 0.9693, "step": 19335 }, { "epoch": 17.130203720106287, "grad_norm": 0.28763172030448914, "learning_rate": 1e-05, "loss": 0.9682, "step": 19340 }, { "epoch": 17.13463241806909, "grad_norm": 0.2309671938419342, "learning_rate": 1e-05, "loss": 0.9929, "step": 19345 }, { "epoch": 17.139061116031886, "grad_norm": 0.27404001355171204, "learning_rate": 1e-05, "loss": 1.0302, "step": 19350 }, { "epoch": 17.143489813994684, "grad_norm": 0.2963801324367523, "learning_rate": 1e-05, "loss": 1.0321, "step": 19355 }, { "epoch": 17.147918511957485, "grad_norm": 0.264787882566452, "learning_rate": 1e-05, "loss": 1.0142, "step": 19360 }, { "epoch": 17.152347209920283, "grad_norm": 0.28650131821632385, "learning_rate": 1e-05, "loss": 0.9811, "step": 19365 }, { "epoch": 17.156775907883084, "grad_norm": 0.23257899284362793, "learning_rate": 1e-05, "loss": 0.9645, "step": 19370 }, { "epoch": 17.16120460584588, "grad_norm": 0.26733118295669556, "learning_rate": 1e-05, "loss": 0.9539, "step": 19375 }, { "epoch": 17.16563330380868, "grad_norm": 0.22564467787742615, "learning_rate": 1e-05, "loss": 0.962, "step": 19380 }, { "epoch": 17.17006200177148, "grad_norm": 0.22003979980945587, "learning_rate": 1e-05, "loss": 1.0137, "step": 19385 }, { "epoch": 17.174490699734278, "grad_norm": 0.26003363728523254, "learning_rate": 1e-05, "loss": 0.9998, "step": 19390 }, { "epoch": 17.178919397697076, "grad_norm": 0.31560972332954407, "learning_rate": 1e-05, "loss": 1.0133, "step": 19395 }, { "epoch": 17.183348095659877, "grad_norm": 0.24204380810260773, "learning_rate": 1e-05, "loss": 1.0215, "step": 19400 }, { "epoch": 17.187776793622675, "grad_norm": 0.282644659280777, "learning_rate": 1e-05, "loss": 0.9446, "step": 19405 }, { "epoch": 17.192205491585472, "grad_norm": 0.22840814292430878, "learning_rate": 1e-05, "loss": 0.9626, "step": 19410 }, { "epoch": 17.196634189548273, "grad_norm": 0.30209341645240784, "learning_rate": 1e-05, "loss": 0.9701, "step": 19415 }, { "epoch": 17.20106288751107, "grad_norm": 0.27143368124961853, "learning_rate": 1e-05, "loss": 1.045, "step": 19420 }, { "epoch": 17.205491585473872, "grad_norm": 0.2402728646993637, "learning_rate": 1e-05, "loss": 1.0028, "step": 19425 }, { "epoch": 17.20992028343667, "grad_norm": 0.27113693952560425, "learning_rate": 1e-05, "loss": 0.9895, "step": 19430 }, { "epoch": 17.214348981399468, "grad_norm": 0.23963776230812073, "learning_rate": 1e-05, "loss": 1.0443, "step": 19435 }, { "epoch": 17.21877767936227, "grad_norm": 0.2239329218864441, "learning_rate": 1e-05, "loss": 1.0159, "step": 19440 }, { "epoch": 17.223206377325067, "grad_norm": 0.2643023133277893, "learning_rate": 1e-05, "loss": 1.0324, "step": 19445 }, { "epoch": 17.227635075287864, "grad_norm": 0.2785678505897522, "learning_rate": 1e-05, "loss": 0.9936, "step": 19450 }, { "epoch": 17.232063773250665, "grad_norm": 0.3162379860877991, "learning_rate": 1e-05, "loss": 0.9711, "step": 19455 }, { "epoch": 17.236492471213463, "grad_norm": 0.29354482889175415, "learning_rate": 1e-05, "loss": 1.0262, "step": 19460 }, { "epoch": 17.24092116917626, "grad_norm": 0.27754074335098267, "learning_rate": 1e-05, "loss": 0.9782, "step": 19465 }, { "epoch": 17.245349867139062, "grad_norm": 0.238135427236557, "learning_rate": 1e-05, "loss": 1.0372, "step": 19470 }, { "epoch": 17.24977856510186, "grad_norm": 0.23985199630260468, "learning_rate": 1e-05, "loss": 1.012, "step": 19475 }, { "epoch": 17.254207263064657, "grad_norm": 0.30725282430648804, "learning_rate": 1e-05, "loss": 0.9992, "step": 19480 }, { "epoch": 17.25863596102746, "grad_norm": 0.2880932092666626, "learning_rate": 1e-05, "loss": 0.9485, "step": 19485 }, { "epoch": 17.263064658990256, "grad_norm": 0.27494096755981445, "learning_rate": 1e-05, "loss": 0.9809, "step": 19490 }, { "epoch": 17.267493356953057, "grad_norm": 0.26676058769226074, "learning_rate": 1e-05, "loss": 1.0158, "step": 19495 }, { "epoch": 17.271922054915855, "grad_norm": 0.2333136647939682, "learning_rate": 1e-05, "loss": 1.0028, "step": 19500 }, { "epoch": 17.276350752878653, "grad_norm": 0.23762977123260498, "learning_rate": 1e-05, "loss": 1.0069, "step": 19505 }, { "epoch": 17.280779450841454, "grad_norm": 0.2730015218257904, "learning_rate": 1e-05, "loss": 0.9937, "step": 19510 }, { "epoch": 17.28520814880425, "grad_norm": 0.26295778155326843, "learning_rate": 1e-05, "loss": 0.9605, "step": 19515 }, { "epoch": 17.28963684676705, "grad_norm": 0.29106032848358154, "learning_rate": 1e-05, "loss": 1.0345, "step": 19520 }, { "epoch": 17.29406554472985, "grad_norm": 0.288153737783432, "learning_rate": 1e-05, "loss": 0.9568, "step": 19525 }, { "epoch": 17.298494242692648, "grad_norm": 0.250797837972641, "learning_rate": 1e-05, "loss": 0.9976, "step": 19530 }, { "epoch": 17.302922940655446, "grad_norm": 0.22978240251541138, "learning_rate": 1e-05, "loss": 0.9548, "step": 19535 }, { "epoch": 17.307351638618247, "grad_norm": 0.26314058899879456, "learning_rate": 1e-05, "loss": 0.9882, "step": 19540 }, { "epoch": 17.311780336581045, "grad_norm": 0.24025462567806244, "learning_rate": 1e-05, "loss": 0.9878, "step": 19545 }, { "epoch": 17.316209034543846, "grad_norm": 0.25958991050720215, "learning_rate": 1e-05, "loss": 1.005, "step": 19550 }, { "epoch": 17.320637732506643, "grad_norm": 0.25435057282447815, "learning_rate": 1e-05, "loss": 0.9473, "step": 19555 }, { "epoch": 17.32506643046944, "grad_norm": 0.22832071781158447, "learning_rate": 1e-05, "loss": 1.0271, "step": 19560 }, { "epoch": 17.329495128432242, "grad_norm": 0.285186767578125, "learning_rate": 1e-05, "loss": 0.9618, "step": 19565 }, { "epoch": 17.33392382639504, "grad_norm": 0.27580833435058594, "learning_rate": 1e-05, "loss": 0.9454, "step": 19570 }, { "epoch": 17.338352524357838, "grad_norm": 0.22905929386615753, "learning_rate": 1e-05, "loss": 1.0011, "step": 19575 }, { "epoch": 17.34278122232064, "grad_norm": 0.28417590260505676, "learning_rate": 1e-05, "loss": 1.0055, "step": 19580 }, { "epoch": 17.347209920283436, "grad_norm": 0.21974067389965057, "learning_rate": 1e-05, "loss": 1.0035, "step": 19585 }, { "epoch": 17.351638618246234, "grad_norm": 0.2521921396255493, "learning_rate": 1e-05, "loss": 0.9635, "step": 19590 }, { "epoch": 17.356067316209035, "grad_norm": 0.2822516858577728, "learning_rate": 1e-05, "loss": 1.015, "step": 19595 }, { "epoch": 17.360496014171833, "grad_norm": 0.2621997892856598, "learning_rate": 1e-05, "loss": 1.026, "step": 19600 }, { "epoch": 17.36492471213463, "grad_norm": 0.2842501401901245, "learning_rate": 1e-05, "loss": 1.0336, "step": 19605 }, { "epoch": 17.369353410097432, "grad_norm": 0.2468596249818802, "learning_rate": 1e-05, "loss": 0.963, "step": 19610 }, { "epoch": 17.37378210806023, "grad_norm": 0.24540621042251587, "learning_rate": 1e-05, "loss": 0.9943, "step": 19615 }, { "epoch": 17.37821080602303, "grad_norm": 0.2523883581161499, "learning_rate": 1e-05, "loss": 1.0139, "step": 19620 }, { "epoch": 17.38263950398583, "grad_norm": 0.23667879402637482, "learning_rate": 1e-05, "loss": 0.9763, "step": 19625 }, { "epoch": 17.387068201948626, "grad_norm": 0.3174433410167694, "learning_rate": 1e-05, "loss": 0.9675, "step": 19630 }, { "epoch": 17.391496899911427, "grad_norm": 0.292094886302948, "learning_rate": 1e-05, "loss": 0.9927, "step": 19635 }, { "epoch": 17.395925597874225, "grad_norm": 0.3052208721637726, "learning_rate": 1e-05, "loss": 0.9858, "step": 19640 }, { "epoch": 17.400354295837023, "grad_norm": 0.3293698728084564, "learning_rate": 1e-05, "loss": 0.9289, "step": 19645 }, { "epoch": 17.404782993799824, "grad_norm": 0.28416889905929565, "learning_rate": 1e-05, "loss": 1.0463, "step": 19650 }, { "epoch": 17.40921169176262, "grad_norm": 0.264961302280426, "learning_rate": 1e-05, "loss": 1.0473, "step": 19655 }, { "epoch": 17.41364038972542, "grad_norm": 0.23802722990512848, "learning_rate": 1e-05, "loss": 0.9646, "step": 19660 }, { "epoch": 17.41806908768822, "grad_norm": 0.3156811594963074, "learning_rate": 1e-05, "loss": 1.0168, "step": 19665 }, { "epoch": 17.422497785651018, "grad_norm": 0.3185417056083679, "learning_rate": 1e-05, "loss": 0.9966, "step": 19670 }, { "epoch": 17.42692648361382, "grad_norm": 0.25376373529434204, "learning_rate": 1e-05, "loss": 0.9743, "step": 19675 }, { "epoch": 17.431355181576617, "grad_norm": 0.28718098998069763, "learning_rate": 1e-05, "loss": 1.0282, "step": 19680 }, { "epoch": 17.435783879539414, "grad_norm": 0.23882503807544708, "learning_rate": 1e-05, "loss": 0.9981, "step": 19685 }, { "epoch": 17.440212577502216, "grad_norm": 0.2596502900123596, "learning_rate": 1e-05, "loss": 0.9496, "step": 19690 }, { "epoch": 17.444641275465013, "grad_norm": 0.25458356738090515, "learning_rate": 1e-05, "loss": 0.9786, "step": 19695 }, { "epoch": 17.44906997342781, "grad_norm": 0.24817614257335663, "learning_rate": 1e-05, "loss": 0.9872, "step": 19700 }, { "epoch": 17.453498671390612, "grad_norm": 0.23650948703289032, "learning_rate": 1e-05, "loss": 0.9906, "step": 19705 }, { "epoch": 17.45792736935341, "grad_norm": 0.24727743864059448, "learning_rate": 1e-05, "loss": 1.03, "step": 19710 }, { "epoch": 17.462356067316207, "grad_norm": 0.26096096634864807, "learning_rate": 1e-05, "loss": 1.0307, "step": 19715 }, { "epoch": 17.46678476527901, "grad_norm": 0.23782479763031006, "learning_rate": 1e-05, "loss": 1.0255, "step": 19720 }, { "epoch": 17.471213463241806, "grad_norm": 0.26561567187309265, "learning_rate": 1e-05, "loss": 1.0538, "step": 19725 }, { "epoch": 17.475642161204608, "grad_norm": 0.2778032720088959, "learning_rate": 1e-05, "loss": 0.9664, "step": 19730 }, { "epoch": 17.480070859167405, "grad_norm": 0.262604683637619, "learning_rate": 1e-05, "loss": 0.978, "step": 19735 }, { "epoch": 17.484499557130203, "grad_norm": 0.2597878575325012, "learning_rate": 1e-05, "loss": 0.9593, "step": 19740 }, { "epoch": 17.488928255093004, "grad_norm": 0.35033324360847473, "learning_rate": 1e-05, "loss": 0.9488, "step": 19745 }, { "epoch": 17.4933569530558, "grad_norm": 0.2768417000770569, "learning_rate": 1e-05, "loss": 0.9541, "step": 19750 }, { "epoch": 17.4977856510186, "grad_norm": 0.25707241892814636, "learning_rate": 1e-05, "loss": 0.9985, "step": 19755 }, { "epoch": 17.5022143489814, "grad_norm": 0.2521880269050598, "learning_rate": 1e-05, "loss": 1.012, "step": 19760 }, { "epoch": 17.5066430469442, "grad_norm": 0.3279540240764618, "learning_rate": 1e-05, "loss": 0.981, "step": 19765 }, { "epoch": 17.511071744906996, "grad_norm": 0.25486913323402405, "learning_rate": 1e-05, "loss": 1.0453, "step": 19770 }, { "epoch": 17.515500442869797, "grad_norm": 0.26148685812950134, "learning_rate": 1e-05, "loss": 0.9732, "step": 19775 }, { "epoch": 17.519929140832595, "grad_norm": 0.3092002868652344, "learning_rate": 1e-05, "loss": 0.9728, "step": 19780 }, { "epoch": 17.524357838795392, "grad_norm": 0.21546992659568787, "learning_rate": 1e-05, "loss": 1.0158, "step": 19785 }, { "epoch": 17.528786536758194, "grad_norm": 0.258698970079422, "learning_rate": 1e-05, "loss": 1.0127, "step": 19790 }, { "epoch": 17.53321523472099, "grad_norm": 0.26470106840133667, "learning_rate": 1e-05, "loss": 1.0045, "step": 19795 }, { "epoch": 17.537643932683793, "grad_norm": 0.29198598861694336, "learning_rate": 1e-05, "loss": 0.9562, "step": 19800 }, { "epoch": 17.54207263064659, "grad_norm": 0.3538688123226166, "learning_rate": 1e-05, "loss": 0.9671, "step": 19805 }, { "epoch": 17.546501328609388, "grad_norm": 0.3227647542953491, "learning_rate": 1e-05, "loss": 0.988, "step": 19810 }, { "epoch": 17.55093002657219, "grad_norm": 0.2500825822353363, "learning_rate": 1e-05, "loss": 0.9674, "step": 19815 }, { "epoch": 17.555358724534987, "grad_norm": 0.25851163268089294, "learning_rate": 1e-05, "loss": 0.9867, "step": 19820 }, { "epoch": 17.559787422497784, "grad_norm": 0.3146398663520813, "learning_rate": 1e-05, "loss": 1.0418, "step": 19825 }, { "epoch": 17.564216120460586, "grad_norm": 0.3041551113128662, "learning_rate": 1e-05, "loss": 1.0077, "step": 19830 }, { "epoch": 17.568644818423383, "grad_norm": 0.2577322721481323, "learning_rate": 1e-05, "loss": 0.9893, "step": 19835 }, { "epoch": 17.57307351638618, "grad_norm": 0.2891632616519928, "learning_rate": 1e-05, "loss": 0.9939, "step": 19840 }, { "epoch": 17.577502214348982, "grad_norm": 0.3622133433818817, "learning_rate": 1e-05, "loss": 1.0599, "step": 19845 }, { "epoch": 17.58193091231178, "grad_norm": 0.3105239272117615, "learning_rate": 1e-05, "loss": 0.999, "step": 19850 }, { "epoch": 17.58635961027458, "grad_norm": 0.31793999671936035, "learning_rate": 1e-05, "loss": 0.9869, "step": 19855 }, { "epoch": 17.59078830823738, "grad_norm": 0.2550145983695984, "learning_rate": 1e-05, "loss": 1.0143, "step": 19860 }, { "epoch": 17.595217006200176, "grad_norm": 0.2385762482881546, "learning_rate": 1e-05, "loss": 0.9859, "step": 19865 }, { "epoch": 17.599645704162977, "grad_norm": 0.25345996022224426, "learning_rate": 1e-05, "loss": 0.9948, "step": 19870 }, { "epoch": 17.604074402125775, "grad_norm": 0.24802087247371674, "learning_rate": 1e-05, "loss": 0.9472, "step": 19875 }, { "epoch": 17.608503100088573, "grad_norm": 0.22456267476081848, "learning_rate": 1e-05, "loss": 0.9946, "step": 19880 }, { "epoch": 17.612931798051374, "grad_norm": 0.250970721244812, "learning_rate": 1e-05, "loss": 1.0148, "step": 19885 }, { "epoch": 17.61736049601417, "grad_norm": 0.24302758276462555, "learning_rate": 1e-05, "loss": 0.98, "step": 19890 }, { "epoch": 17.62178919397697, "grad_norm": 0.22629830241203308, "learning_rate": 1e-05, "loss": 1.002, "step": 19895 }, { "epoch": 17.62621789193977, "grad_norm": 0.27670133113861084, "learning_rate": 1e-05, "loss": 1.0116, "step": 19900 }, { "epoch": 17.630646589902568, "grad_norm": 0.22957493364810944, "learning_rate": 1e-05, "loss": 0.9818, "step": 19905 }, { "epoch": 17.63507528786537, "grad_norm": 0.25864338874816895, "learning_rate": 1e-05, "loss": 1.0096, "step": 19910 }, { "epoch": 17.639503985828167, "grad_norm": 0.2682626247406006, "learning_rate": 1e-05, "loss": 0.931, "step": 19915 }, { "epoch": 17.643932683790965, "grad_norm": 0.30448803305625916, "learning_rate": 1e-05, "loss": 0.9752, "step": 19920 }, { "epoch": 17.648361381753766, "grad_norm": 0.2132716029882431, "learning_rate": 1e-05, "loss": 1.0369, "step": 19925 }, { "epoch": 17.652790079716564, "grad_norm": 0.27838215231895447, "learning_rate": 1e-05, "loss": 0.9543, "step": 19930 }, { "epoch": 17.65721877767936, "grad_norm": 0.2461836338043213, "learning_rate": 1e-05, "loss": 1.0061, "step": 19935 }, { "epoch": 17.661647475642162, "grad_norm": 0.24796424806118011, "learning_rate": 1e-05, "loss": 0.9079, "step": 19940 }, { "epoch": 17.66607617360496, "grad_norm": 0.2612871825695038, "learning_rate": 1e-05, "loss": 0.9657, "step": 19945 }, { "epoch": 17.670504871567758, "grad_norm": 0.22839590907096863, "learning_rate": 1e-05, "loss": 0.9976, "step": 19950 }, { "epoch": 17.67493356953056, "grad_norm": 0.28730660676956177, "learning_rate": 1e-05, "loss": 1.051, "step": 19955 }, { "epoch": 17.679362267493357, "grad_norm": 0.2569023370742798, "learning_rate": 1e-05, "loss": 0.9758, "step": 19960 }, { "epoch": 17.683790965456154, "grad_norm": 0.2763356566429138, "learning_rate": 1e-05, "loss": 1.0034, "step": 19965 }, { "epoch": 17.688219663418955, "grad_norm": 0.29813385009765625, "learning_rate": 1e-05, "loss": 0.9521, "step": 19970 }, { "epoch": 17.692648361381753, "grad_norm": 0.2158433049917221, "learning_rate": 1e-05, "loss": 0.994, "step": 19975 }, { "epoch": 17.697077059344554, "grad_norm": 0.25374796986579895, "learning_rate": 1e-05, "loss": 1.0167, "step": 19980 }, { "epoch": 17.701505757307352, "grad_norm": 0.2582029104232788, "learning_rate": 1e-05, "loss": 0.9942, "step": 19985 }, { "epoch": 17.70593445527015, "grad_norm": 0.27137789130210876, "learning_rate": 1e-05, "loss": 1.0536, "step": 19990 }, { "epoch": 17.71036315323295, "grad_norm": 0.2295052856206894, "learning_rate": 1e-05, "loss": 1.0071, "step": 19995 }, { "epoch": 17.71479185119575, "grad_norm": 0.24351628124713898, "learning_rate": 1e-05, "loss": 0.9815, "step": 20000 }, { "epoch": 17.719220549158546, "grad_norm": 0.23730763792991638, "learning_rate": 1e-05, "loss": 0.9809, "step": 20005 }, { "epoch": 17.723649247121347, "grad_norm": 0.29798734188079834, "learning_rate": 1e-05, "loss": 1.0129, "step": 20010 }, { "epoch": 17.728077945084145, "grad_norm": 0.2506239712238312, "learning_rate": 1e-05, "loss": 0.9979, "step": 20015 }, { "epoch": 17.732506643046943, "grad_norm": 0.2482958883047104, "learning_rate": 1e-05, "loss": 0.9801, "step": 20020 }, { "epoch": 17.736935341009744, "grad_norm": 0.2259867936372757, "learning_rate": 1e-05, "loss": 1.0229, "step": 20025 }, { "epoch": 17.74136403897254, "grad_norm": 0.256012886762619, "learning_rate": 1e-05, "loss": 0.983, "step": 20030 }, { "epoch": 17.745792736935343, "grad_norm": 0.2893749177455902, "learning_rate": 1e-05, "loss": 1.0065, "step": 20035 }, { "epoch": 17.75022143489814, "grad_norm": 0.27459439635276794, "learning_rate": 1e-05, "loss": 1.0438, "step": 20040 }, { "epoch": 17.754650132860938, "grad_norm": 0.26578623056411743, "learning_rate": 1e-05, "loss": 1.0214, "step": 20045 }, { "epoch": 17.75907883082374, "grad_norm": 0.2591007947921753, "learning_rate": 1e-05, "loss": 1.0047, "step": 20050 }, { "epoch": 17.763507528786537, "grad_norm": 0.3230084478855133, "learning_rate": 1e-05, "loss": 0.9188, "step": 20055 }, { "epoch": 17.767936226749335, "grad_norm": 0.22423215210437775, "learning_rate": 1e-05, "loss": 1.0346, "step": 20060 }, { "epoch": 17.772364924712136, "grad_norm": 0.2446160465478897, "learning_rate": 1e-05, "loss": 0.983, "step": 20065 }, { "epoch": 17.776793622674933, "grad_norm": 0.245269313454628, "learning_rate": 1e-05, "loss": 0.976, "step": 20070 }, { "epoch": 17.78122232063773, "grad_norm": 0.2621501684188843, "learning_rate": 1e-05, "loss": 1.014, "step": 20075 }, { "epoch": 17.785651018600532, "grad_norm": 0.2698981463909149, "learning_rate": 1e-05, "loss": 0.9753, "step": 20080 }, { "epoch": 17.79007971656333, "grad_norm": 0.25634124875068665, "learning_rate": 1e-05, "loss": 1.0256, "step": 20085 }, { "epoch": 17.794508414526128, "grad_norm": 0.23192934691905975, "learning_rate": 1e-05, "loss": 0.996, "step": 20090 }, { "epoch": 17.79893711248893, "grad_norm": 0.27244633436203003, "learning_rate": 1e-05, "loss": 0.9789, "step": 20095 }, { "epoch": 17.803365810451727, "grad_norm": 0.29565033316612244, "learning_rate": 1e-05, "loss": 0.8974, "step": 20100 }, { "epoch": 17.807794508414528, "grad_norm": 0.209321528673172, "learning_rate": 1e-05, "loss": 1.0167, "step": 20105 }, { "epoch": 17.812223206377325, "grad_norm": 0.2633292078971863, "learning_rate": 1e-05, "loss": 1.0313, "step": 20110 }, { "epoch": 17.816651904340123, "grad_norm": 0.22794212400913239, "learning_rate": 1e-05, "loss": 0.9516, "step": 20115 }, { "epoch": 17.821080602302924, "grad_norm": 0.29816770553588867, "learning_rate": 1e-05, "loss": 0.9759, "step": 20120 }, { "epoch": 17.825509300265722, "grad_norm": 0.21468093991279602, "learning_rate": 1e-05, "loss": 0.9666, "step": 20125 }, { "epoch": 17.82993799822852, "grad_norm": 0.24957433342933655, "learning_rate": 1e-05, "loss": 0.9949, "step": 20130 }, { "epoch": 17.83436669619132, "grad_norm": 0.2944706380367279, "learning_rate": 1e-05, "loss": 1.0226, "step": 20135 }, { "epoch": 17.83879539415412, "grad_norm": 0.3037720024585724, "learning_rate": 1e-05, "loss": 0.9784, "step": 20140 }, { "epoch": 17.843224092116916, "grad_norm": 0.2549017071723938, "learning_rate": 1e-05, "loss": 0.9884, "step": 20145 }, { "epoch": 17.847652790079717, "grad_norm": 0.24500052630901337, "learning_rate": 1e-05, "loss": 0.9852, "step": 20150 }, { "epoch": 17.852081488042515, "grad_norm": 0.2671549320220947, "learning_rate": 1e-05, "loss": 0.9745, "step": 20155 }, { "epoch": 17.856510186005316, "grad_norm": 0.2728942930698395, "learning_rate": 1e-05, "loss": 0.9341, "step": 20160 }, { "epoch": 17.860938883968114, "grad_norm": 0.24169975519180298, "learning_rate": 1e-05, "loss": 1.0127, "step": 20165 }, { "epoch": 17.86536758193091, "grad_norm": 0.24461784958839417, "learning_rate": 1e-05, "loss": 1.0303, "step": 20170 }, { "epoch": 17.869796279893713, "grad_norm": 0.32894495129585266, "learning_rate": 1e-05, "loss": 1.0428, "step": 20175 }, { "epoch": 17.87422497785651, "grad_norm": 0.2735280692577362, "learning_rate": 1e-05, "loss": 1.0069, "step": 20180 }, { "epoch": 17.878653675819308, "grad_norm": 0.2700766921043396, "learning_rate": 1e-05, "loss": 1.002, "step": 20185 }, { "epoch": 17.88308237378211, "grad_norm": 0.31885093450546265, "learning_rate": 1e-05, "loss": 1.0873, "step": 20190 }, { "epoch": 17.887511071744907, "grad_norm": 0.2657311260700226, "learning_rate": 1e-05, "loss": 1.0065, "step": 20195 }, { "epoch": 17.891939769707704, "grad_norm": 0.25965866446495056, "learning_rate": 1e-05, "loss": 0.9794, "step": 20200 }, { "epoch": 17.896368467670506, "grad_norm": 0.28984150290489197, "learning_rate": 1e-05, "loss": 1.0036, "step": 20205 }, { "epoch": 17.900797165633303, "grad_norm": 0.2735573351383209, "learning_rate": 1e-05, "loss": 1.0354, "step": 20210 }, { "epoch": 17.9052258635961, "grad_norm": 0.31076914072036743, "learning_rate": 1e-05, "loss": 1.0263, "step": 20215 }, { "epoch": 17.909654561558902, "grad_norm": 0.26216214895248413, "learning_rate": 1e-05, "loss": 0.9918, "step": 20220 }, { "epoch": 17.9140832595217, "grad_norm": 0.25458693504333496, "learning_rate": 1e-05, "loss": 1.0084, "step": 20225 }, { "epoch": 17.9185119574845, "grad_norm": 0.26108530163764954, "learning_rate": 1e-05, "loss": 1.0766, "step": 20230 }, { "epoch": 17.9229406554473, "grad_norm": 0.2990536689758301, "learning_rate": 1e-05, "loss": 1.0192, "step": 20235 }, { "epoch": 17.927369353410096, "grad_norm": 0.28272974491119385, "learning_rate": 1e-05, "loss": 0.9445, "step": 20240 }, { "epoch": 17.931798051372898, "grad_norm": 0.22939245402812958, "learning_rate": 1e-05, "loss": 0.9813, "step": 20245 }, { "epoch": 17.936226749335695, "grad_norm": 0.2612392008304596, "learning_rate": 1e-05, "loss": 1.007, "step": 20250 }, { "epoch": 17.940655447298493, "grad_norm": 0.2584625482559204, "learning_rate": 1e-05, "loss": 1.0066, "step": 20255 }, { "epoch": 17.945084145261294, "grad_norm": 0.2809886634349823, "learning_rate": 1e-05, "loss": 0.9821, "step": 20260 }, { "epoch": 17.949512843224092, "grad_norm": 0.31402015686035156, "learning_rate": 1e-05, "loss": 0.9518, "step": 20265 }, { "epoch": 17.95394154118689, "grad_norm": 0.3004207909107208, "learning_rate": 1e-05, "loss": 0.9816, "step": 20270 }, { "epoch": 17.95837023914969, "grad_norm": 0.32444968819618225, "learning_rate": 1e-05, "loss": 1.0033, "step": 20275 }, { "epoch": 17.96279893711249, "grad_norm": 0.26725834608078003, "learning_rate": 1e-05, "loss": 0.9839, "step": 20280 }, { "epoch": 17.96722763507529, "grad_norm": 0.234386146068573, "learning_rate": 1e-05, "loss": 1.0239, "step": 20285 }, { "epoch": 17.971656333038087, "grad_norm": 0.2573343813419342, "learning_rate": 1e-05, "loss": 0.9885, "step": 20290 }, { "epoch": 17.976085031000885, "grad_norm": 0.2270103543996811, "learning_rate": 1e-05, "loss": 0.9898, "step": 20295 }, { "epoch": 17.980513728963686, "grad_norm": 0.23110328614711761, "learning_rate": 1e-05, "loss": 1.0047, "step": 20300 }, { "epoch": 17.984942426926484, "grad_norm": 0.22924678027629852, "learning_rate": 1e-05, "loss": 1.067, "step": 20305 }, { "epoch": 17.98937112488928, "grad_norm": 0.29164668917655945, "learning_rate": 1e-05, "loss": 0.9993, "step": 20310 }, { "epoch": 17.993799822852083, "grad_norm": 0.2458817958831787, "learning_rate": 1e-05, "loss": 0.9699, "step": 20315 }, { "epoch": 17.99822852081488, "grad_norm": 0.23047512769699097, "learning_rate": 1e-05, "loss": 0.9945, "step": 20320 }, { "epoch": 18.002657218777678, "grad_norm": 0.24460440874099731, "learning_rate": 1e-05, "loss": 1.044, "step": 20325 }, { "epoch": 18.00708591674048, "grad_norm": 0.22777819633483887, "learning_rate": 1e-05, "loss": 0.9539, "step": 20330 }, { "epoch": 18.011514614703277, "grad_norm": 0.23051854968070984, "learning_rate": 1e-05, "loss": 0.9677, "step": 20335 }, { "epoch": 18.015943312666074, "grad_norm": 0.2609602212905884, "learning_rate": 1e-05, "loss": 1.0037, "step": 20340 }, { "epoch": 18.020372010628876, "grad_norm": 0.24906164407730103, "learning_rate": 1e-05, "loss": 1.0099, "step": 20345 }, { "epoch": 18.024800708591673, "grad_norm": 0.25315409898757935, "learning_rate": 1e-05, "loss": 0.9528, "step": 20350 }, { "epoch": 18.029229406554474, "grad_norm": 0.2424316555261612, "learning_rate": 1e-05, "loss": 1.0213, "step": 20355 }, { "epoch": 18.033658104517272, "grad_norm": 0.2625334858894348, "learning_rate": 1e-05, "loss": 0.9805, "step": 20360 }, { "epoch": 18.03808680248007, "grad_norm": 0.26623883843421936, "learning_rate": 1e-05, "loss": 1.0951, "step": 20365 }, { "epoch": 18.04251550044287, "grad_norm": 0.25353771448135376, "learning_rate": 1e-05, "loss": 1.0217, "step": 20370 }, { "epoch": 18.04694419840567, "grad_norm": 0.23834754526615143, "learning_rate": 1e-05, "loss": 1.0041, "step": 20375 }, { "epoch": 18.051372896368466, "grad_norm": 0.2945196330547333, "learning_rate": 1e-05, "loss": 1.0186, "step": 20380 }, { "epoch": 18.055801594331268, "grad_norm": 0.2569020092487335, "learning_rate": 1e-05, "loss": 0.9889, "step": 20385 }, { "epoch": 18.060230292294065, "grad_norm": 0.26425111293792725, "learning_rate": 1e-05, "loss": 0.9304, "step": 20390 }, { "epoch": 18.064658990256863, "grad_norm": 0.22954672574996948, "learning_rate": 1e-05, "loss": 1.0135, "step": 20395 }, { "epoch": 18.069087688219664, "grad_norm": 0.22191312909126282, "learning_rate": 1e-05, "loss": 1.0175, "step": 20400 }, { "epoch": 18.07351638618246, "grad_norm": 0.27791184186935425, "learning_rate": 1e-05, "loss": 1.005, "step": 20405 }, { "epoch": 18.077945084145263, "grad_norm": 0.264342337846756, "learning_rate": 1e-05, "loss": 0.9534, "step": 20410 }, { "epoch": 18.08237378210806, "grad_norm": 0.23839698731899261, "learning_rate": 1e-05, "loss": 1.0088, "step": 20415 }, { "epoch": 18.086802480070858, "grad_norm": 0.2891393005847931, "learning_rate": 1e-05, "loss": 0.9542, "step": 20420 }, { "epoch": 18.09123117803366, "grad_norm": 0.27449479699134827, "learning_rate": 1e-05, "loss": 0.9563, "step": 20425 }, { "epoch": 18.095659875996457, "grad_norm": 0.28650403022766113, "learning_rate": 1e-05, "loss": 0.9657, "step": 20430 }, { "epoch": 18.100088573959255, "grad_norm": 0.2671445608139038, "learning_rate": 1e-05, "loss": 1.0286, "step": 20435 }, { "epoch": 18.104517271922056, "grad_norm": 0.2786725163459778, "learning_rate": 1e-05, "loss": 0.982, "step": 20440 }, { "epoch": 18.108945969884854, "grad_norm": 0.23936715722084045, "learning_rate": 1e-05, "loss": 0.9526, "step": 20445 }, { "epoch": 18.11337466784765, "grad_norm": 0.24532751739025116, "learning_rate": 1e-05, "loss": 1.0, "step": 20450 }, { "epoch": 18.117803365810452, "grad_norm": 0.26958322525024414, "learning_rate": 1e-05, "loss": 1.0512, "step": 20455 }, { "epoch": 18.12223206377325, "grad_norm": 0.2908604145050049, "learning_rate": 1e-05, "loss": 0.9812, "step": 20460 }, { "epoch": 18.12666076173605, "grad_norm": 0.2929830849170685, "learning_rate": 1e-05, "loss": 1.008, "step": 20465 }, { "epoch": 18.13108945969885, "grad_norm": 0.23224352300167084, "learning_rate": 1e-05, "loss": 1.0149, "step": 20470 }, { "epoch": 18.135518157661647, "grad_norm": 0.2708369791507721, "learning_rate": 1e-05, "loss": 0.9804, "step": 20475 }, { "epoch": 18.139946855624448, "grad_norm": 0.2332603931427002, "learning_rate": 1e-05, "loss": 1.0132, "step": 20480 }, { "epoch": 18.144375553587246, "grad_norm": 0.2812580168247223, "learning_rate": 1e-05, "loss": 1.0096, "step": 20485 }, { "epoch": 18.148804251550043, "grad_norm": 0.2496076226234436, "learning_rate": 1e-05, "loss": 0.9728, "step": 20490 }, { "epoch": 18.153232949512844, "grad_norm": 0.2712249755859375, "learning_rate": 1e-05, "loss": 1.0427, "step": 20495 }, { "epoch": 18.157661647475642, "grad_norm": 0.24284881353378296, "learning_rate": 1e-05, "loss": 0.9816, "step": 20500 }, { "epoch": 18.16209034543844, "grad_norm": 0.2146773487329483, "learning_rate": 1e-05, "loss": 0.9772, "step": 20505 }, { "epoch": 18.16651904340124, "grad_norm": 0.23569054901599884, "learning_rate": 1e-05, "loss": 0.968, "step": 20510 }, { "epoch": 18.17094774136404, "grad_norm": 0.24455448985099792, "learning_rate": 1e-05, "loss": 0.9949, "step": 20515 }, { "epoch": 18.175376439326836, "grad_norm": 0.2086784839630127, "learning_rate": 1e-05, "loss": 0.9736, "step": 20520 }, { "epoch": 18.179805137289637, "grad_norm": 0.23628121614456177, "learning_rate": 1e-05, "loss": 1.019, "step": 20525 }, { "epoch": 18.184233835252435, "grad_norm": 0.24341154098510742, "learning_rate": 1e-05, "loss": 0.9773, "step": 20530 }, { "epoch": 18.188662533215236, "grad_norm": 0.2880188822746277, "learning_rate": 1e-05, "loss": 1.0328, "step": 20535 }, { "epoch": 18.193091231178034, "grad_norm": 0.28739896416664124, "learning_rate": 1e-05, "loss": 0.9862, "step": 20540 }, { "epoch": 18.19751992914083, "grad_norm": 0.22721165418624878, "learning_rate": 1e-05, "loss": 0.9848, "step": 20545 }, { "epoch": 18.201948627103633, "grad_norm": 0.240403413772583, "learning_rate": 1e-05, "loss": 1.0191, "step": 20550 }, { "epoch": 18.20637732506643, "grad_norm": 0.27363017201423645, "learning_rate": 1e-05, "loss": 0.9788, "step": 20555 }, { "epoch": 18.210806023029228, "grad_norm": 0.23993827402591705, "learning_rate": 1e-05, "loss": 0.9929, "step": 20560 }, { "epoch": 18.21523472099203, "grad_norm": 0.288872629404068, "learning_rate": 1e-05, "loss": 1.0011, "step": 20565 }, { "epoch": 18.219663418954827, "grad_norm": 0.24095289409160614, "learning_rate": 1e-05, "loss": 0.9871, "step": 20570 }, { "epoch": 18.224092116917625, "grad_norm": 0.2589903473854065, "learning_rate": 1e-05, "loss": 0.9802, "step": 20575 }, { "epoch": 18.228520814880426, "grad_norm": 0.24059975147247314, "learning_rate": 1e-05, "loss": 0.9645, "step": 20580 }, { "epoch": 18.232949512843224, "grad_norm": 0.21484382450580597, "learning_rate": 1e-05, "loss": 0.9721, "step": 20585 }, { "epoch": 18.237378210806025, "grad_norm": 0.22517722845077515, "learning_rate": 1e-05, "loss": 0.9761, "step": 20590 }, { "epoch": 18.241806908768822, "grad_norm": 0.22644075751304626, "learning_rate": 1e-05, "loss": 0.9513, "step": 20595 }, { "epoch": 18.24623560673162, "grad_norm": 0.25431379675865173, "learning_rate": 1e-05, "loss": 1.0015, "step": 20600 }, { "epoch": 18.25066430469442, "grad_norm": 0.24803446233272552, "learning_rate": 1e-05, "loss": 0.9842, "step": 20605 }, { "epoch": 18.25509300265722, "grad_norm": 0.23967397212982178, "learning_rate": 1e-05, "loss": 0.9559, "step": 20610 }, { "epoch": 18.259521700620017, "grad_norm": 0.23101189732551575, "learning_rate": 1e-05, "loss": 0.963, "step": 20615 }, { "epoch": 18.263950398582818, "grad_norm": 0.23987622559070587, "learning_rate": 1e-05, "loss": 0.9548, "step": 20620 }, { "epoch": 18.268379096545615, "grad_norm": 0.2931157648563385, "learning_rate": 1e-05, "loss": 0.9939, "step": 20625 }, { "epoch": 18.272807794508413, "grad_norm": 0.21868695318698883, "learning_rate": 1e-05, "loss": 1.0238, "step": 20630 }, { "epoch": 18.277236492471214, "grad_norm": 0.2845988869667053, "learning_rate": 1e-05, "loss": 0.9924, "step": 20635 }, { "epoch": 18.281665190434012, "grad_norm": 0.2677435278892517, "learning_rate": 1e-05, "loss": 0.9831, "step": 20640 }, { "epoch": 18.28609388839681, "grad_norm": 0.2774239182472229, "learning_rate": 1e-05, "loss": 0.9939, "step": 20645 }, { "epoch": 18.29052258635961, "grad_norm": 0.27630990743637085, "learning_rate": 1e-05, "loss": 0.998, "step": 20650 }, { "epoch": 18.29495128432241, "grad_norm": 0.22741125524044037, "learning_rate": 1e-05, "loss": 1.0037, "step": 20655 }, { "epoch": 18.29937998228521, "grad_norm": 0.2839937210083008, "learning_rate": 1e-05, "loss": 0.9865, "step": 20660 }, { "epoch": 18.303808680248007, "grad_norm": 0.27133598923683167, "learning_rate": 1e-05, "loss": 1.0355, "step": 20665 }, { "epoch": 18.308237378210805, "grad_norm": 0.2232791632413864, "learning_rate": 1e-05, "loss": 0.9934, "step": 20670 }, { "epoch": 18.312666076173606, "grad_norm": 0.255623459815979, "learning_rate": 1e-05, "loss": 0.9999, "step": 20675 }, { "epoch": 18.317094774136404, "grad_norm": 0.28772732615470886, "learning_rate": 1e-05, "loss": 1.0381, "step": 20680 }, { "epoch": 18.3215234720992, "grad_norm": 0.27251097559928894, "learning_rate": 1e-05, "loss": 0.9809, "step": 20685 }, { "epoch": 18.325952170062003, "grad_norm": 0.22250190377235413, "learning_rate": 1e-05, "loss": 1.0544, "step": 20690 }, { "epoch": 18.3303808680248, "grad_norm": 0.28801122307777405, "learning_rate": 1e-05, "loss": 0.9612, "step": 20695 }, { "epoch": 18.334809565987598, "grad_norm": 0.34806159138679504, "learning_rate": 1e-05, "loss": 0.9757, "step": 20700 }, { "epoch": 18.3392382639504, "grad_norm": 0.30037423968315125, "learning_rate": 1e-05, "loss": 1.0235, "step": 20705 }, { "epoch": 18.343666961913197, "grad_norm": 0.2328796535730362, "learning_rate": 1e-05, "loss": 0.9896, "step": 20710 }, { "epoch": 18.348095659875998, "grad_norm": 0.3051951825618744, "learning_rate": 1e-05, "loss": 1.0002, "step": 20715 }, { "epoch": 18.352524357838796, "grad_norm": 0.2697443664073944, "learning_rate": 1e-05, "loss": 1.0264, "step": 20720 }, { "epoch": 18.356953055801593, "grad_norm": 0.26864537596702576, "learning_rate": 1e-05, "loss": 1.0105, "step": 20725 }, { "epoch": 18.361381753764395, "grad_norm": 0.2774394750595093, "learning_rate": 1e-05, "loss": 0.9958, "step": 20730 }, { "epoch": 18.365810451727192, "grad_norm": 0.2447633594274521, "learning_rate": 1e-05, "loss": 1.0079, "step": 20735 }, { "epoch": 18.37023914968999, "grad_norm": 0.24478772282600403, "learning_rate": 1e-05, "loss": 1.0031, "step": 20740 }, { "epoch": 18.37466784765279, "grad_norm": 0.2232452780008316, "learning_rate": 1e-05, "loss": 1.0063, "step": 20745 }, { "epoch": 18.37909654561559, "grad_norm": 0.26278409361839294, "learning_rate": 1e-05, "loss": 1.0058, "step": 20750 }, { "epoch": 18.383525243578386, "grad_norm": 0.2082904428243637, "learning_rate": 1e-05, "loss": 0.9806, "step": 20755 }, { "epoch": 18.387953941541188, "grad_norm": 0.23361815512180328, "learning_rate": 1e-05, "loss": 1.034, "step": 20760 }, { "epoch": 18.392382639503985, "grad_norm": 0.2113049328327179, "learning_rate": 1e-05, "loss": 0.9634, "step": 20765 }, { "epoch": 18.396811337466787, "grad_norm": 0.2505301237106323, "learning_rate": 1e-05, "loss": 0.9687, "step": 20770 }, { "epoch": 18.401240035429584, "grad_norm": 0.28443095088005066, "learning_rate": 1e-05, "loss": 1.0261, "step": 20775 }, { "epoch": 18.405668733392382, "grad_norm": 0.22276189923286438, "learning_rate": 1e-05, "loss": 0.9868, "step": 20780 }, { "epoch": 18.410097431355183, "grad_norm": 0.21942079067230225, "learning_rate": 1e-05, "loss": 0.9783, "step": 20785 }, { "epoch": 18.41452612931798, "grad_norm": 0.23163661360740662, "learning_rate": 1e-05, "loss": 1.0567, "step": 20790 }, { "epoch": 18.41895482728078, "grad_norm": 0.26401811838150024, "learning_rate": 1e-05, "loss": 0.9625, "step": 20795 }, { "epoch": 18.42338352524358, "grad_norm": 0.23706109821796417, "learning_rate": 1e-05, "loss": 0.9147, "step": 20800 }, { "epoch": 18.427812223206377, "grad_norm": 0.24434785544872284, "learning_rate": 1e-05, "loss": 0.9631, "step": 20805 }, { "epoch": 18.432240921169175, "grad_norm": 0.21827857196331024, "learning_rate": 1e-05, "loss": 1.0303, "step": 20810 }, { "epoch": 18.436669619131976, "grad_norm": 0.2557186186313629, "learning_rate": 1e-05, "loss": 0.9343, "step": 20815 }, { "epoch": 18.441098317094774, "grad_norm": 0.23795369267463684, "learning_rate": 1e-05, "loss": 0.9902, "step": 20820 }, { "epoch": 18.44552701505757, "grad_norm": 0.2577052414417267, "learning_rate": 1e-05, "loss": 0.9703, "step": 20825 }, { "epoch": 18.449955713020373, "grad_norm": 0.19321399927139282, "learning_rate": 1e-05, "loss": 0.9772, "step": 20830 }, { "epoch": 18.45438441098317, "grad_norm": 0.27815720438957214, "learning_rate": 1e-05, "loss": 0.9657, "step": 20835 }, { "epoch": 18.45881310894597, "grad_norm": 0.24520039558410645, "learning_rate": 1e-05, "loss": 0.9794, "step": 20840 }, { "epoch": 18.46324180690877, "grad_norm": 0.3463394343852997, "learning_rate": 1e-05, "loss": 0.9908, "step": 20845 }, { "epoch": 18.467670504871567, "grad_norm": 0.2585110068321228, "learning_rate": 1e-05, "loss": 0.9515, "step": 20850 }, { "epoch": 18.472099202834368, "grad_norm": 0.2485339641571045, "learning_rate": 1e-05, "loss": 1.0613, "step": 20855 }, { "epoch": 18.476527900797166, "grad_norm": 0.20855893194675446, "learning_rate": 1e-05, "loss": 1.0257, "step": 20860 }, { "epoch": 18.480956598759963, "grad_norm": 0.28965285420417786, "learning_rate": 1e-05, "loss": 0.9651, "step": 20865 }, { "epoch": 18.485385296722765, "grad_norm": 0.2055974006652832, "learning_rate": 1e-05, "loss": 0.975, "step": 20870 }, { "epoch": 18.489813994685562, "grad_norm": 0.4266437888145447, "learning_rate": 1e-05, "loss": 0.959, "step": 20875 }, { "epoch": 18.49424269264836, "grad_norm": 0.30046847462654114, "learning_rate": 1e-05, "loss": 1.0794, "step": 20880 }, { "epoch": 18.49867139061116, "grad_norm": 0.25240740180015564, "learning_rate": 1e-05, "loss": 1.0279, "step": 20885 }, { "epoch": 18.50310008857396, "grad_norm": 0.29113587737083435, "learning_rate": 1e-05, "loss": 0.9711, "step": 20890 }, { "epoch": 18.50752878653676, "grad_norm": 0.2869458496570587, "learning_rate": 1e-05, "loss": 1.031, "step": 20895 }, { "epoch": 18.511957484499558, "grad_norm": 0.2689855992794037, "learning_rate": 1e-05, "loss": 0.9622, "step": 20900 }, { "epoch": 18.516386182462355, "grad_norm": 0.2189704328775406, "learning_rate": 1e-05, "loss": 1.0015, "step": 20905 }, { "epoch": 18.520814880425156, "grad_norm": 0.2508397698402405, "learning_rate": 1e-05, "loss": 0.9911, "step": 20910 }, { "epoch": 18.525243578387954, "grad_norm": 0.4034369885921478, "learning_rate": 1e-05, "loss": 1.0063, "step": 20915 }, { "epoch": 18.52967227635075, "grad_norm": 0.22806298732757568, "learning_rate": 1e-05, "loss": 0.9864, "step": 20920 }, { "epoch": 18.534100974313553, "grad_norm": 0.2971166968345642, "learning_rate": 1e-05, "loss": 0.9963, "step": 20925 }, { "epoch": 18.53852967227635, "grad_norm": 0.2362562119960785, "learning_rate": 1e-05, "loss": 0.9908, "step": 20930 }, { "epoch": 18.54295837023915, "grad_norm": 0.2426292896270752, "learning_rate": 1e-05, "loss": 1.0191, "step": 20935 }, { "epoch": 18.54738706820195, "grad_norm": 0.2474449723958969, "learning_rate": 1e-05, "loss": 0.9723, "step": 20940 }, { "epoch": 18.551815766164747, "grad_norm": 0.2669737935066223, "learning_rate": 1e-05, "loss": 0.9993, "step": 20945 }, { "epoch": 18.556244464127545, "grad_norm": 0.26444774866104126, "learning_rate": 1e-05, "loss": 1.0163, "step": 20950 }, { "epoch": 18.560673162090346, "grad_norm": 0.3404650092124939, "learning_rate": 1e-05, "loss": 0.9909, "step": 20955 }, { "epoch": 18.565101860053144, "grad_norm": 0.222050279378891, "learning_rate": 1e-05, "loss": 1.0023, "step": 20960 }, { "epoch": 18.569530558015945, "grad_norm": 0.25178003311157227, "learning_rate": 1e-05, "loss": 0.9965, "step": 20965 }, { "epoch": 18.573959255978743, "grad_norm": 0.3069479167461395, "learning_rate": 1e-05, "loss": 0.9819, "step": 20970 }, { "epoch": 18.57838795394154, "grad_norm": 0.2649993598461151, "learning_rate": 1e-05, "loss": 0.9646, "step": 20975 }, { "epoch": 18.58281665190434, "grad_norm": 0.25494804978370667, "learning_rate": 1e-05, "loss": 0.9553, "step": 20980 }, { "epoch": 18.58724534986714, "grad_norm": 0.23358748853206635, "learning_rate": 1e-05, "loss": 1.0081, "step": 20985 }, { "epoch": 18.591674047829937, "grad_norm": 0.258725106716156, "learning_rate": 1e-05, "loss": 0.9871, "step": 20990 }, { "epoch": 18.596102745792738, "grad_norm": 0.2568466067314148, "learning_rate": 1e-05, "loss": 0.9412, "step": 20995 }, { "epoch": 18.600531443755536, "grad_norm": 0.2945703864097595, "learning_rate": 1e-05, "loss": 1.0135, "step": 21000 }, { "epoch": 18.604960141718333, "grad_norm": 0.3263799846172333, "learning_rate": 1e-05, "loss": 0.9803, "step": 21005 }, { "epoch": 18.609388839681134, "grad_norm": 0.25300148129463196, "learning_rate": 1e-05, "loss": 0.9691, "step": 21010 }, { "epoch": 18.613817537643932, "grad_norm": 0.23674839735031128, "learning_rate": 1e-05, "loss": 1.0204, "step": 21015 }, { "epoch": 18.618246235606733, "grad_norm": 0.2520981431007385, "learning_rate": 1e-05, "loss": 1.0249, "step": 21020 }, { "epoch": 18.62267493356953, "grad_norm": 0.240373894572258, "learning_rate": 1e-05, "loss": 0.9786, "step": 21025 }, { "epoch": 18.62710363153233, "grad_norm": 0.26391932368278503, "learning_rate": 1e-05, "loss": 0.9995, "step": 21030 }, { "epoch": 18.63153232949513, "grad_norm": 0.2686443030834198, "learning_rate": 1e-05, "loss": 1.0182, "step": 21035 }, { "epoch": 18.635961027457927, "grad_norm": 0.2981419861316681, "learning_rate": 1e-05, "loss": 0.9662, "step": 21040 }, { "epoch": 18.640389725420725, "grad_norm": 0.24534614384174347, "learning_rate": 1e-05, "loss": 1.0128, "step": 21045 }, { "epoch": 18.644818423383526, "grad_norm": 0.29010310769081116, "learning_rate": 1e-05, "loss": 0.9676, "step": 21050 }, { "epoch": 18.649247121346324, "grad_norm": 0.2278757393360138, "learning_rate": 1e-05, "loss": 0.9557, "step": 21055 }, { "epoch": 18.65367581930912, "grad_norm": 0.2694181203842163, "learning_rate": 1e-05, "loss": 0.9725, "step": 21060 }, { "epoch": 18.658104517271923, "grad_norm": 0.2664870023727417, "learning_rate": 1e-05, "loss": 0.9912, "step": 21065 }, { "epoch": 18.66253321523472, "grad_norm": 0.23377925157546997, "learning_rate": 1e-05, "loss": 0.9716, "step": 21070 }, { "epoch": 18.666961913197518, "grad_norm": 0.2550694942474365, "learning_rate": 1e-05, "loss": 0.9964, "step": 21075 }, { "epoch": 18.67139061116032, "grad_norm": 0.2993178069591522, "learning_rate": 1e-05, "loss": 1.0306, "step": 21080 }, { "epoch": 18.675819309123117, "grad_norm": 0.25599873065948486, "learning_rate": 1e-05, "loss": 0.9417, "step": 21085 }, { "epoch": 18.68024800708592, "grad_norm": 0.2198406308889389, "learning_rate": 1e-05, "loss": 0.95, "step": 21090 }, { "epoch": 18.684676705048716, "grad_norm": 0.3192952275276184, "learning_rate": 1e-05, "loss": 0.975, "step": 21095 }, { "epoch": 18.689105403011514, "grad_norm": 0.2867943048477173, "learning_rate": 1e-05, "loss": 0.9661, "step": 21100 }, { "epoch": 18.693534100974315, "grad_norm": 0.23722036182880402, "learning_rate": 1e-05, "loss": 1.0405, "step": 21105 }, { "epoch": 18.697962798937112, "grad_norm": 0.2737114429473877, "learning_rate": 1e-05, "loss": 0.9791, "step": 21110 }, { "epoch": 18.70239149689991, "grad_norm": 0.28285425901412964, "learning_rate": 1e-05, "loss": 0.9279, "step": 21115 }, { "epoch": 18.70682019486271, "grad_norm": 0.2990366518497467, "learning_rate": 1e-05, "loss": 0.9644, "step": 21120 }, { "epoch": 18.71124889282551, "grad_norm": 0.302739679813385, "learning_rate": 1e-05, "loss": 0.9626, "step": 21125 }, { "epoch": 18.715677590788307, "grad_norm": 0.2583636939525604, "learning_rate": 1e-05, "loss": 0.9853, "step": 21130 }, { "epoch": 18.720106288751108, "grad_norm": 0.20303533971309662, "learning_rate": 1e-05, "loss": 1.0069, "step": 21135 }, { "epoch": 18.724534986713905, "grad_norm": 0.28236815333366394, "learning_rate": 1e-05, "loss": 1.0276, "step": 21140 }, { "epoch": 18.728963684676707, "grad_norm": 0.33288443088531494, "learning_rate": 1e-05, "loss": 0.9545, "step": 21145 }, { "epoch": 18.733392382639504, "grad_norm": 0.24907001852989197, "learning_rate": 1e-05, "loss": 1.0048, "step": 21150 }, { "epoch": 18.737821080602302, "grad_norm": 0.28821179270744324, "learning_rate": 1e-05, "loss": 0.9795, "step": 21155 }, { "epoch": 18.742249778565103, "grad_norm": 0.3428443968296051, "learning_rate": 1e-05, "loss": 1.0173, "step": 21160 }, { "epoch": 18.7466784765279, "grad_norm": 0.2658557891845703, "learning_rate": 1e-05, "loss": 0.9856, "step": 21165 }, { "epoch": 18.7511071744907, "grad_norm": 0.30375638604164124, "learning_rate": 1e-05, "loss": 1.0382, "step": 21170 }, { "epoch": 18.7555358724535, "grad_norm": 0.2688475549221039, "learning_rate": 1e-05, "loss": 0.9794, "step": 21175 }, { "epoch": 18.759964570416297, "grad_norm": 0.2684788107872009, "learning_rate": 1e-05, "loss": 0.9988, "step": 21180 }, { "epoch": 18.764393268379095, "grad_norm": 0.2773300111293793, "learning_rate": 1e-05, "loss": 0.9659, "step": 21185 }, { "epoch": 18.768821966341896, "grad_norm": 0.26814454793930054, "learning_rate": 1e-05, "loss": 0.9837, "step": 21190 }, { "epoch": 18.773250664304694, "grad_norm": 0.263419508934021, "learning_rate": 1e-05, "loss": 1.0075, "step": 21195 }, { "epoch": 18.77767936226749, "grad_norm": 0.22362379729747772, "learning_rate": 1e-05, "loss": 0.9701, "step": 21200 }, { "epoch": 18.782108060230293, "grad_norm": 0.21374699473381042, "learning_rate": 1e-05, "loss": 1.0026, "step": 21205 }, { "epoch": 18.78653675819309, "grad_norm": 0.3004043996334076, "learning_rate": 1e-05, "loss": 0.969, "step": 21210 }, { "epoch": 18.79096545615589, "grad_norm": 0.2347671240568161, "learning_rate": 1e-05, "loss": 1.0108, "step": 21215 }, { "epoch": 18.79539415411869, "grad_norm": 0.22683128714561462, "learning_rate": 1e-05, "loss": 0.9784, "step": 21220 }, { "epoch": 18.799822852081487, "grad_norm": 0.22115366160869598, "learning_rate": 1e-05, "loss": 0.9206, "step": 21225 }, { "epoch": 18.804251550044288, "grad_norm": 0.33044692873954773, "learning_rate": 1e-05, "loss": 0.9605, "step": 21230 }, { "epoch": 18.808680248007086, "grad_norm": 0.2138751894235611, "learning_rate": 1e-05, "loss": 1.0315, "step": 21235 }, { "epoch": 18.813108945969883, "grad_norm": 0.21777422726154327, "learning_rate": 1e-05, "loss": 1.0757, "step": 21240 }, { "epoch": 18.817537643932685, "grad_norm": 0.24124111235141754, "learning_rate": 1e-05, "loss": 1.0129, "step": 21245 }, { "epoch": 18.821966341895482, "grad_norm": 0.2945777475833893, "learning_rate": 1e-05, "loss": 1.0232, "step": 21250 }, { "epoch": 18.82639503985828, "grad_norm": 0.24781376123428345, "learning_rate": 1e-05, "loss": 0.975, "step": 21255 }, { "epoch": 18.83082373782108, "grad_norm": 0.26145169138908386, "learning_rate": 1e-05, "loss": 1.0206, "step": 21260 }, { "epoch": 18.83525243578388, "grad_norm": 0.25660327076911926, "learning_rate": 1e-05, "loss": 0.9666, "step": 21265 }, { "epoch": 18.83968113374668, "grad_norm": 0.23725764453411102, "learning_rate": 1e-05, "loss": 0.9525, "step": 21270 }, { "epoch": 18.844109831709478, "grad_norm": 0.23738716542720795, "learning_rate": 1e-05, "loss": 1.0279, "step": 21275 }, { "epoch": 18.848538529672275, "grad_norm": 0.30419716238975525, "learning_rate": 1e-05, "loss": 1.021, "step": 21280 }, { "epoch": 18.852967227635077, "grad_norm": 0.28879550099372864, "learning_rate": 1e-05, "loss": 0.9393, "step": 21285 }, { "epoch": 18.857395925597874, "grad_norm": 0.25562360882759094, "learning_rate": 1e-05, "loss": 0.944, "step": 21290 }, { "epoch": 18.861824623560672, "grad_norm": 0.2460346519947052, "learning_rate": 1e-05, "loss": 0.9408, "step": 21295 }, { "epoch": 18.866253321523473, "grad_norm": 0.23227177560329437, "learning_rate": 1e-05, "loss": 1.0332, "step": 21300 }, { "epoch": 18.87068201948627, "grad_norm": 0.23724380135536194, "learning_rate": 1e-05, "loss": 1.0057, "step": 21305 }, { "epoch": 18.87511071744907, "grad_norm": 0.2601776123046875, "learning_rate": 1e-05, "loss": 1.0028, "step": 21310 }, { "epoch": 18.87953941541187, "grad_norm": 0.2933485209941864, "learning_rate": 1e-05, "loss": 1.0153, "step": 21315 }, { "epoch": 18.883968113374667, "grad_norm": 0.28493037819862366, "learning_rate": 1e-05, "loss": 1.0288, "step": 21320 }, { "epoch": 18.888396811337465, "grad_norm": 0.2708951532840729, "learning_rate": 1e-05, "loss": 1.0048, "step": 21325 }, { "epoch": 18.892825509300266, "grad_norm": 0.2811450958251953, "learning_rate": 1e-05, "loss": 0.9752, "step": 21330 }, { "epoch": 18.897254207263064, "grad_norm": 0.2341175228357315, "learning_rate": 1e-05, "loss": 1.011, "step": 21335 }, { "epoch": 18.901682905225865, "grad_norm": 0.22528398036956787, "learning_rate": 1e-05, "loss": 1.0494, "step": 21340 }, { "epoch": 18.906111603188663, "grad_norm": 0.26082026958465576, "learning_rate": 1e-05, "loss": 1.0044, "step": 21345 }, { "epoch": 18.91054030115146, "grad_norm": 0.27095597982406616, "learning_rate": 1e-05, "loss": 0.9985, "step": 21350 }, { "epoch": 18.91496899911426, "grad_norm": 0.27901455760002136, "learning_rate": 1e-05, "loss": 0.9875, "step": 21355 }, { "epoch": 18.91939769707706, "grad_norm": 0.2891344130039215, "learning_rate": 1e-05, "loss": 1.009, "step": 21360 }, { "epoch": 18.923826395039857, "grad_norm": 0.2786041796207428, "learning_rate": 1e-05, "loss": 0.9716, "step": 21365 }, { "epoch": 18.928255093002658, "grad_norm": 0.22194671630859375, "learning_rate": 1e-05, "loss": 1.0008, "step": 21370 }, { "epoch": 18.932683790965456, "grad_norm": 0.2665034830570221, "learning_rate": 1e-05, "loss": 0.9853, "step": 21375 }, { "epoch": 18.937112488928253, "grad_norm": 0.23111851513385773, "learning_rate": 1e-05, "loss": 0.9297, "step": 21380 }, { "epoch": 18.941541186891055, "grad_norm": 0.22685515880584717, "learning_rate": 1e-05, "loss": 1.0066, "step": 21385 }, { "epoch": 18.945969884853852, "grad_norm": 0.24442416429519653, "learning_rate": 1e-05, "loss": 0.9409, "step": 21390 }, { "epoch": 18.950398582816653, "grad_norm": 0.2431734800338745, "learning_rate": 1e-05, "loss": 1.0489, "step": 21395 }, { "epoch": 18.95482728077945, "grad_norm": 0.20329251885414124, "learning_rate": 1e-05, "loss": 0.9876, "step": 21400 }, { "epoch": 18.95925597874225, "grad_norm": 0.22948205471038818, "learning_rate": 1e-05, "loss": 0.9195, "step": 21405 }, { "epoch": 18.96368467670505, "grad_norm": 0.2835366427898407, "learning_rate": 1e-05, "loss": 1.0221, "step": 21410 }, { "epoch": 18.968113374667848, "grad_norm": 0.2749370038509369, "learning_rate": 1e-05, "loss": 0.9378, "step": 21415 }, { "epoch": 18.972542072630645, "grad_norm": 0.2701703608036041, "learning_rate": 1e-05, "loss": 1.0146, "step": 21420 }, { "epoch": 18.976970770593447, "grad_norm": 0.25853419303894043, "learning_rate": 1e-05, "loss": 0.987, "step": 21425 }, { "epoch": 18.981399468556244, "grad_norm": 0.304542601108551, "learning_rate": 1e-05, "loss": 0.9865, "step": 21430 }, { "epoch": 18.985828166519042, "grad_norm": 0.2370123565196991, "learning_rate": 1e-05, "loss": 0.9629, "step": 21435 }, { "epoch": 18.990256864481843, "grad_norm": 0.3070536255836487, "learning_rate": 1e-05, "loss": 0.9001, "step": 21440 }, { "epoch": 18.99468556244464, "grad_norm": 0.2961980998516083, "learning_rate": 1e-05, "loss": 1.0115, "step": 21445 }, { "epoch": 18.999114260407442, "grad_norm": 0.27768170833587646, "learning_rate": 1e-05, "loss": 0.9662, "step": 21450 }, { "epoch": 19.00354295837024, "grad_norm": 0.2616350054740906, "learning_rate": 1e-05, "loss": 0.9706, "step": 21455 }, { "epoch": 19.007971656333037, "grad_norm": 0.2388584166765213, "learning_rate": 1e-05, "loss": 0.9551, "step": 21460 }, { "epoch": 19.01240035429584, "grad_norm": 0.2257193773984909, "learning_rate": 1e-05, "loss": 1.0207, "step": 21465 }, { "epoch": 19.016829052258636, "grad_norm": 0.2784733176231384, "learning_rate": 1e-05, "loss": 0.9867, "step": 21470 }, { "epoch": 19.021257750221434, "grad_norm": 0.27685242891311646, "learning_rate": 1e-05, "loss": 0.9253, "step": 21475 }, { "epoch": 19.025686448184235, "grad_norm": 0.23150534927845, "learning_rate": 1e-05, "loss": 0.9496, "step": 21480 }, { "epoch": 19.030115146147033, "grad_norm": 0.34025201201438904, "learning_rate": 1e-05, "loss": 0.9961, "step": 21485 }, { "epoch": 19.03454384410983, "grad_norm": 0.26765719056129456, "learning_rate": 1e-05, "loss": 0.9331, "step": 21490 }, { "epoch": 19.03897254207263, "grad_norm": 0.23880422115325928, "learning_rate": 1e-05, "loss": 0.9624, "step": 21495 }, { "epoch": 19.04340124003543, "grad_norm": 0.38661840558052063, "learning_rate": 1e-05, "loss": 0.9955, "step": 21500 }, { "epoch": 19.047829937998227, "grad_norm": 0.3080753982067108, "learning_rate": 1e-05, "loss": 1.0451, "step": 21505 }, { "epoch": 19.052258635961028, "grad_norm": 0.30306461453437805, "learning_rate": 1e-05, "loss": 0.9511, "step": 21510 }, { "epoch": 19.056687333923826, "grad_norm": 0.297817200422287, "learning_rate": 1e-05, "loss": 0.9675, "step": 21515 }, { "epoch": 19.061116031886627, "grad_norm": 0.3119041621685028, "learning_rate": 1e-05, "loss": 1.0438, "step": 21520 }, { "epoch": 19.065544729849424, "grad_norm": 0.2956949770450592, "learning_rate": 1e-05, "loss": 1.006, "step": 21525 }, { "epoch": 19.069973427812222, "grad_norm": 0.27365535497665405, "learning_rate": 1e-05, "loss": 0.9774, "step": 21530 }, { "epoch": 19.074402125775023, "grad_norm": 0.2655046582221985, "learning_rate": 1e-05, "loss": 1.0085, "step": 21535 }, { "epoch": 19.07883082373782, "grad_norm": 0.23782466351985931, "learning_rate": 1e-05, "loss": 1.0021, "step": 21540 }, { "epoch": 19.08325952170062, "grad_norm": 0.2659986615180969, "learning_rate": 1e-05, "loss": 0.9682, "step": 21545 }, { "epoch": 19.08768821966342, "grad_norm": 0.2969227731227875, "learning_rate": 1e-05, "loss": 1.0613, "step": 21550 }, { "epoch": 19.092116917626218, "grad_norm": 0.3280172049999237, "learning_rate": 1e-05, "loss": 0.9477, "step": 21555 }, { "epoch": 19.096545615589015, "grad_norm": 0.2885426878929138, "learning_rate": 1e-05, "loss": 1.0096, "step": 21560 }, { "epoch": 19.100974313551816, "grad_norm": 0.22005954384803772, "learning_rate": 1e-05, "loss": 1.0181, "step": 21565 }, { "epoch": 19.105403011514614, "grad_norm": 0.20952272415161133, "learning_rate": 1e-05, "loss": 0.9689, "step": 21570 }, { "epoch": 19.109831709477415, "grad_norm": 0.23301991820335388, "learning_rate": 1e-05, "loss": 0.9847, "step": 21575 }, { "epoch": 19.114260407440213, "grad_norm": 0.2801075875759125, "learning_rate": 1e-05, "loss": 1.0011, "step": 21580 }, { "epoch": 19.11868910540301, "grad_norm": 0.23552002012729645, "learning_rate": 1e-05, "loss": 0.989, "step": 21585 }, { "epoch": 19.123117803365812, "grad_norm": 0.24808785319328308, "learning_rate": 1e-05, "loss": 1.0027, "step": 21590 }, { "epoch": 19.12754650132861, "grad_norm": 0.2408321350812912, "learning_rate": 1e-05, "loss": 0.997, "step": 21595 }, { "epoch": 19.131975199291407, "grad_norm": 0.2559404671192169, "learning_rate": 1e-05, "loss": 0.9901, "step": 21600 }, { "epoch": 19.13640389725421, "grad_norm": 0.26444193720817566, "learning_rate": 1e-05, "loss": 0.9858, "step": 21605 }, { "epoch": 19.140832595217006, "grad_norm": 0.2920604944229126, "learning_rate": 1e-05, "loss": 0.9634, "step": 21610 }, { "epoch": 19.145261293179804, "grad_norm": 0.263216495513916, "learning_rate": 1e-05, "loss": 0.9659, "step": 21615 }, { "epoch": 19.149689991142605, "grad_norm": 0.28683018684387207, "learning_rate": 1e-05, "loss": 0.9163, "step": 21620 }, { "epoch": 19.154118689105402, "grad_norm": 0.24363479018211365, "learning_rate": 1e-05, "loss": 0.9901, "step": 21625 }, { "epoch": 19.158547387068204, "grad_norm": 0.2464655339717865, "learning_rate": 1e-05, "loss": 0.9212, "step": 21630 }, { "epoch": 19.162976085031, "grad_norm": 0.2400534600019455, "learning_rate": 1e-05, "loss": 1.0197, "step": 21635 }, { "epoch": 19.1674047829938, "grad_norm": 0.24684584140777588, "learning_rate": 1e-05, "loss": 0.9961, "step": 21640 }, { "epoch": 19.1718334809566, "grad_norm": 0.2557712495326996, "learning_rate": 1e-05, "loss": 0.9881, "step": 21645 }, { "epoch": 19.176262178919398, "grad_norm": 0.25647491216659546, "learning_rate": 1e-05, "loss": 1.0043, "step": 21650 }, { "epoch": 19.180690876882196, "grad_norm": 0.3155398666858673, "learning_rate": 1e-05, "loss": 1.0053, "step": 21655 }, { "epoch": 19.185119574844997, "grad_norm": 0.2577785849571228, "learning_rate": 1e-05, "loss": 0.9907, "step": 21660 }, { "epoch": 19.189548272807794, "grad_norm": 0.21081209182739258, "learning_rate": 1e-05, "loss": 1.0082, "step": 21665 }, { "epoch": 19.193976970770592, "grad_norm": 0.21020257472991943, "learning_rate": 1e-05, "loss": 0.9921, "step": 21670 }, { "epoch": 19.198405668733393, "grad_norm": 0.2684951722621918, "learning_rate": 1e-05, "loss": 0.9797, "step": 21675 }, { "epoch": 19.20283436669619, "grad_norm": 0.22918616235256195, "learning_rate": 1e-05, "loss": 1.0302, "step": 21680 }, { "epoch": 19.20726306465899, "grad_norm": 0.23693285882472992, "learning_rate": 1e-05, "loss": 0.9999, "step": 21685 }, { "epoch": 19.21169176262179, "grad_norm": 0.259158194065094, "learning_rate": 1e-05, "loss": 0.9677, "step": 21690 }, { "epoch": 19.216120460584587, "grad_norm": 0.35630491375923157, "learning_rate": 1e-05, "loss": 0.9559, "step": 21695 }, { "epoch": 19.22054915854739, "grad_norm": 0.2685184180736542, "learning_rate": 1e-05, "loss": 0.9573, "step": 21700 }, { "epoch": 19.224977856510186, "grad_norm": 0.2263508439064026, "learning_rate": 1e-05, "loss": 1.0059, "step": 21705 }, { "epoch": 19.229406554472984, "grad_norm": 0.24197499454021454, "learning_rate": 1e-05, "loss": 1.0028, "step": 21710 }, { "epoch": 19.233835252435785, "grad_norm": 0.2383718341588974, "learning_rate": 1e-05, "loss": 0.9664, "step": 21715 }, { "epoch": 19.238263950398583, "grad_norm": 0.2473609894514084, "learning_rate": 1e-05, "loss": 0.989, "step": 21720 }, { "epoch": 19.24269264836138, "grad_norm": 0.2580435276031494, "learning_rate": 1e-05, "loss": 1.0205, "step": 21725 }, { "epoch": 19.24712134632418, "grad_norm": 0.2924043834209442, "learning_rate": 1e-05, "loss": 0.9705, "step": 21730 }, { "epoch": 19.25155004428698, "grad_norm": 0.23957420885562897, "learning_rate": 1e-05, "loss": 0.9642, "step": 21735 }, { "epoch": 19.255978742249777, "grad_norm": 0.2752174437046051, "learning_rate": 1e-05, "loss": 0.9598, "step": 21740 }, { "epoch": 19.260407440212578, "grad_norm": 0.3000291585922241, "learning_rate": 1e-05, "loss": 1.012, "step": 21745 }, { "epoch": 19.264836138175376, "grad_norm": 0.26889264583587646, "learning_rate": 1e-05, "loss": 1.0141, "step": 21750 }, { "epoch": 19.269264836138177, "grad_norm": 0.3213759660720825, "learning_rate": 1e-05, "loss": 1.0338, "step": 21755 }, { "epoch": 19.273693534100975, "grad_norm": 0.31049710512161255, "learning_rate": 1e-05, "loss": 1.0027, "step": 21760 }, { "epoch": 19.278122232063772, "grad_norm": 0.286528080701828, "learning_rate": 1e-05, "loss": 0.9876, "step": 21765 }, { "epoch": 19.282550930026574, "grad_norm": 0.22837868332862854, "learning_rate": 1e-05, "loss": 1.0083, "step": 21770 }, { "epoch": 19.28697962798937, "grad_norm": 0.27868813276290894, "learning_rate": 1e-05, "loss": 0.9787, "step": 21775 }, { "epoch": 19.29140832595217, "grad_norm": 0.22765976190567017, "learning_rate": 1e-05, "loss": 0.9319, "step": 21780 }, { "epoch": 19.29583702391497, "grad_norm": 0.2459191083908081, "learning_rate": 1e-05, "loss": 0.9805, "step": 21785 }, { "epoch": 19.300265721877768, "grad_norm": 0.2618340849876404, "learning_rate": 1e-05, "loss": 0.9429, "step": 21790 }, { "epoch": 19.304694419840565, "grad_norm": 0.24597235023975372, "learning_rate": 1e-05, "loss": 1.052, "step": 21795 }, { "epoch": 19.309123117803367, "grad_norm": 0.25850048661231995, "learning_rate": 1e-05, "loss": 1.0058, "step": 21800 }, { "epoch": 19.313551815766164, "grad_norm": 0.2148985117673874, "learning_rate": 1e-05, "loss": 0.9863, "step": 21805 }, { "epoch": 19.317980513728962, "grad_norm": 0.2388210892677307, "learning_rate": 1e-05, "loss": 0.972, "step": 21810 }, { "epoch": 19.322409211691763, "grad_norm": 0.22469930350780487, "learning_rate": 1e-05, "loss": 0.9615, "step": 21815 }, { "epoch": 19.32683790965456, "grad_norm": 0.29969337582588196, "learning_rate": 1e-05, "loss": 1.0124, "step": 21820 }, { "epoch": 19.331266607617362, "grad_norm": 0.2328653335571289, "learning_rate": 1e-05, "loss": 0.9907, "step": 21825 }, { "epoch": 19.33569530558016, "grad_norm": 0.2838694155216217, "learning_rate": 1e-05, "loss": 0.9705, "step": 21830 }, { "epoch": 19.340124003542957, "grad_norm": 0.2548776865005493, "learning_rate": 1e-05, "loss": 1.0015, "step": 21835 }, { "epoch": 19.34455270150576, "grad_norm": 0.2791413366794586, "learning_rate": 1e-05, "loss": 0.9285, "step": 21840 }, { "epoch": 19.348981399468556, "grad_norm": 0.21865776181221008, "learning_rate": 1e-05, "loss": 0.9759, "step": 21845 }, { "epoch": 19.353410097431354, "grad_norm": 0.31672561168670654, "learning_rate": 1e-05, "loss": 0.9695, "step": 21850 }, { "epoch": 19.357838795394155, "grad_norm": 0.27901971340179443, "learning_rate": 1e-05, "loss": 0.9658, "step": 21855 }, { "epoch": 19.362267493356953, "grad_norm": 0.2634013593196869, "learning_rate": 1e-05, "loss": 1.007, "step": 21860 }, { "epoch": 19.36669619131975, "grad_norm": 0.28205999732017517, "learning_rate": 1e-05, "loss": 0.9949, "step": 21865 }, { "epoch": 19.37112488928255, "grad_norm": 0.2697298526763916, "learning_rate": 1e-05, "loss": 0.9445, "step": 21870 }, { "epoch": 19.37555358724535, "grad_norm": 0.25014305114746094, "learning_rate": 1e-05, "loss": 1.0438, "step": 21875 }, { "epoch": 19.37998228520815, "grad_norm": 0.23310589790344238, "learning_rate": 1e-05, "loss": 0.9606, "step": 21880 }, { "epoch": 19.384410983170948, "grad_norm": 0.27912992238998413, "learning_rate": 1e-05, "loss": 0.988, "step": 21885 }, { "epoch": 19.388839681133746, "grad_norm": 0.3555620610713959, "learning_rate": 1e-05, "loss": 0.934, "step": 21890 }, { "epoch": 19.393268379096547, "grad_norm": 0.260863333940506, "learning_rate": 1e-05, "loss": 0.953, "step": 21895 }, { "epoch": 19.397697077059345, "grad_norm": 0.2534933388233185, "learning_rate": 1e-05, "loss": 1.027, "step": 21900 }, { "epoch": 19.402125775022142, "grad_norm": 0.2706056237220764, "learning_rate": 1e-05, "loss": 0.9572, "step": 21905 }, { "epoch": 19.406554472984944, "grad_norm": 0.2713649272918701, "learning_rate": 1e-05, "loss": 0.9662, "step": 21910 }, { "epoch": 19.41098317094774, "grad_norm": 0.3194676339626312, "learning_rate": 1e-05, "loss": 0.9962, "step": 21915 }, { "epoch": 19.41541186891054, "grad_norm": 0.2466733455657959, "learning_rate": 1e-05, "loss": 0.9969, "step": 21920 }, { "epoch": 19.41984056687334, "grad_norm": 0.20290613174438477, "learning_rate": 1e-05, "loss": 0.9503, "step": 21925 }, { "epoch": 19.424269264836138, "grad_norm": 0.202281191945076, "learning_rate": 1e-05, "loss": 1.0139, "step": 21930 }, { "epoch": 19.428697962798935, "grad_norm": 0.242069274187088, "learning_rate": 1e-05, "loss": 1.0148, "step": 21935 }, { "epoch": 19.433126660761737, "grad_norm": 0.3062588572502136, "learning_rate": 1e-05, "loss": 1.015, "step": 21940 }, { "epoch": 19.437555358724534, "grad_norm": 0.2583722472190857, "learning_rate": 1e-05, "loss": 0.9844, "step": 21945 }, { "epoch": 19.441984056687335, "grad_norm": 0.2504075765609741, "learning_rate": 1e-05, "loss": 0.994, "step": 21950 }, { "epoch": 19.446412754650133, "grad_norm": 0.2103910595178604, "learning_rate": 1e-05, "loss": 0.9841, "step": 21955 }, { "epoch": 19.45084145261293, "grad_norm": 0.2799288034439087, "learning_rate": 1e-05, "loss": 0.9882, "step": 21960 }, { "epoch": 19.455270150575732, "grad_norm": 0.2419409602880478, "learning_rate": 1e-05, "loss": 0.9675, "step": 21965 }, { "epoch": 19.45969884853853, "grad_norm": 0.26130715012550354, "learning_rate": 1e-05, "loss": 0.99, "step": 21970 }, { "epoch": 19.464127546501327, "grad_norm": 0.23237186670303345, "learning_rate": 1e-05, "loss": 0.9856, "step": 21975 }, { "epoch": 19.46855624446413, "grad_norm": 0.26077741384506226, "learning_rate": 1e-05, "loss": 0.998, "step": 21980 }, { "epoch": 19.472984942426926, "grad_norm": 0.2457740753889084, "learning_rate": 1e-05, "loss": 1.021, "step": 21985 }, { "epoch": 19.477413640389724, "grad_norm": 0.28531643748283386, "learning_rate": 1e-05, "loss": 1.0018, "step": 21990 }, { "epoch": 19.481842338352525, "grad_norm": 0.24584434926509857, "learning_rate": 1e-05, "loss": 0.9693, "step": 21995 }, { "epoch": 19.486271036315323, "grad_norm": 0.2677319347858429, "learning_rate": 1e-05, "loss": 1.0086, "step": 22000 }, { "epoch": 19.490699734278124, "grad_norm": 0.25762227177619934, "learning_rate": 1e-05, "loss": 0.9374, "step": 22005 }, { "epoch": 19.49512843224092, "grad_norm": 0.2576358914375305, "learning_rate": 1e-05, "loss": 1.0578, "step": 22010 }, { "epoch": 19.49955713020372, "grad_norm": 0.2575262784957886, "learning_rate": 1e-05, "loss": 1.0425, "step": 22015 }, { "epoch": 19.50398582816652, "grad_norm": 0.2733260691165924, "learning_rate": 1e-05, "loss": 0.94, "step": 22020 }, { "epoch": 19.508414526129318, "grad_norm": 0.24955831468105316, "learning_rate": 1e-05, "loss": 0.9857, "step": 22025 }, { "epoch": 19.512843224092116, "grad_norm": 0.23731659352779388, "learning_rate": 1e-05, "loss": 0.9893, "step": 22030 }, { "epoch": 19.517271922054917, "grad_norm": 0.27653467655181885, "learning_rate": 1e-05, "loss": 0.9923, "step": 22035 }, { "epoch": 19.521700620017715, "grad_norm": 0.2617150545120239, "learning_rate": 1e-05, "loss": 1.0029, "step": 22040 }, { "epoch": 19.526129317980512, "grad_norm": 0.26434868574142456, "learning_rate": 1e-05, "loss": 1.0002, "step": 22045 }, { "epoch": 19.530558015943313, "grad_norm": 0.26113566756248474, "learning_rate": 1e-05, "loss": 0.9801, "step": 22050 }, { "epoch": 19.53498671390611, "grad_norm": 0.2489299476146698, "learning_rate": 1e-05, "loss": 1.0568, "step": 22055 }, { "epoch": 19.53941541186891, "grad_norm": 0.23893140256404877, "learning_rate": 1e-05, "loss": 0.988, "step": 22060 }, { "epoch": 19.54384410983171, "grad_norm": 0.2802172601222992, "learning_rate": 1e-05, "loss": 1.0, "step": 22065 }, { "epoch": 19.548272807794508, "grad_norm": 0.28759679198265076, "learning_rate": 1e-05, "loss": 0.9752, "step": 22070 }, { "epoch": 19.55270150575731, "grad_norm": 0.2095140516757965, "learning_rate": 1e-05, "loss": 0.9591, "step": 22075 }, { "epoch": 19.557130203720106, "grad_norm": 0.2768591642379761, "learning_rate": 1e-05, "loss": 0.9045, "step": 22080 }, { "epoch": 19.561558901682904, "grad_norm": 0.21753664314746857, "learning_rate": 1e-05, "loss": 0.9647, "step": 22085 }, { "epoch": 19.565987599645705, "grad_norm": 0.22399666905403137, "learning_rate": 1e-05, "loss": 0.9925, "step": 22090 }, { "epoch": 19.570416297608503, "grad_norm": 0.25039008259773254, "learning_rate": 1e-05, "loss": 0.9486, "step": 22095 }, { "epoch": 19.5748449955713, "grad_norm": 0.2238444983959198, "learning_rate": 1e-05, "loss": 0.9865, "step": 22100 }, { "epoch": 19.579273693534102, "grad_norm": 0.27676069736480713, "learning_rate": 1e-05, "loss": 0.9326, "step": 22105 }, { "epoch": 19.5837023914969, "grad_norm": 0.26960572600364685, "learning_rate": 1e-05, "loss": 1.0351, "step": 22110 }, { "epoch": 19.588131089459697, "grad_norm": 0.2727920711040497, "learning_rate": 1e-05, "loss": 0.9477, "step": 22115 }, { "epoch": 19.5925597874225, "grad_norm": 0.2702040374279022, "learning_rate": 1e-05, "loss": 1.0106, "step": 22120 }, { "epoch": 19.596988485385296, "grad_norm": 0.31197258830070496, "learning_rate": 1e-05, "loss": 0.938, "step": 22125 }, { "epoch": 19.601417183348097, "grad_norm": 0.2902476489543915, "learning_rate": 1e-05, "loss": 0.9859, "step": 22130 }, { "epoch": 19.605845881310895, "grad_norm": 0.2764741778373718, "learning_rate": 1e-05, "loss": 0.9958, "step": 22135 }, { "epoch": 19.610274579273693, "grad_norm": 0.2726614773273468, "learning_rate": 1e-05, "loss": 1.0131, "step": 22140 }, { "epoch": 19.614703277236494, "grad_norm": 0.24448367953300476, "learning_rate": 1e-05, "loss": 0.9723, "step": 22145 }, { "epoch": 19.61913197519929, "grad_norm": 0.24003101885318756, "learning_rate": 1e-05, "loss": 1.0143, "step": 22150 }, { "epoch": 19.62356067316209, "grad_norm": 0.21956351399421692, "learning_rate": 1e-05, "loss": 0.9729, "step": 22155 }, { "epoch": 19.62798937112489, "grad_norm": 0.2665596902370453, "learning_rate": 1e-05, "loss": 0.9851, "step": 22160 }, { "epoch": 19.632418069087688, "grad_norm": 0.2401365339756012, "learning_rate": 1e-05, "loss": 0.9925, "step": 22165 }, { "epoch": 19.636846767050486, "grad_norm": 0.23235240578651428, "learning_rate": 1e-05, "loss": 1.0517, "step": 22170 }, { "epoch": 19.641275465013287, "grad_norm": 0.2293304055929184, "learning_rate": 1e-05, "loss": 1.0054, "step": 22175 }, { "epoch": 19.645704162976084, "grad_norm": 0.2629548907279968, "learning_rate": 1e-05, "loss": 1.0645, "step": 22180 }, { "epoch": 19.650132860938886, "grad_norm": 0.27476292848587036, "learning_rate": 1e-05, "loss": 0.9586, "step": 22185 }, { "epoch": 19.654561558901683, "grad_norm": 0.23981112241744995, "learning_rate": 1e-05, "loss": 0.9909, "step": 22190 }, { "epoch": 19.65899025686448, "grad_norm": 0.2123728096485138, "learning_rate": 1e-05, "loss": 0.9736, "step": 22195 }, { "epoch": 19.663418954827282, "grad_norm": 0.25020381808280945, "learning_rate": 1e-05, "loss": 1.008, "step": 22200 }, { "epoch": 19.66784765279008, "grad_norm": 0.25911882519721985, "learning_rate": 1e-05, "loss": 0.9677, "step": 22205 }, { "epoch": 19.672276350752878, "grad_norm": 0.2560748755931854, "learning_rate": 1e-05, "loss": 1.001, "step": 22210 }, { "epoch": 19.67670504871568, "grad_norm": 0.2542824447154999, "learning_rate": 1e-05, "loss": 1.0031, "step": 22215 }, { "epoch": 19.681133746678476, "grad_norm": 0.24735695123672485, "learning_rate": 1e-05, "loss": 0.9911, "step": 22220 }, { "epoch": 19.685562444641274, "grad_norm": 0.2450762242078781, "learning_rate": 1e-05, "loss": 0.9999, "step": 22225 }, { "epoch": 19.689991142604075, "grad_norm": 0.2648891806602478, "learning_rate": 1e-05, "loss": 1.0001, "step": 22230 }, { "epoch": 19.694419840566873, "grad_norm": 0.26009857654571533, "learning_rate": 1e-05, "loss": 1.0316, "step": 22235 }, { "epoch": 19.698848538529674, "grad_norm": 0.2615038752555847, "learning_rate": 1e-05, "loss": 1.0206, "step": 22240 }, { "epoch": 19.70327723649247, "grad_norm": 0.2833743393421173, "learning_rate": 1e-05, "loss": 1.0179, "step": 22245 }, { "epoch": 19.70770593445527, "grad_norm": 0.2354872077703476, "learning_rate": 1e-05, "loss": 0.9969, "step": 22250 }, { "epoch": 19.71213463241807, "grad_norm": 0.24126538634300232, "learning_rate": 1e-05, "loss": 0.9922, "step": 22255 }, { "epoch": 19.71656333038087, "grad_norm": 0.22607719898223877, "learning_rate": 1e-05, "loss": 1.0269, "step": 22260 }, { "epoch": 19.720992028343666, "grad_norm": 0.27492403984069824, "learning_rate": 1e-05, "loss": 1.0021, "step": 22265 }, { "epoch": 19.725420726306467, "grad_norm": 0.2301681786775589, "learning_rate": 1e-05, "loss": 0.9731, "step": 22270 }, { "epoch": 19.729849424269265, "grad_norm": 0.26673516631126404, "learning_rate": 1e-05, "loss": 1.0001, "step": 22275 }, { "epoch": 19.734278122232062, "grad_norm": 0.2584981620311737, "learning_rate": 1e-05, "loss": 0.9546, "step": 22280 }, { "epoch": 19.738706820194864, "grad_norm": 0.27978476881980896, "learning_rate": 1e-05, "loss": 0.965, "step": 22285 }, { "epoch": 19.74313551815766, "grad_norm": 0.2619239091873169, "learning_rate": 1e-05, "loss": 0.9827, "step": 22290 }, { "epoch": 19.74756421612046, "grad_norm": 0.2218603789806366, "learning_rate": 1e-05, "loss": 1.0468, "step": 22295 }, { "epoch": 19.75199291408326, "grad_norm": 0.27625197172164917, "learning_rate": 1e-05, "loss": 0.9587, "step": 22300 }, { "epoch": 19.756421612046058, "grad_norm": 0.2624746561050415, "learning_rate": 1e-05, "loss": 0.9786, "step": 22305 }, { "epoch": 19.76085031000886, "grad_norm": 0.24015900492668152, "learning_rate": 1e-05, "loss": 1.0134, "step": 22310 }, { "epoch": 19.765279007971657, "grad_norm": 0.2397843301296234, "learning_rate": 1e-05, "loss": 1.0216, "step": 22315 }, { "epoch": 19.769707705934454, "grad_norm": 0.26035407185554504, "learning_rate": 1e-05, "loss": 0.9638, "step": 22320 }, { "epoch": 19.774136403897256, "grad_norm": 0.22754158079624176, "learning_rate": 1e-05, "loss": 0.993, "step": 22325 }, { "epoch": 19.778565101860053, "grad_norm": 0.23876163363456726, "learning_rate": 1e-05, "loss": 0.9428, "step": 22330 }, { "epoch": 19.78299379982285, "grad_norm": 0.23701980710029602, "learning_rate": 1e-05, "loss": 1.0421, "step": 22335 }, { "epoch": 19.787422497785652, "grad_norm": 0.24347496032714844, "learning_rate": 1e-05, "loss": 1.0023, "step": 22340 }, { "epoch": 19.79185119574845, "grad_norm": 0.2831503748893738, "learning_rate": 1e-05, "loss": 1.0243, "step": 22345 }, { "epoch": 19.796279893711247, "grad_norm": 0.2691107988357544, "learning_rate": 1e-05, "loss": 0.9583, "step": 22350 }, { "epoch": 19.80070859167405, "grad_norm": 0.274152934551239, "learning_rate": 1e-05, "loss": 1.004, "step": 22355 }, { "epoch": 19.805137289636846, "grad_norm": 0.2526584267616272, "learning_rate": 1e-05, "loss": 0.9277, "step": 22360 }, { "epoch": 19.809565987599647, "grad_norm": 0.2391812801361084, "learning_rate": 1e-05, "loss": 1.0368, "step": 22365 }, { "epoch": 19.813994685562445, "grad_norm": 0.22304296493530273, "learning_rate": 1e-05, "loss": 1.0165, "step": 22370 }, { "epoch": 19.818423383525243, "grad_norm": 0.2387648969888687, "learning_rate": 1e-05, "loss": 0.9882, "step": 22375 }, { "epoch": 19.822852081488044, "grad_norm": 0.29134365916252136, "learning_rate": 1e-05, "loss": 0.9295, "step": 22380 }, { "epoch": 19.82728077945084, "grad_norm": 0.25216689705848694, "learning_rate": 1e-05, "loss": 0.9912, "step": 22385 }, { "epoch": 19.83170947741364, "grad_norm": 0.2788964807987213, "learning_rate": 1e-05, "loss": 0.9723, "step": 22390 }, { "epoch": 19.83613817537644, "grad_norm": 0.24531854689121246, "learning_rate": 1e-05, "loss": 0.9611, "step": 22395 }, { "epoch": 19.840566873339238, "grad_norm": 0.23592737317085266, "learning_rate": 1e-05, "loss": 0.9813, "step": 22400 }, { "epoch": 19.844995571302036, "grad_norm": 0.26679787039756775, "learning_rate": 1e-05, "loss": 0.9735, "step": 22405 }, { "epoch": 19.849424269264837, "grad_norm": 0.24392372369766235, "learning_rate": 1e-05, "loss": 1.0019, "step": 22410 }, { "epoch": 19.853852967227635, "grad_norm": 0.34096843004226685, "learning_rate": 1e-05, "loss": 1.0165, "step": 22415 }, { "epoch": 19.858281665190432, "grad_norm": 0.3159085214138031, "learning_rate": 1e-05, "loss": 0.9956, "step": 22420 }, { "epoch": 19.862710363153234, "grad_norm": 0.3020704984664917, "learning_rate": 1e-05, "loss": 1.0173, "step": 22425 }, { "epoch": 19.86713906111603, "grad_norm": 0.23647166788578033, "learning_rate": 1e-05, "loss": 1.0167, "step": 22430 }, { "epoch": 19.871567759078832, "grad_norm": 0.2795269191265106, "learning_rate": 1e-05, "loss": 0.9935, "step": 22435 }, { "epoch": 19.87599645704163, "grad_norm": 0.22462648153305054, "learning_rate": 1e-05, "loss": 0.9739, "step": 22440 }, { "epoch": 19.880425155004428, "grad_norm": 0.2533538341522217, "learning_rate": 1e-05, "loss": 0.9827, "step": 22445 }, { "epoch": 19.88485385296723, "grad_norm": 0.24216006696224213, "learning_rate": 1e-05, "loss": 0.9812, "step": 22450 }, { "epoch": 19.889282550930027, "grad_norm": 0.2674252986907959, "learning_rate": 1e-05, "loss": 0.9765, "step": 22455 }, { "epoch": 19.893711248892824, "grad_norm": 0.2583083212375641, "learning_rate": 1e-05, "loss": 0.9809, "step": 22460 }, { "epoch": 19.898139946855625, "grad_norm": 0.21754281222820282, "learning_rate": 1e-05, "loss": 0.956, "step": 22465 }, { "epoch": 19.902568644818423, "grad_norm": 0.22411461174488068, "learning_rate": 1e-05, "loss": 0.993, "step": 22470 }, { "epoch": 19.90699734278122, "grad_norm": 0.28816187381744385, "learning_rate": 1e-05, "loss": 0.9925, "step": 22475 }, { "epoch": 19.911426040744022, "grad_norm": 0.2748092710971832, "learning_rate": 1e-05, "loss": 0.9788, "step": 22480 }, { "epoch": 19.91585473870682, "grad_norm": 0.3295396566390991, "learning_rate": 1e-05, "loss": 1.0708, "step": 22485 }, { "epoch": 19.92028343666962, "grad_norm": 0.36053869128227234, "learning_rate": 1e-05, "loss": 0.9766, "step": 22490 }, { "epoch": 19.92471213463242, "grad_norm": 0.2501281499862671, "learning_rate": 1e-05, "loss": 0.9599, "step": 22495 }, { "epoch": 19.929140832595216, "grad_norm": 0.2649358808994293, "learning_rate": 1e-05, "loss": 0.9673, "step": 22500 }, { "epoch": 19.933569530558017, "grad_norm": 0.2116309404373169, "learning_rate": 1e-05, "loss": 0.9998, "step": 22505 }, { "epoch": 19.937998228520815, "grad_norm": 0.23288613557815552, "learning_rate": 1e-05, "loss": 0.9364, "step": 22510 }, { "epoch": 19.942426926483613, "grad_norm": 0.20370088517665863, "learning_rate": 1e-05, "loss": 1.0019, "step": 22515 }, { "epoch": 19.946855624446414, "grad_norm": 0.24225036799907684, "learning_rate": 1e-05, "loss": 0.959, "step": 22520 }, { "epoch": 19.95128432240921, "grad_norm": 0.2097589075565338, "learning_rate": 1e-05, "loss": 1.0129, "step": 22525 }, { "epoch": 19.95571302037201, "grad_norm": 0.22259247303009033, "learning_rate": 1e-05, "loss": 0.9962, "step": 22530 }, { "epoch": 19.96014171833481, "grad_norm": 0.27597904205322266, "learning_rate": 1e-05, "loss": 0.9852, "step": 22535 }, { "epoch": 19.964570416297608, "grad_norm": 0.23908229172229767, "learning_rate": 1e-05, "loss": 1.0241, "step": 22540 }, { "epoch": 19.968999114260406, "grad_norm": 0.2578098177909851, "learning_rate": 1e-05, "loss": 1.034, "step": 22545 }, { "epoch": 19.973427812223207, "grad_norm": 0.2551516592502594, "learning_rate": 1e-05, "loss": 0.9474, "step": 22550 }, { "epoch": 19.977856510186005, "grad_norm": 0.2383735626935959, "learning_rate": 1e-05, "loss": 0.9271, "step": 22555 }, { "epoch": 19.982285208148806, "grad_norm": 0.2537039816379547, "learning_rate": 1e-05, "loss": 1.0289, "step": 22560 }, { "epoch": 19.986713906111603, "grad_norm": 0.25406530499458313, "learning_rate": 1e-05, "loss": 0.9878, "step": 22565 }, { "epoch": 19.9911426040744, "grad_norm": 0.2307620644569397, "learning_rate": 1e-05, "loss": 1.0313, "step": 22570 }, { "epoch": 19.995571302037202, "grad_norm": 0.24770940840244293, "learning_rate": 1e-05, "loss": 0.9836, "step": 22575 }, { "epoch": 20.0, "grad_norm": 0.27704715728759766, "learning_rate": 1e-05, "loss": 1.0308, "step": 22580 }, { "epoch": 20.004428697962798, "grad_norm": 0.30626973509788513, "learning_rate": 1e-05, "loss": 0.9759, "step": 22585 }, { "epoch": 20.0088573959256, "grad_norm": 0.2711142897605896, "learning_rate": 1e-05, "loss": 0.9969, "step": 22590 }, { "epoch": 20.013286093888397, "grad_norm": 0.23982557654380798, "learning_rate": 1e-05, "loss": 0.9637, "step": 22595 }, { "epoch": 20.017714791851194, "grad_norm": 0.2538910508155823, "learning_rate": 1e-05, "loss": 1.0194, "step": 22600 }, { "epoch": 20.022143489813995, "grad_norm": 0.23520027101039886, "learning_rate": 1e-05, "loss": 0.922, "step": 22605 }, { "epoch": 20.026572187776793, "grad_norm": 0.24669069051742554, "learning_rate": 1e-05, "loss": 1.041, "step": 22610 }, { "epoch": 20.031000885739594, "grad_norm": 0.28411203622817993, "learning_rate": 1e-05, "loss": 0.9851, "step": 22615 }, { "epoch": 20.035429583702392, "grad_norm": 0.22540497779846191, "learning_rate": 1e-05, "loss": 0.9806, "step": 22620 }, { "epoch": 20.03985828166519, "grad_norm": 0.2427888959646225, "learning_rate": 1e-05, "loss": 0.9898, "step": 22625 }, { "epoch": 20.04428697962799, "grad_norm": 0.2336624711751938, "learning_rate": 1e-05, "loss": 0.9875, "step": 22630 }, { "epoch": 20.04871567759079, "grad_norm": 0.2394520789384842, "learning_rate": 1e-05, "loss": 0.9791, "step": 22635 }, { "epoch": 20.053144375553586, "grad_norm": 0.28735679388046265, "learning_rate": 1e-05, "loss": 1.0102, "step": 22640 }, { "epoch": 20.057573073516387, "grad_norm": 0.24385744333267212, "learning_rate": 1e-05, "loss": 1.0015, "step": 22645 }, { "epoch": 20.062001771479185, "grad_norm": 0.22770138084888458, "learning_rate": 1e-05, "loss": 0.9505, "step": 22650 }, { "epoch": 20.066430469441983, "grad_norm": 0.2785815894603729, "learning_rate": 1e-05, "loss": 0.9946, "step": 22655 }, { "epoch": 20.070859167404784, "grad_norm": 0.3103032410144806, "learning_rate": 1e-05, "loss": 1.0031, "step": 22660 }, { "epoch": 20.07528786536758, "grad_norm": 0.24815575778484344, "learning_rate": 1e-05, "loss": 1.0243, "step": 22665 }, { "epoch": 20.07971656333038, "grad_norm": 0.24150699377059937, "learning_rate": 1e-05, "loss": 1.0175, "step": 22670 }, { "epoch": 20.08414526129318, "grad_norm": 0.29132309556007385, "learning_rate": 1e-05, "loss": 1.0414, "step": 22675 }, { "epoch": 20.088573959255978, "grad_norm": 0.300889253616333, "learning_rate": 1e-05, "loss": 0.972, "step": 22680 }, { "epoch": 20.09300265721878, "grad_norm": 0.27988967299461365, "learning_rate": 1e-05, "loss": 1.0309, "step": 22685 }, { "epoch": 20.097431355181577, "grad_norm": 0.27934205532073975, "learning_rate": 1e-05, "loss": 0.9862, "step": 22690 }, { "epoch": 20.101860053144375, "grad_norm": 0.25654634833335876, "learning_rate": 1e-05, "loss": 1.0359, "step": 22695 }, { "epoch": 20.106288751107176, "grad_norm": 0.24366258084774017, "learning_rate": 1e-05, "loss": 0.9925, "step": 22700 }, { "epoch": 20.110717449069973, "grad_norm": 0.2615424692630768, "learning_rate": 1e-05, "loss": 0.9466, "step": 22705 }, { "epoch": 20.11514614703277, "grad_norm": 0.2076205313205719, "learning_rate": 1e-05, "loss": 0.9724, "step": 22710 }, { "epoch": 20.119574844995572, "grad_norm": 0.2148674577474594, "learning_rate": 1e-05, "loss": 1.0405, "step": 22715 }, { "epoch": 20.12400354295837, "grad_norm": 0.2351323664188385, "learning_rate": 1e-05, "loss": 1.0314, "step": 22720 }, { "epoch": 20.128432240921168, "grad_norm": 0.21035417914390564, "learning_rate": 1e-05, "loss": 0.9757, "step": 22725 }, { "epoch": 20.13286093888397, "grad_norm": 0.33949536085128784, "learning_rate": 1e-05, "loss": 0.9975, "step": 22730 }, { "epoch": 20.137289636846766, "grad_norm": 0.3018169701099396, "learning_rate": 1e-05, "loss": 0.9927, "step": 22735 }, { "epoch": 20.141718334809568, "grad_norm": 0.3865547180175781, "learning_rate": 1e-05, "loss": 0.9438, "step": 22740 }, { "epoch": 20.146147032772365, "grad_norm": 0.32810071110725403, "learning_rate": 1e-05, "loss": 1.0053, "step": 22745 }, { "epoch": 20.150575730735163, "grad_norm": 0.24213823676109314, "learning_rate": 1e-05, "loss": 0.9656, "step": 22750 }, { "epoch": 20.155004428697964, "grad_norm": 0.2582855820655823, "learning_rate": 1e-05, "loss": 0.9725, "step": 22755 }, { "epoch": 20.159433126660762, "grad_norm": 0.26272791624069214, "learning_rate": 1e-05, "loss": 0.9589, "step": 22760 }, { "epoch": 20.16386182462356, "grad_norm": 0.25247475504875183, "learning_rate": 1e-05, "loss": 0.9378, "step": 22765 }, { "epoch": 20.16829052258636, "grad_norm": 0.3538379669189453, "learning_rate": 1e-05, "loss": 1.0009, "step": 22770 }, { "epoch": 20.17271922054916, "grad_norm": 0.2905731201171875, "learning_rate": 1e-05, "loss": 0.9972, "step": 22775 }, { "epoch": 20.177147918511956, "grad_norm": 0.29343536496162415, "learning_rate": 1e-05, "loss": 1.0399, "step": 22780 }, { "epoch": 20.181576616474757, "grad_norm": 0.2544710636138916, "learning_rate": 1e-05, "loss": 1.0339, "step": 22785 }, { "epoch": 20.186005314437555, "grad_norm": 0.22626975178718567, "learning_rate": 1e-05, "loss": 0.9349, "step": 22790 }, { "epoch": 20.190434012400353, "grad_norm": 0.25439634919166565, "learning_rate": 1e-05, "loss": 0.9422, "step": 22795 }, { "epoch": 20.194862710363154, "grad_norm": 0.2761237919330597, "learning_rate": 1e-05, "loss": 0.9862, "step": 22800 }, { "epoch": 20.19929140832595, "grad_norm": 0.24387550354003906, "learning_rate": 1e-05, "loss": 1.0011, "step": 22805 }, { "epoch": 20.203720106288753, "grad_norm": 0.23981715738773346, "learning_rate": 1e-05, "loss": 0.9831, "step": 22810 }, { "epoch": 20.20814880425155, "grad_norm": 0.24644440412521362, "learning_rate": 1e-05, "loss": 0.9949, "step": 22815 }, { "epoch": 20.212577502214348, "grad_norm": 0.249264657497406, "learning_rate": 1e-05, "loss": 0.945, "step": 22820 }, { "epoch": 20.21700620017715, "grad_norm": 0.2768544852733612, "learning_rate": 1e-05, "loss": 1.0007, "step": 22825 }, { "epoch": 20.221434898139947, "grad_norm": 0.29739975929260254, "learning_rate": 1e-05, "loss": 0.9579, "step": 22830 }, { "epoch": 20.225863596102744, "grad_norm": 0.2584186792373657, "learning_rate": 1e-05, "loss": 1.0362, "step": 22835 }, { "epoch": 20.230292294065546, "grad_norm": 0.362277626991272, "learning_rate": 1e-05, "loss": 0.9582, "step": 22840 }, { "epoch": 20.234720992028343, "grad_norm": 0.2594701051712036, "learning_rate": 1e-05, "loss": 0.9922, "step": 22845 }, { "epoch": 20.23914968999114, "grad_norm": 0.2504386305809021, "learning_rate": 1e-05, "loss": 0.9867, "step": 22850 }, { "epoch": 20.243578387953942, "grad_norm": 0.21802587807178497, "learning_rate": 1e-05, "loss": 0.9868, "step": 22855 }, { "epoch": 20.24800708591674, "grad_norm": 0.20525391399860382, "learning_rate": 1e-05, "loss": 0.9498, "step": 22860 }, { "epoch": 20.25243578387954, "grad_norm": 0.2622871994972229, "learning_rate": 1e-05, "loss": 0.986, "step": 22865 }, { "epoch": 20.25686448184234, "grad_norm": 0.2646791636943817, "learning_rate": 1e-05, "loss": 0.9534, "step": 22870 }, { "epoch": 20.261293179805136, "grad_norm": 0.26397275924682617, "learning_rate": 1e-05, "loss": 0.9959, "step": 22875 }, { "epoch": 20.265721877767938, "grad_norm": 0.3050857186317444, "learning_rate": 1e-05, "loss": 0.9699, "step": 22880 }, { "epoch": 20.270150575730735, "grad_norm": 0.3151940405368805, "learning_rate": 1e-05, "loss": 0.9618, "step": 22885 }, { "epoch": 20.274579273693533, "grad_norm": 0.24490241706371307, "learning_rate": 1e-05, "loss": 0.9892, "step": 22890 }, { "epoch": 20.279007971656334, "grad_norm": 0.28745564818382263, "learning_rate": 1e-05, "loss": 0.9957, "step": 22895 }, { "epoch": 20.28343666961913, "grad_norm": 0.2804032862186432, "learning_rate": 1e-05, "loss": 0.967, "step": 22900 }, { "epoch": 20.28786536758193, "grad_norm": 0.2028869390487671, "learning_rate": 1e-05, "loss": 0.9958, "step": 22905 }, { "epoch": 20.29229406554473, "grad_norm": 0.21999183297157288, "learning_rate": 1e-05, "loss": 1.012, "step": 22910 }, { "epoch": 20.29672276350753, "grad_norm": 0.29831916093826294, "learning_rate": 1e-05, "loss": 0.996, "step": 22915 }, { "epoch": 20.30115146147033, "grad_norm": 0.3363560438156128, "learning_rate": 1e-05, "loss": 0.9767, "step": 22920 }, { "epoch": 20.305580159433127, "grad_norm": 0.24571125209331512, "learning_rate": 1e-05, "loss": 1.0114, "step": 22925 }, { "epoch": 20.310008857395925, "grad_norm": 0.2533973455429077, "learning_rate": 1e-05, "loss": 0.9814, "step": 22930 }, { "epoch": 20.314437555358726, "grad_norm": 0.2748803198337555, "learning_rate": 1e-05, "loss": 0.9845, "step": 22935 }, { "epoch": 20.318866253321524, "grad_norm": 0.31570225954055786, "learning_rate": 1e-05, "loss": 1.0156, "step": 22940 }, { "epoch": 20.32329495128432, "grad_norm": 0.2510981559753418, "learning_rate": 1e-05, "loss": 0.9643, "step": 22945 }, { "epoch": 20.327723649247122, "grad_norm": 0.28418129682540894, "learning_rate": 1e-05, "loss": 1.0448, "step": 22950 }, { "epoch": 20.33215234720992, "grad_norm": 0.26578933000564575, "learning_rate": 1e-05, "loss": 1.0242, "step": 22955 }, { "epoch": 20.336581045172718, "grad_norm": 0.2867553234100342, "learning_rate": 1e-05, "loss": 1.0326, "step": 22960 }, { "epoch": 20.34100974313552, "grad_norm": 0.27747517824172974, "learning_rate": 1e-05, "loss": 1.0179, "step": 22965 }, { "epoch": 20.345438441098317, "grad_norm": 0.2678050398826599, "learning_rate": 1e-05, "loss": 1.0133, "step": 22970 }, { "epoch": 20.349867139061114, "grad_norm": 0.263844758272171, "learning_rate": 1e-05, "loss": 0.9416, "step": 22975 }, { "epoch": 20.354295837023916, "grad_norm": 0.2709397077560425, "learning_rate": 1e-05, "loss": 0.9811, "step": 22980 }, { "epoch": 20.358724534986713, "grad_norm": 0.26302099227905273, "learning_rate": 1e-05, "loss": 0.989, "step": 22985 }, { "epoch": 20.363153232949514, "grad_norm": 0.22915199398994446, "learning_rate": 1e-05, "loss": 1.0067, "step": 22990 }, { "epoch": 20.367581930912312, "grad_norm": 0.2734391987323761, "learning_rate": 1e-05, "loss": 0.9705, "step": 22995 }, { "epoch": 20.37201062887511, "grad_norm": 0.23513121902942657, "learning_rate": 1e-05, "loss": 0.9977, "step": 23000 }, { "epoch": 20.37643932683791, "grad_norm": 0.2585228979587555, "learning_rate": 1e-05, "loss": 0.99, "step": 23005 }, { "epoch": 20.38086802480071, "grad_norm": 0.3046770691871643, "learning_rate": 1e-05, "loss": 1.0073, "step": 23010 }, { "epoch": 20.385296722763506, "grad_norm": 0.31108105182647705, "learning_rate": 1e-05, "loss": 0.9977, "step": 23015 }, { "epoch": 20.389725420726307, "grad_norm": 0.2511794865131378, "learning_rate": 1e-05, "loss": 1.0177, "step": 23020 }, { "epoch": 20.394154118689105, "grad_norm": 0.27853289246559143, "learning_rate": 1e-05, "loss": 0.9682, "step": 23025 }, { "epoch": 20.398582816651903, "grad_norm": 0.25214579701423645, "learning_rate": 1e-05, "loss": 0.983, "step": 23030 }, { "epoch": 20.403011514614704, "grad_norm": 0.25975358486175537, "learning_rate": 1e-05, "loss": 1.0212, "step": 23035 }, { "epoch": 20.4074402125775, "grad_norm": 0.26760604977607727, "learning_rate": 1e-05, "loss": 0.9523, "step": 23040 }, { "epoch": 20.411868910540303, "grad_norm": 0.2410479485988617, "learning_rate": 1e-05, "loss": 0.9824, "step": 23045 }, { "epoch": 20.4162976085031, "grad_norm": 0.26017051935195923, "learning_rate": 1e-05, "loss": 0.9916, "step": 23050 }, { "epoch": 20.420726306465898, "grad_norm": 0.25673961639404297, "learning_rate": 1e-05, "loss": 1.0041, "step": 23055 }, { "epoch": 20.4251550044287, "grad_norm": 0.22346404194831848, "learning_rate": 1e-05, "loss": 0.9748, "step": 23060 }, { "epoch": 20.429583702391497, "grad_norm": 0.23063363134860992, "learning_rate": 1e-05, "loss": 0.9253, "step": 23065 }, { "epoch": 20.434012400354295, "grad_norm": 0.2615554630756378, "learning_rate": 1e-05, "loss": 0.9731, "step": 23070 }, { "epoch": 20.438441098317096, "grad_norm": 0.2427557408809662, "learning_rate": 1e-05, "loss": 1.0271, "step": 23075 }, { "epoch": 20.442869796279894, "grad_norm": 0.23984217643737793, "learning_rate": 1e-05, "loss": 0.9349, "step": 23080 }, { "epoch": 20.44729849424269, "grad_norm": 0.24669641256332397, "learning_rate": 1e-05, "loss": 0.9358, "step": 23085 }, { "epoch": 20.451727192205492, "grad_norm": 0.2829103469848633, "learning_rate": 1e-05, "loss": 1.016, "step": 23090 }, { "epoch": 20.45615589016829, "grad_norm": 0.2261546105146408, "learning_rate": 1e-05, "loss": 0.9323, "step": 23095 }, { "epoch": 20.460584588131088, "grad_norm": 0.26145729422569275, "learning_rate": 1e-05, "loss": 0.9895, "step": 23100 }, { "epoch": 20.46501328609389, "grad_norm": 0.2522001266479492, "learning_rate": 1e-05, "loss": 0.9506, "step": 23105 }, { "epoch": 20.469441984056687, "grad_norm": 0.26703497767448425, "learning_rate": 1e-05, "loss": 0.9875, "step": 23110 }, { "epoch": 20.473870682019488, "grad_norm": 0.2748298645019531, "learning_rate": 1e-05, "loss": 0.9843, "step": 23115 }, { "epoch": 20.478299379982285, "grad_norm": 0.281241238117218, "learning_rate": 1e-05, "loss": 0.9629, "step": 23120 }, { "epoch": 20.482728077945083, "grad_norm": 0.2569335997104645, "learning_rate": 1e-05, "loss": 1.0488, "step": 23125 }, { "epoch": 20.487156775907884, "grad_norm": 0.26251575350761414, "learning_rate": 1e-05, "loss": 0.9891, "step": 23130 }, { "epoch": 20.491585473870682, "grad_norm": 0.27247336506843567, "learning_rate": 1e-05, "loss": 1.0053, "step": 23135 }, { "epoch": 20.49601417183348, "grad_norm": 0.23398526012897491, "learning_rate": 1e-05, "loss": 0.9984, "step": 23140 }, { "epoch": 20.50044286979628, "grad_norm": 0.258674293756485, "learning_rate": 1e-05, "loss": 0.9803, "step": 23145 }, { "epoch": 20.50487156775908, "grad_norm": 0.284990131855011, "learning_rate": 1e-05, "loss": 1.019, "step": 23150 }, { "epoch": 20.509300265721876, "grad_norm": 0.3295852541923523, "learning_rate": 1e-05, "loss": 0.9995, "step": 23155 }, { "epoch": 20.513728963684677, "grad_norm": 0.2717971205711365, "learning_rate": 1e-05, "loss": 0.9544, "step": 23160 }, { "epoch": 20.518157661647475, "grad_norm": 0.25128209590911865, "learning_rate": 1e-05, "loss": 0.9822, "step": 23165 }, { "epoch": 20.522586359610276, "grad_norm": 0.2315579354763031, "learning_rate": 1e-05, "loss": 0.9894, "step": 23170 }, { "epoch": 20.527015057573074, "grad_norm": 0.24004024267196655, "learning_rate": 1e-05, "loss": 0.9549, "step": 23175 }, { "epoch": 20.53144375553587, "grad_norm": 0.25360557436943054, "learning_rate": 1e-05, "loss": 0.9983, "step": 23180 }, { "epoch": 20.535872453498673, "grad_norm": 0.2642561197280884, "learning_rate": 1e-05, "loss": 0.9865, "step": 23185 }, { "epoch": 20.54030115146147, "grad_norm": 0.32052522897720337, "learning_rate": 1e-05, "loss": 0.9868, "step": 23190 }, { "epoch": 20.544729849424268, "grad_norm": 0.2145920842885971, "learning_rate": 1e-05, "loss": 0.9778, "step": 23195 }, { "epoch": 20.54915854738707, "grad_norm": 0.288447767496109, "learning_rate": 1e-05, "loss": 1.0422, "step": 23200 }, { "epoch": 20.553587245349867, "grad_norm": 0.26760920882225037, "learning_rate": 1e-05, "loss": 1.007, "step": 23205 }, { "epoch": 20.558015943312665, "grad_norm": 0.25050586462020874, "learning_rate": 1e-05, "loss": 1.0012, "step": 23210 }, { "epoch": 20.562444641275466, "grad_norm": 0.293376624584198, "learning_rate": 1e-05, "loss": 1.0505, "step": 23215 }, { "epoch": 20.566873339238263, "grad_norm": 0.2831237018108368, "learning_rate": 1e-05, "loss": 0.9408, "step": 23220 }, { "epoch": 20.571302037201065, "grad_norm": 0.2854495048522949, "learning_rate": 1e-05, "loss": 0.9486, "step": 23225 }, { "epoch": 20.575730735163862, "grad_norm": 0.23675929009914398, "learning_rate": 1e-05, "loss": 0.954, "step": 23230 }, { "epoch": 20.58015943312666, "grad_norm": 0.2307220697402954, "learning_rate": 1e-05, "loss": 1.009, "step": 23235 }, { "epoch": 20.58458813108946, "grad_norm": 0.258803129196167, "learning_rate": 1e-05, "loss": 1.0341, "step": 23240 }, { "epoch": 20.58901682905226, "grad_norm": 0.35433143377304077, "learning_rate": 1e-05, "loss": 0.9433, "step": 23245 }, { "epoch": 20.593445527015056, "grad_norm": 0.2867363691329956, "learning_rate": 1e-05, "loss": 1.0272, "step": 23250 }, { "epoch": 20.597874224977858, "grad_norm": 0.31190216541290283, "learning_rate": 1e-05, "loss": 1.0128, "step": 23255 }, { "epoch": 20.602302922940655, "grad_norm": 0.25837382674217224, "learning_rate": 1e-05, "loss": 0.9888, "step": 23260 }, { "epoch": 20.606731620903453, "grad_norm": 0.29861852526664734, "learning_rate": 1e-05, "loss": 0.9784, "step": 23265 }, { "epoch": 20.611160318866254, "grad_norm": 0.2336566150188446, "learning_rate": 1e-05, "loss": 0.9636, "step": 23270 }, { "epoch": 20.615589016829052, "grad_norm": 0.319244384765625, "learning_rate": 1e-05, "loss": 0.9125, "step": 23275 }, { "epoch": 20.62001771479185, "grad_norm": 0.2418716847896576, "learning_rate": 1e-05, "loss": 1.0151, "step": 23280 }, { "epoch": 20.62444641275465, "grad_norm": 0.2546244263648987, "learning_rate": 1e-05, "loss": 0.958, "step": 23285 }, { "epoch": 20.62887511071745, "grad_norm": 0.22904939949512482, "learning_rate": 1e-05, "loss": 0.971, "step": 23290 }, { "epoch": 20.63330380868025, "grad_norm": 0.28012776374816895, "learning_rate": 1e-05, "loss": 1.0341, "step": 23295 }, { "epoch": 20.637732506643047, "grad_norm": 0.30803826451301575, "learning_rate": 1e-05, "loss": 0.9577, "step": 23300 }, { "epoch": 20.642161204605845, "grad_norm": 0.3214375078678131, "learning_rate": 1e-05, "loss": 0.944, "step": 23305 }, { "epoch": 20.646589902568646, "grad_norm": 0.2672041356563568, "learning_rate": 1e-05, "loss": 1.017, "step": 23310 }, { "epoch": 20.651018600531444, "grad_norm": 0.23594099283218384, "learning_rate": 1e-05, "loss": 0.9777, "step": 23315 }, { "epoch": 20.65544729849424, "grad_norm": 0.22315728664398193, "learning_rate": 1e-05, "loss": 0.9788, "step": 23320 }, { "epoch": 20.659875996457043, "grad_norm": 0.25356242060661316, "learning_rate": 1e-05, "loss": 0.975, "step": 23325 }, { "epoch": 20.66430469441984, "grad_norm": 0.3012852966785431, "learning_rate": 1e-05, "loss": 1.0357, "step": 23330 }, { "epoch": 20.668733392382638, "grad_norm": 0.21883249282836914, "learning_rate": 1e-05, "loss": 0.9616, "step": 23335 }, { "epoch": 20.67316209034544, "grad_norm": 0.21413317322731018, "learning_rate": 1e-05, "loss": 0.9807, "step": 23340 }, { "epoch": 20.677590788308237, "grad_norm": 0.2563398778438568, "learning_rate": 1e-05, "loss": 0.9865, "step": 23345 }, { "epoch": 20.682019486271038, "grad_norm": 0.2521705627441406, "learning_rate": 1e-05, "loss": 1.0251, "step": 23350 }, { "epoch": 20.686448184233836, "grad_norm": 0.2602318227291107, "learning_rate": 1e-05, "loss": 1.0174, "step": 23355 }, { "epoch": 20.690876882196633, "grad_norm": 0.21690435707569122, "learning_rate": 1e-05, "loss": 1.0279, "step": 23360 }, { "epoch": 20.695305580159435, "grad_norm": 0.2532104253768921, "learning_rate": 1e-05, "loss": 1.0167, "step": 23365 }, { "epoch": 20.699734278122232, "grad_norm": 0.23877738416194916, "learning_rate": 1e-05, "loss": 0.994, "step": 23370 }, { "epoch": 20.70416297608503, "grad_norm": 0.28318607807159424, "learning_rate": 1e-05, "loss": 0.9917, "step": 23375 }, { "epoch": 20.70859167404783, "grad_norm": 0.2561405301094055, "learning_rate": 1e-05, "loss": 1.0329, "step": 23380 }, { "epoch": 20.71302037201063, "grad_norm": 0.24713988602161407, "learning_rate": 1e-05, "loss": 1.0029, "step": 23385 }, { "epoch": 20.717449069973426, "grad_norm": 0.25251656770706177, "learning_rate": 1e-05, "loss": 0.9769, "step": 23390 }, { "epoch": 20.721877767936228, "grad_norm": 0.24469034373760223, "learning_rate": 1e-05, "loss": 1.0154, "step": 23395 }, { "epoch": 20.726306465899025, "grad_norm": 0.2381730079650879, "learning_rate": 1e-05, "loss": 0.9847, "step": 23400 }, { "epoch": 20.730735163861823, "grad_norm": 0.24882963299751282, "learning_rate": 1e-05, "loss": 0.945, "step": 23405 }, { "epoch": 20.735163861824624, "grad_norm": 0.2904219329357147, "learning_rate": 1e-05, "loss": 0.9651, "step": 23410 }, { "epoch": 20.739592559787422, "grad_norm": 0.2203953117132187, "learning_rate": 1e-05, "loss": 0.9798, "step": 23415 }, { "epoch": 20.744021257750223, "grad_norm": 0.2897529602050781, "learning_rate": 1e-05, "loss": 0.9843, "step": 23420 }, { "epoch": 20.74844995571302, "grad_norm": 0.25622791051864624, "learning_rate": 1e-05, "loss": 1.0129, "step": 23425 }, { "epoch": 20.75287865367582, "grad_norm": 0.3021980822086334, "learning_rate": 1e-05, "loss": 0.9735, "step": 23430 }, { "epoch": 20.75730735163862, "grad_norm": 0.27540022134780884, "learning_rate": 1e-05, "loss": 1.0412, "step": 23435 }, { "epoch": 20.761736049601417, "grad_norm": 0.26078498363494873, "learning_rate": 1e-05, "loss": 0.9296, "step": 23440 }, { "epoch": 20.766164747564215, "grad_norm": 0.2253457009792328, "learning_rate": 1e-05, "loss": 1.0313, "step": 23445 }, { "epoch": 20.770593445527016, "grad_norm": 0.24515530467033386, "learning_rate": 1e-05, "loss": 0.9767, "step": 23450 }, { "epoch": 20.775022143489814, "grad_norm": 0.28406015038490295, "learning_rate": 1e-05, "loss": 0.9705, "step": 23455 }, { "epoch": 20.77945084145261, "grad_norm": 0.2737155854701996, "learning_rate": 1e-05, "loss": 1.0205, "step": 23460 }, { "epoch": 20.783879539415413, "grad_norm": 0.2011604607105255, "learning_rate": 1e-05, "loss": 0.9669, "step": 23465 }, { "epoch": 20.78830823737821, "grad_norm": 0.22934001684188843, "learning_rate": 1e-05, "loss": 0.9615, "step": 23470 }, { "epoch": 20.79273693534101, "grad_norm": 0.26330819725990295, "learning_rate": 1e-05, "loss": 1.0005, "step": 23475 }, { "epoch": 20.79716563330381, "grad_norm": 0.29069122672080994, "learning_rate": 1e-05, "loss": 1.0148, "step": 23480 }, { "epoch": 20.801594331266607, "grad_norm": 0.2525894045829773, "learning_rate": 1e-05, "loss": 0.9661, "step": 23485 }, { "epoch": 20.806023029229408, "grad_norm": 0.23904164135456085, "learning_rate": 1e-05, "loss": 1.0183, "step": 23490 }, { "epoch": 20.810451727192206, "grad_norm": 0.2577880024909973, "learning_rate": 1e-05, "loss": 1.0053, "step": 23495 }, { "epoch": 20.814880425155003, "grad_norm": 0.2428380846977234, "learning_rate": 1e-05, "loss": 1.0012, "step": 23500 }, { "epoch": 20.819309123117804, "grad_norm": 0.240816131234169, "learning_rate": 1e-05, "loss": 0.9365, "step": 23505 }, { "epoch": 20.823737821080602, "grad_norm": 0.21072085201740265, "learning_rate": 1e-05, "loss": 1.0543, "step": 23510 }, { "epoch": 20.8281665190434, "grad_norm": 0.23351332545280457, "learning_rate": 1e-05, "loss": 1.0621, "step": 23515 }, { "epoch": 20.8325952170062, "grad_norm": 0.27252712845802307, "learning_rate": 1e-05, "loss": 0.9677, "step": 23520 }, { "epoch": 20.837023914969, "grad_norm": 0.2045300453901291, "learning_rate": 1e-05, "loss": 1.0194, "step": 23525 }, { "epoch": 20.841452612931796, "grad_norm": 0.2848270833492279, "learning_rate": 1e-05, "loss": 0.9684, "step": 23530 }, { "epoch": 20.845881310894598, "grad_norm": 0.2638493478298187, "learning_rate": 1e-05, "loss": 1.0031, "step": 23535 }, { "epoch": 20.850310008857395, "grad_norm": 0.22406628727912903, "learning_rate": 1e-05, "loss": 1.0184, "step": 23540 }, { "epoch": 20.854738706820196, "grad_norm": 0.29318952560424805, "learning_rate": 1e-05, "loss": 0.9503, "step": 23545 }, { "epoch": 20.859167404782994, "grad_norm": 0.27850982546806335, "learning_rate": 1e-05, "loss": 1.0382, "step": 23550 }, { "epoch": 20.86359610274579, "grad_norm": 0.2463536560535431, "learning_rate": 1e-05, "loss": 1.0051, "step": 23555 }, { "epoch": 20.868024800708593, "grad_norm": 0.2756263017654419, "learning_rate": 1e-05, "loss": 0.9815, "step": 23560 }, { "epoch": 20.87245349867139, "grad_norm": 0.24165046215057373, "learning_rate": 1e-05, "loss": 1.0121, "step": 23565 }, { "epoch": 20.876882196634188, "grad_norm": 0.22937564551830292, "learning_rate": 1e-05, "loss": 1.0296, "step": 23570 }, { "epoch": 20.88131089459699, "grad_norm": 0.23448596894741058, "learning_rate": 1e-05, "loss": 1.0403, "step": 23575 }, { "epoch": 20.885739592559787, "grad_norm": 0.24200119078159332, "learning_rate": 1e-05, "loss": 0.9782, "step": 23580 }, { "epoch": 20.890168290522585, "grad_norm": 0.25208789110183716, "learning_rate": 1e-05, "loss": 1.0246, "step": 23585 }, { "epoch": 20.894596988485386, "grad_norm": 0.2373015284538269, "learning_rate": 1e-05, "loss": 1.0111, "step": 23590 }, { "epoch": 20.899025686448184, "grad_norm": 0.29320967197418213, "learning_rate": 1e-05, "loss": 1.0034, "step": 23595 }, { "epoch": 20.903454384410985, "grad_norm": 0.2730688750743866, "learning_rate": 1e-05, "loss": 1.0144, "step": 23600 }, { "epoch": 20.907883082373782, "grad_norm": 0.28743427991867065, "learning_rate": 1e-05, "loss": 0.9581, "step": 23605 }, { "epoch": 20.91231178033658, "grad_norm": 0.29893437027931213, "learning_rate": 1e-05, "loss": 0.9892, "step": 23610 }, { "epoch": 20.91674047829938, "grad_norm": 0.23643238842487335, "learning_rate": 1e-05, "loss": 0.9967, "step": 23615 }, { "epoch": 20.92116917626218, "grad_norm": 0.24388255178928375, "learning_rate": 1e-05, "loss": 0.9569, "step": 23620 }, { "epoch": 20.925597874224977, "grad_norm": 0.2582964599132538, "learning_rate": 1e-05, "loss": 1.0038, "step": 23625 }, { "epoch": 20.930026572187778, "grad_norm": 0.27074819803237915, "learning_rate": 1e-05, "loss": 0.9672, "step": 23630 }, { "epoch": 20.934455270150576, "grad_norm": 0.25435471534729004, "learning_rate": 1e-05, "loss": 0.9956, "step": 23635 }, { "epoch": 20.938883968113373, "grad_norm": 0.22766220569610596, "learning_rate": 1e-05, "loss": 0.9509, "step": 23640 }, { "epoch": 20.943312666076174, "grad_norm": 0.2583090662956238, "learning_rate": 1e-05, "loss": 1.0062, "step": 23645 }, { "epoch": 20.947741364038972, "grad_norm": 0.2744172513484955, "learning_rate": 1e-05, "loss": 0.9811, "step": 23650 }, { "epoch": 20.95217006200177, "grad_norm": 0.22137551009655, "learning_rate": 1e-05, "loss": 0.9831, "step": 23655 }, { "epoch": 20.95659875996457, "grad_norm": 0.2544724941253662, "learning_rate": 1e-05, "loss": 1.031, "step": 23660 }, { "epoch": 20.96102745792737, "grad_norm": 0.2587026059627533, "learning_rate": 1e-05, "loss": 0.9711, "step": 23665 }, { "epoch": 20.96545615589017, "grad_norm": 0.24419590830802917, "learning_rate": 1e-05, "loss": 1.0277, "step": 23670 }, { "epoch": 20.969884853852967, "grad_norm": 0.2535645663738251, "learning_rate": 1e-05, "loss": 1.0201, "step": 23675 }, { "epoch": 20.974313551815765, "grad_norm": 0.26606446504592896, "learning_rate": 1e-05, "loss": 0.9882, "step": 23680 }, { "epoch": 20.978742249778566, "grad_norm": 0.2687552571296692, "learning_rate": 1e-05, "loss": 0.9458, "step": 23685 }, { "epoch": 20.983170947741364, "grad_norm": 0.288351833820343, "learning_rate": 1e-05, "loss": 0.9977, "step": 23690 }, { "epoch": 20.98759964570416, "grad_norm": 0.2936451733112335, "learning_rate": 1e-05, "loss": 0.9947, "step": 23695 }, { "epoch": 20.992028343666963, "grad_norm": 0.2874999940395355, "learning_rate": 1e-05, "loss": 0.9921, "step": 23700 }, { "epoch": 20.99645704162976, "grad_norm": 0.2770979702472687, "learning_rate": 1e-05, "loss": 1.0412, "step": 23705 }, { "epoch": 21.000885739592558, "grad_norm": 0.25790080428123474, "learning_rate": 1e-05, "loss": 0.9835, "step": 23710 }, { "epoch": 21.00531443755536, "grad_norm": 0.1928500235080719, "learning_rate": 1e-05, "loss": 1.0185, "step": 23715 }, { "epoch": 21.009743135518157, "grad_norm": 0.2296430766582489, "learning_rate": 1e-05, "loss": 0.9675, "step": 23720 }, { "epoch": 21.014171833480958, "grad_norm": 0.2535626292228699, "learning_rate": 1e-05, "loss": 0.9818, "step": 23725 }, { "epoch": 21.018600531443756, "grad_norm": 0.22882448136806488, "learning_rate": 1e-05, "loss": 0.9481, "step": 23730 }, { "epoch": 21.023029229406553, "grad_norm": 0.24649257957935333, "learning_rate": 1e-05, "loss": 0.9639, "step": 23735 }, { "epoch": 21.027457927369355, "grad_norm": 0.21738092601299286, "learning_rate": 1e-05, "loss": 0.988, "step": 23740 }, { "epoch": 21.031886625332152, "grad_norm": 0.25209927558898926, "learning_rate": 1e-05, "loss": 1.0515, "step": 23745 }, { "epoch": 21.03631532329495, "grad_norm": 0.20074862241744995, "learning_rate": 1e-05, "loss": 1.0144, "step": 23750 }, { "epoch": 21.04074402125775, "grad_norm": 0.21273909509181976, "learning_rate": 1e-05, "loss": 0.9632, "step": 23755 }, { "epoch": 21.04517271922055, "grad_norm": 0.22330664098262787, "learning_rate": 1e-05, "loss": 0.9517, "step": 23760 }, { "epoch": 21.049601417183347, "grad_norm": 0.28758561611175537, "learning_rate": 1e-05, "loss": 0.991, "step": 23765 }, { "epoch": 21.054030115146148, "grad_norm": 0.22504737973213196, "learning_rate": 1e-05, "loss": 0.9997, "step": 23770 }, { "epoch": 21.058458813108945, "grad_norm": 0.23204205930233002, "learning_rate": 1e-05, "loss": 1.0172, "step": 23775 }, { "epoch": 21.062887511071747, "grad_norm": 0.26115328073501587, "learning_rate": 1e-05, "loss": 1.0144, "step": 23780 }, { "epoch": 21.067316209034544, "grad_norm": 0.2827441990375519, "learning_rate": 1e-05, "loss": 0.9727, "step": 23785 }, { "epoch": 21.071744906997342, "grad_norm": 0.2512715756893158, "learning_rate": 1e-05, "loss": 0.9752, "step": 23790 }, { "epoch": 21.076173604960143, "grad_norm": 0.32470351457595825, "learning_rate": 1e-05, "loss": 1.0079, "step": 23795 }, { "epoch": 21.08060230292294, "grad_norm": 0.2564871311187744, "learning_rate": 1e-05, "loss": 0.9691, "step": 23800 }, { "epoch": 21.08503100088574, "grad_norm": 0.3475436866283417, "learning_rate": 1e-05, "loss": 0.9786, "step": 23805 }, { "epoch": 21.08945969884854, "grad_norm": 0.24595697224140167, "learning_rate": 1e-05, "loss": 0.989, "step": 23810 }, { "epoch": 21.093888396811337, "grad_norm": 0.20949521660804749, "learning_rate": 1e-05, "loss": 0.9732, "step": 23815 }, { "epoch": 21.098317094774135, "grad_norm": 0.22952190041542053, "learning_rate": 1e-05, "loss": 0.9903, "step": 23820 }, { "epoch": 21.102745792736936, "grad_norm": 0.2702259421348572, "learning_rate": 1e-05, "loss": 0.9791, "step": 23825 }, { "epoch": 21.107174490699734, "grad_norm": 0.23519152402877808, "learning_rate": 1e-05, "loss": 1.0205, "step": 23830 }, { "epoch": 21.11160318866253, "grad_norm": 0.2529354989528656, "learning_rate": 1e-05, "loss": 1.0302, "step": 23835 }, { "epoch": 21.116031886625333, "grad_norm": 0.2953972816467285, "learning_rate": 1e-05, "loss": 0.9577, "step": 23840 }, { "epoch": 21.12046058458813, "grad_norm": 0.21525080502033234, "learning_rate": 1e-05, "loss": 1.0072, "step": 23845 }, { "epoch": 21.12488928255093, "grad_norm": 0.23865020275115967, "learning_rate": 1e-05, "loss": 1.012, "step": 23850 }, { "epoch": 21.12931798051373, "grad_norm": 0.23286272585391998, "learning_rate": 1e-05, "loss": 1.0037, "step": 23855 }, { "epoch": 21.133746678476527, "grad_norm": 0.29467570781707764, "learning_rate": 1e-05, "loss": 1.0257, "step": 23860 }, { "epoch": 21.138175376439328, "grad_norm": 0.2763665020465851, "learning_rate": 1e-05, "loss": 1.0163, "step": 23865 }, { "epoch": 21.142604074402126, "grad_norm": 0.22636842727661133, "learning_rate": 1e-05, "loss": 0.9718, "step": 23870 }, { "epoch": 21.147032772364923, "grad_norm": 0.3360186219215393, "learning_rate": 1e-05, "loss": 1.0069, "step": 23875 }, { "epoch": 21.151461470327725, "grad_norm": 0.2123258411884308, "learning_rate": 1e-05, "loss": 1.006, "step": 23880 }, { "epoch": 21.155890168290522, "grad_norm": 0.26806992292404175, "learning_rate": 1e-05, "loss": 0.9962, "step": 23885 }, { "epoch": 21.16031886625332, "grad_norm": 0.24717672169208527, "learning_rate": 1e-05, "loss": 0.9985, "step": 23890 }, { "epoch": 21.16474756421612, "grad_norm": 0.2531307637691498, "learning_rate": 1e-05, "loss": 0.9558, "step": 23895 }, { "epoch": 21.16917626217892, "grad_norm": 0.21396209299564362, "learning_rate": 1e-05, "loss": 0.9772, "step": 23900 }, { "epoch": 21.17360496014172, "grad_norm": 0.26138243079185486, "learning_rate": 1e-05, "loss": 0.9774, "step": 23905 }, { "epoch": 21.178033658104518, "grad_norm": 0.317848801612854, "learning_rate": 1e-05, "loss": 1.0146, "step": 23910 }, { "epoch": 21.182462356067315, "grad_norm": 0.25748392939567566, "learning_rate": 1e-05, "loss": 1.0039, "step": 23915 }, { "epoch": 21.186891054030117, "grad_norm": 0.23696640133857727, "learning_rate": 1e-05, "loss": 0.9485, "step": 23920 }, { "epoch": 21.191319751992914, "grad_norm": 0.2723008692264557, "learning_rate": 1e-05, "loss": 0.9693, "step": 23925 }, { "epoch": 21.195748449955712, "grad_norm": 0.245529443025589, "learning_rate": 1e-05, "loss": 0.9844, "step": 23930 }, { "epoch": 21.200177147918513, "grad_norm": 0.3217242360115051, "learning_rate": 1e-05, "loss": 0.9863, "step": 23935 }, { "epoch": 21.20460584588131, "grad_norm": 0.24378260970115662, "learning_rate": 1e-05, "loss": 0.9935, "step": 23940 }, { "epoch": 21.20903454384411, "grad_norm": 0.2365870177745819, "learning_rate": 1e-05, "loss": 0.9627, "step": 23945 }, { "epoch": 21.21346324180691, "grad_norm": 0.2485848218202591, "learning_rate": 1e-05, "loss": 1.034, "step": 23950 }, { "epoch": 21.217891939769707, "grad_norm": 0.331729918718338, "learning_rate": 1e-05, "loss": 1.0576, "step": 23955 }, { "epoch": 21.22232063773251, "grad_norm": 0.3377779424190521, "learning_rate": 1e-05, "loss": 0.9583, "step": 23960 }, { "epoch": 21.226749335695306, "grad_norm": 0.22519612312316895, "learning_rate": 1e-05, "loss": 1.0476, "step": 23965 }, { "epoch": 21.231178033658104, "grad_norm": 0.2346481829881668, "learning_rate": 1e-05, "loss": 0.9299, "step": 23970 }, { "epoch": 21.235606731620905, "grad_norm": 0.20597229897975922, "learning_rate": 1e-05, "loss": 0.9706, "step": 23975 }, { "epoch": 21.240035429583703, "grad_norm": 0.24908660352230072, "learning_rate": 1e-05, "loss": 1.0221, "step": 23980 }, { "epoch": 21.2444641275465, "grad_norm": 0.23830406367778778, "learning_rate": 1e-05, "loss": 1.0037, "step": 23985 }, { "epoch": 21.2488928255093, "grad_norm": 0.32512572407722473, "learning_rate": 1e-05, "loss": 0.9901, "step": 23990 }, { "epoch": 21.2533215234721, "grad_norm": 0.26207512617111206, "learning_rate": 1e-05, "loss": 0.9825, "step": 23995 }, { "epoch": 21.257750221434897, "grad_norm": 0.2201402187347412, "learning_rate": 1e-05, "loss": 0.953, "step": 24000 }, { "epoch": 21.262178919397698, "grad_norm": 0.23515959084033966, "learning_rate": 1e-05, "loss": 0.9963, "step": 24005 }, { "epoch": 21.266607617360496, "grad_norm": 0.25755035877227783, "learning_rate": 1e-05, "loss": 1.007, "step": 24010 }, { "epoch": 21.271036315323293, "grad_norm": 0.23354493081569672, "learning_rate": 1e-05, "loss": 1.02, "step": 24015 }, { "epoch": 21.275465013286095, "grad_norm": 0.2569540739059448, "learning_rate": 1e-05, "loss": 0.9833, "step": 24020 }, { "epoch": 21.279893711248892, "grad_norm": 0.24763429164886475, "learning_rate": 1e-05, "loss": 1.0013, "step": 24025 }, { "epoch": 21.284322409211693, "grad_norm": 0.23891708254814148, "learning_rate": 1e-05, "loss": 1.0243, "step": 24030 }, { "epoch": 21.28875110717449, "grad_norm": 0.24977341294288635, "learning_rate": 1e-05, "loss": 0.9519, "step": 24035 }, { "epoch": 21.29317980513729, "grad_norm": 0.25577160716056824, "learning_rate": 1e-05, "loss": 0.96, "step": 24040 }, { "epoch": 21.29760850310009, "grad_norm": 0.2855280935764313, "learning_rate": 1e-05, "loss": 0.9774, "step": 24045 }, { "epoch": 21.302037201062888, "grad_norm": 0.27523383498191833, "learning_rate": 1e-05, "loss": 0.996, "step": 24050 }, { "epoch": 21.306465899025685, "grad_norm": 0.23196375370025635, "learning_rate": 1e-05, "loss": 0.9894, "step": 24055 }, { "epoch": 21.310894596988486, "grad_norm": 0.2422071099281311, "learning_rate": 1e-05, "loss": 0.9985, "step": 24060 }, { "epoch": 21.315323294951284, "grad_norm": 0.23991167545318604, "learning_rate": 1e-05, "loss": 0.9976, "step": 24065 }, { "epoch": 21.31975199291408, "grad_norm": 0.23510125279426575, "learning_rate": 1e-05, "loss": 1.0097, "step": 24070 }, { "epoch": 21.324180690876883, "grad_norm": 0.212108314037323, "learning_rate": 1e-05, "loss": 1.0092, "step": 24075 }, { "epoch": 21.32860938883968, "grad_norm": 0.23386695981025696, "learning_rate": 1e-05, "loss": 1.0036, "step": 24080 }, { "epoch": 21.333038086802482, "grad_norm": 0.25324028730392456, "learning_rate": 1e-05, "loss": 0.9399, "step": 24085 }, { "epoch": 21.33746678476528, "grad_norm": 0.2725676894187927, "learning_rate": 1e-05, "loss": 0.9828, "step": 24090 }, { "epoch": 21.341895482728077, "grad_norm": 0.2590475380420685, "learning_rate": 1e-05, "loss": 0.9773, "step": 24095 }, { "epoch": 21.34632418069088, "grad_norm": 0.26223626732826233, "learning_rate": 1e-05, "loss": 1.0532, "step": 24100 }, { "epoch": 21.350752878653676, "grad_norm": 0.23049882054328918, "learning_rate": 1e-05, "loss": 0.9908, "step": 24105 }, { "epoch": 21.355181576616474, "grad_norm": 0.264771431684494, "learning_rate": 1e-05, "loss": 1.0395, "step": 24110 }, { "epoch": 21.359610274579275, "grad_norm": 0.22449934482574463, "learning_rate": 1e-05, "loss": 1.0097, "step": 24115 }, { "epoch": 21.364038972542073, "grad_norm": 0.24789178371429443, "learning_rate": 1e-05, "loss": 1.0016, "step": 24120 }, { "epoch": 21.36846767050487, "grad_norm": 0.2665213346481323, "learning_rate": 1e-05, "loss": 0.9753, "step": 24125 }, { "epoch": 21.37289636846767, "grad_norm": 0.2734752297401428, "learning_rate": 1e-05, "loss": 1.0048, "step": 24130 }, { "epoch": 21.37732506643047, "grad_norm": 0.2670280933380127, "learning_rate": 1e-05, "loss": 1.0409, "step": 24135 }, { "epoch": 21.381753764393267, "grad_norm": 0.23711976408958435, "learning_rate": 1e-05, "loss": 0.9976, "step": 24140 }, { "epoch": 21.386182462356068, "grad_norm": 0.23111297190189362, "learning_rate": 1e-05, "loss": 1.0127, "step": 24145 }, { "epoch": 21.390611160318866, "grad_norm": 0.2747046947479248, "learning_rate": 1e-05, "loss": 0.9683, "step": 24150 }, { "epoch": 21.395039858281667, "grad_norm": 0.24125155806541443, "learning_rate": 1e-05, "loss": 1.0083, "step": 24155 }, { "epoch": 21.399468556244464, "grad_norm": 0.25119295716285706, "learning_rate": 1e-05, "loss": 0.9261, "step": 24160 }, { "epoch": 21.403897254207262, "grad_norm": 0.23565930128097534, "learning_rate": 1e-05, "loss": 1.0014, "step": 24165 }, { "epoch": 21.408325952170063, "grad_norm": 0.23585757613182068, "learning_rate": 1e-05, "loss": 1.0026, "step": 24170 }, { "epoch": 21.41275465013286, "grad_norm": 0.23910439014434814, "learning_rate": 1e-05, "loss": 0.9455, "step": 24175 }, { "epoch": 21.41718334809566, "grad_norm": 0.233554869890213, "learning_rate": 1e-05, "loss": 1.015, "step": 24180 }, { "epoch": 21.42161204605846, "grad_norm": 0.26669591665267944, "learning_rate": 1e-05, "loss": 0.9874, "step": 24185 }, { "epoch": 21.426040744021257, "grad_norm": 0.2674793601036072, "learning_rate": 1e-05, "loss": 1.0051, "step": 24190 }, { "epoch": 21.430469441984055, "grad_norm": 0.239483043551445, "learning_rate": 1e-05, "loss": 1.0398, "step": 24195 }, { "epoch": 21.434898139946856, "grad_norm": 0.2558232545852661, "learning_rate": 1e-05, "loss": 0.9792, "step": 24200 }, { "epoch": 21.439326837909654, "grad_norm": 0.262599378824234, "learning_rate": 1e-05, "loss": 1.0268, "step": 24205 }, { "epoch": 21.443755535872455, "grad_norm": 0.2378520518541336, "learning_rate": 1e-05, "loss": 0.9547, "step": 24210 }, { "epoch": 21.448184233835253, "grad_norm": 0.35304948687553406, "learning_rate": 1e-05, "loss": 1.005, "step": 24215 }, { "epoch": 21.45261293179805, "grad_norm": 0.25574415922164917, "learning_rate": 1e-05, "loss": 0.9455, "step": 24220 }, { "epoch": 21.45704162976085, "grad_norm": 0.3049648404121399, "learning_rate": 1e-05, "loss": 0.9867, "step": 24225 }, { "epoch": 21.46147032772365, "grad_norm": 0.220179945230484, "learning_rate": 1e-05, "loss": 1.0064, "step": 24230 }, { "epoch": 21.465899025686447, "grad_norm": 0.22716927528381348, "learning_rate": 1e-05, "loss": 0.9408, "step": 24235 }, { "epoch": 21.47032772364925, "grad_norm": 0.25995850563049316, "learning_rate": 1e-05, "loss": 1.0297, "step": 24240 }, { "epoch": 21.474756421612046, "grad_norm": 0.22999300062656403, "learning_rate": 1e-05, "loss": 1.0066, "step": 24245 }, { "epoch": 21.479185119574844, "grad_norm": 0.21122734248638153, "learning_rate": 1e-05, "loss": 1.0044, "step": 24250 }, { "epoch": 21.483613817537645, "grad_norm": 0.197481170296669, "learning_rate": 1e-05, "loss": 0.9494, "step": 24255 }, { "epoch": 21.488042515500442, "grad_norm": 0.22244811058044434, "learning_rate": 1e-05, "loss": 0.9825, "step": 24260 }, { "epoch": 21.49247121346324, "grad_norm": 0.22211964428424835, "learning_rate": 1e-05, "loss": 1.0269, "step": 24265 }, { "epoch": 21.49689991142604, "grad_norm": 0.28239572048187256, "learning_rate": 1e-05, "loss": 0.9876, "step": 24270 }, { "epoch": 21.50132860938884, "grad_norm": 0.21171702444553375, "learning_rate": 1e-05, "loss": 0.9749, "step": 24275 }, { "epoch": 21.50575730735164, "grad_norm": 0.2408788949251175, "learning_rate": 1e-05, "loss": 1.0122, "step": 24280 }, { "epoch": 21.510186005314438, "grad_norm": 0.23125869035720825, "learning_rate": 1e-05, "loss": 0.9517, "step": 24285 }, { "epoch": 21.514614703277235, "grad_norm": 0.2452387511730194, "learning_rate": 1e-05, "loss": 0.9725, "step": 24290 }, { "epoch": 21.519043401240037, "grad_norm": 0.224269837141037, "learning_rate": 1e-05, "loss": 1.0303, "step": 24295 }, { "epoch": 21.523472099202834, "grad_norm": 0.28405871987342834, "learning_rate": 1e-05, "loss": 1.0023, "step": 24300 }, { "epoch": 21.527900797165632, "grad_norm": 0.2887643575668335, "learning_rate": 1e-05, "loss": 0.9883, "step": 24305 }, { "epoch": 21.532329495128433, "grad_norm": 0.29802432656288147, "learning_rate": 1e-05, "loss": 0.9734, "step": 24310 }, { "epoch": 21.53675819309123, "grad_norm": 0.23205694556236267, "learning_rate": 1e-05, "loss": 0.9736, "step": 24315 }, { "epoch": 21.54118689105403, "grad_norm": 0.28083962202072144, "learning_rate": 1e-05, "loss": 0.9776, "step": 24320 }, { "epoch": 21.54561558901683, "grad_norm": 0.3045998811721802, "learning_rate": 1e-05, "loss": 0.9808, "step": 24325 }, { "epoch": 21.550044286979627, "grad_norm": 0.23261308670043945, "learning_rate": 1e-05, "loss": 0.972, "step": 24330 }, { "epoch": 21.55447298494243, "grad_norm": 0.27049073576927185, "learning_rate": 1e-05, "loss": 0.9738, "step": 24335 }, { "epoch": 21.558901682905226, "grad_norm": 0.2971896231174469, "learning_rate": 1e-05, "loss": 0.964, "step": 24340 }, { "epoch": 21.563330380868024, "grad_norm": 0.2367928922176361, "learning_rate": 1e-05, "loss": 1.0059, "step": 24345 }, { "epoch": 21.567759078830825, "grad_norm": 0.28476718068122864, "learning_rate": 1e-05, "loss": 1.0602, "step": 24350 }, { "epoch": 21.572187776793623, "grad_norm": 0.23783431947231293, "learning_rate": 1e-05, "loss": 0.9918, "step": 24355 }, { "epoch": 21.57661647475642, "grad_norm": 0.3048815131187439, "learning_rate": 1e-05, "loss": 1.0457, "step": 24360 }, { "epoch": 21.58104517271922, "grad_norm": 0.2317344695329666, "learning_rate": 1e-05, "loss": 0.9533, "step": 24365 }, { "epoch": 21.58547387068202, "grad_norm": 0.25966066122055054, "learning_rate": 1e-05, "loss": 1.0365, "step": 24370 }, { "epoch": 21.589902568644817, "grad_norm": 0.23673969507217407, "learning_rate": 1e-05, "loss": 0.9809, "step": 24375 }, { "epoch": 21.594331266607618, "grad_norm": 0.26256829500198364, "learning_rate": 1e-05, "loss": 0.9575, "step": 24380 }, { "epoch": 21.598759964570416, "grad_norm": 0.23464412987232208, "learning_rate": 1e-05, "loss": 0.9819, "step": 24385 }, { "epoch": 21.603188662533213, "grad_norm": 0.26200950145721436, "learning_rate": 1e-05, "loss": 1.0271, "step": 24390 }, { "epoch": 21.607617360496015, "grad_norm": 0.24826548993587494, "learning_rate": 1e-05, "loss": 1.0343, "step": 24395 }, { "epoch": 21.612046058458812, "grad_norm": 0.26936453580856323, "learning_rate": 1e-05, "loss": 0.9522, "step": 24400 }, { "epoch": 21.616474756421614, "grad_norm": 0.3161003887653351, "learning_rate": 1e-05, "loss": 1.0235, "step": 24405 }, { "epoch": 21.62090345438441, "grad_norm": 0.25600945949554443, "learning_rate": 1e-05, "loss": 0.9905, "step": 24410 }, { "epoch": 21.62533215234721, "grad_norm": 0.24475471675395966, "learning_rate": 1e-05, "loss": 0.9828, "step": 24415 }, { "epoch": 21.62976085031001, "grad_norm": 0.24165140092372894, "learning_rate": 1e-05, "loss": 0.9871, "step": 24420 }, { "epoch": 21.634189548272808, "grad_norm": 0.22416894137859344, "learning_rate": 1e-05, "loss": 0.9909, "step": 24425 }, { "epoch": 21.638618246235605, "grad_norm": 0.24341599643230438, "learning_rate": 1e-05, "loss": 1.0191, "step": 24430 }, { "epoch": 21.643046944198407, "grad_norm": 0.21842268109321594, "learning_rate": 1e-05, "loss": 1.0243, "step": 24435 }, { "epoch": 21.647475642161204, "grad_norm": 0.28387102484703064, "learning_rate": 1e-05, "loss": 0.9859, "step": 24440 }, { "epoch": 21.651904340124002, "grad_norm": 0.2577364444732666, "learning_rate": 1e-05, "loss": 0.9385, "step": 24445 }, { "epoch": 21.656333038086803, "grad_norm": 0.30951401591300964, "learning_rate": 1e-05, "loss": 0.9934, "step": 24450 }, { "epoch": 21.6607617360496, "grad_norm": 0.418165385723114, "learning_rate": 1e-05, "loss": 0.9957, "step": 24455 }, { "epoch": 21.665190434012402, "grad_norm": 0.2846079468727112, "learning_rate": 1e-05, "loss": 1.0173, "step": 24460 }, { "epoch": 21.6696191319752, "grad_norm": 0.28966230154037476, "learning_rate": 1e-05, "loss": 1.0712, "step": 24465 }, { "epoch": 21.674047829937997, "grad_norm": 0.2394290566444397, "learning_rate": 1e-05, "loss": 0.9985, "step": 24470 }, { "epoch": 21.6784765279008, "grad_norm": 0.2498738318681717, "learning_rate": 1e-05, "loss": 1.0283, "step": 24475 }, { "epoch": 21.682905225863596, "grad_norm": 0.27644601464271545, "learning_rate": 1e-05, "loss": 0.9629, "step": 24480 }, { "epoch": 21.687333923826394, "grad_norm": 0.2564210295677185, "learning_rate": 1e-05, "loss": 0.9687, "step": 24485 }, { "epoch": 21.691762621789195, "grad_norm": 0.21085625886917114, "learning_rate": 1e-05, "loss": 1.0139, "step": 24490 }, { "epoch": 21.696191319751993, "grad_norm": 0.20350492000579834, "learning_rate": 1e-05, "loss": 0.9642, "step": 24495 }, { "epoch": 21.70062001771479, "grad_norm": 0.2390928566455841, "learning_rate": 1e-05, "loss": 0.9995, "step": 24500 }, { "epoch": 21.70504871567759, "grad_norm": 0.26607394218444824, "learning_rate": 1e-05, "loss": 1.0163, "step": 24505 }, { "epoch": 21.70947741364039, "grad_norm": 0.23229064047336578, "learning_rate": 1e-05, "loss": 0.989, "step": 24510 }, { "epoch": 21.713906111603187, "grad_norm": 0.21496142446994781, "learning_rate": 1e-05, "loss": 1.0238, "step": 24515 }, { "epoch": 21.718334809565988, "grad_norm": 0.24185022711753845, "learning_rate": 1e-05, "loss": 1.0588, "step": 24520 }, { "epoch": 21.722763507528786, "grad_norm": 0.2623401880264282, "learning_rate": 1e-05, "loss": 0.9879, "step": 24525 }, { "epoch": 21.727192205491587, "grad_norm": 0.27695274353027344, "learning_rate": 1e-05, "loss": 0.956, "step": 24530 }, { "epoch": 21.731620903454385, "grad_norm": 0.20083735883235931, "learning_rate": 1e-05, "loss": 1.037, "step": 24535 }, { "epoch": 21.736049601417182, "grad_norm": 0.2114323377609253, "learning_rate": 1e-05, "loss": 1.0218, "step": 24540 }, { "epoch": 21.740478299379983, "grad_norm": 0.2885844111442566, "learning_rate": 1e-05, "loss": 1.0058, "step": 24545 }, { "epoch": 21.74490699734278, "grad_norm": 0.2764827311038971, "learning_rate": 1e-05, "loss": 0.9651, "step": 24550 }, { "epoch": 21.74933569530558, "grad_norm": 0.26501521468162537, "learning_rate": 1e-05, "loss": 0.9537, "step": 24555 }, { "epoch": 21.75376439326838, "grad_norm": 0.25946244597435, "learning_rate": 1e-05, "loss": 0.9621, "step": 24560 }, { "epoch": 21.758193091231178, "grad_norm": 0.2269723266363144, "learning_rate": 1e-05, "loss": 0.9769, "step": 24565 }, { "epoch": 21.762621789193975, "grad_norm": 0.2374456822872162, "learning_rate": 1e-05, "loss": 0.9789, "step": 24570 }, { "epoch": 21.767050487156776, "grad_norm": 0.2643706202507019, "learning_rate": 1e-05, "loss": 1.0058, "step": 24575 }, { "epoch": 21.771479185119574, "grad_norm": 0.2324141561985016, "learning_rate": 1e-05, "loss": 0.989, "step": 24580 }, { "epoch": 21.775907883082375, "grad_norm": 0.23101358115673065, "learning_rate": 1e-05, "loss": 0.985, "step": 24585 }, { "epoch": 21.780336581045173, "grad_norm": 0.2550812065601349, "learning_rate": 1e-05, "loss": 0.997, "step": 24590 }, { "epoch": 21.78476527900797, "grad_norm": 0.3111308217048645, "learning_rate": 1e-05, "loss": 0.9868, "step": 24595 }, { "epoch": 21.789193976970772, "grad_norm": 0.21285420656204224, "learning_rate": 1e-05, "loss": 1.0095, "step": 24600 }, { "epoch": 21.79362267493357, "grad_norm": 0.22681789100170135, "learning_rate": 1e-05, "loss": 1.0133, "step": 24605 }, { "epoch": 21.798051372896367, "grad_norm": 0.2354830652475357, "learning_rate": 1e-05, "loss": 0.9713, "step": 24610 }, { "epoch": 21.80248007085917, "grad_norm": 0.23641620576381683, "learning_rate": 1e-05, "loss": 0.9793, "step": 24615 }, { "epoch": 21.806908768821966, "grad_norm": 0.2156049758195877, "learning_rate": 1e-05, "loss": 1.035, "step": 24620 }, { "epoch": 21.811337466784764, "grad_norm": 0.27553847432136536, "learning_rate": 1e-05, "loss": 0.9116, "step": 24625 }, { "epoch": 21.815766164747565, "grad_norm": 0.23027409613132477, "learning_rate": 1e-05, "loss": 0.9599, "step": 24630 }, { "epoch": 21.820194862710363, "grad_norm": 0.2639000713825226, "learning_rate": 1e-05, "loss": 0.969, "step": 24635 }, { "epoch": 21.824623560673164, "grad_norm": 0.2749571204185486, "learning_rate": 1e-05, "loss": 0.9506, "step": 24640 }, { "epoch": 21.82905225863596, "grad_norm": 0.23388560116291046, "learning_rate": 1e-05, "loss": 0.9789, "step": 24645 }, { "epoch": 21.83348095659876, "grad_norm": 0.266191691160202, "learning_rate": 1e-05, "loss": 0.9912, "step": 24650 }, { "epoch": 21.83790965456156, "grad_norm": 0.23108968138694763, "learning_rate": 1e-05, "loss": 0.9595, "step": 24655 }, { "epoch": 21.842338352524358, "grad_norm": 0.23208607733249664, "learning_rate": 1e-05, "loss": 0.999, "step": 24660 }, { "epoch": 21.846767050487156, "grad_norm": 0.2253040373325348, "learning_rate": 1e-05, "loss": 0.9312, "step": 24665 }, { "epoch": 21.851195748449957, "grad_norm": 0.21282123029232025, "learning_rate": 1e-05, "loss": 0.9986, "step": 24670 }, { "epoch": 21.855624446412754, "grad_norm": 0.26727738976478577, "learning_rate": 1e-05, "loss": 0.9381, "step": 24675 }, { "epoch": 21.860053144375552, "grad_norm": 0.2842693030834198, "learning_rate": 1e-05, "loss": 0.9683, "step": 24680 }, { "epoch": 21.864481842338353, "grad_norm": 0.19972069561481476, "learning_rate": 1e-05, "loss": 1.0242, "step": 24685 }, { "epoch": 21.86891054030115, "grad_norm": 0.24891071021556854, "learning_rate": 1e-05, "loss": 1.0439, "step": 24690 }, { "epoch": 21.873339238263952, "grad_norm": 0.23052257299423218, "learning_rate": 1e-05, "loss": 0.9688, "step": 24695 }, { "epoch": 21.87776793622675, "grad_norm": 0.23111027479171753, "learning_rate": 1e-05, "loss": 1.0394, "step": 24700 }, { "epoch": 21.882196634189548, "grad_norm": 0.2436663806438446, "learning_rate": 1e-05, "loss": 1.0035, "step": 24705 }, { "epoch": 21.88662533215235, "grad_norm": 0.27078261971473694, "learning_rate": 1e-05, "loss": 0.9756, "step": 24710 }, { "epoch": 21.891054030115146, "grad_norm": 0.26738667488098145, "learning_rate": 1e-05, "loss": 0.9776, "step": 24715 }, { "epoch": 21.895482728077944, "grad_norm": 0.2642519176006317, "learning_rate": 1e-05, "loss": 0.9779, "step": 24720 }, { "epoch": 21.899911426040745, "grad_norm": 0.2486964464187622, "learning_rate": 1e-05, "loss": 0.9992, "step": 24725 }, { "epoch": 21.904340124003543, "grad_norm": 0.2267775982618332, "learning_rate": 1e-05, "loss": 0.9741, "step": 24730 }, { "epoch": 21.90876882196634, "grad_norm": 0.30306345224380493, "learning_rate": 1e-05, "loss": 0.9536, "step": 24735 }, { "epoch": 21.913197519929142, "grad_norm": 0.2734224796295166, "learning_rate": 1e-05, "loss": 0.9403, "step": 24740 }, { "epoch": 21.91762621789194, "grad_norm": 0.22469951212406158, "learning_rate": 1e-05, "loss": 1.003, "step": 24745 }, { "epoch": 21.922054915854737, "grad_norm": 0.24321064352989197, "learning_rate": 1e-05, "loss": 1.0267, "step": 24750 }, { "epoch": 21.92648361381754, "grad_norm": 0.23115235567092896, "learning_rate": 1e-05, "loss": 1.0103, "step": 24755 }, { "epoch": 21.930912311780336, "grad_norm": 0.2903413474559784, "learning_rate": 1e-05, "loss": 1.0276, "step": 24760 }, { "epoch": 21.935341009743137, "grad_norm": 0.21366137266159058, "learning_rate": 1e-05, "loss": 0.9512, "step": 24765 }, { "epoch": 21.939769707705935, "grad_norm": 0.23998533189296722, "learning_rate": 1e-05, "loss": 0.9906, "step": 24770 }, { "epoch": 21.944198405668732, "grad_norm": 0.2669883072376251, "learning_rate": 1e-05, "loss": 0.9864, "step": 24775 }, { "epoch": 21.948627103631534, "grad_norm": 0.2695714235305786, "learning_rate": 1e-05, "loss": 1.0447, "step": 24780 }, { "epoch": 21.95305580159433, "grad_norm": 0.24322670698165894, "learning_rate": 1e-05, "loss": 0.975, "step": 24785 }, { "epoch": 21.95748449955713, "grad_norm": 0.21124529838562012, "learning_rate": 1e-05, "loss": 0.9938, "step": 24790 }, { "epoch": 21.96191319751993, "grad_norm": 0.21688516438007355, "learning_rate": 1e-05, "loss": 0.9807, "step": 24795 }, { "epoch": 21.966341895482728, "grad_norm": 0.23600834608078003, "learning_rate": 1e-05, "loss": 0.9951, "step": 24800 }, { "epoch": 21.970770593445526, "grad_norm": 0.28674063086509705, "learning_rate": 1e-05, "loss": 0.9864, "step": 24805 }, { "epoch": 21.975199291408327, "grad_norm": 0.26195546984672546, "learning_rate": 1e-05, "loss": 0.9914, "step": 24810 }, { "epoch": 21.979627989371124, "grad_norm": 0.25015777349472046, "learning_rate": 1e-05, "loss": 0.9087, "step": 24815 }, { "epoch": 21.984056687333926, "grad_norm": 0.25432541966438293, "learning_rate": 1e-05, "loss": 0.9812, "step": 24820 }, { "epoch": 21.988485385296723, "grad_norm": 0.27454882860183716, "learning_rate": 1e-05, "loss": 0.9816, "step": 24825 }, { "epoch": 21.99291408325952, "grad_norm": 0.28187116980552673, "learning_rate": 1e-05, "loss": 1.0441, "step": 24830 }, { "epoch": 21.997342781222322, "grad_norm": 0.29521968960762024, "learning_rate": 1e-05, "loss": 0.9726, "step": 24835 }, { "epoch": 22.00177147918512, "grad_norm": 0.3057933449745178, "learning_rate": 1e-05, "loss": 1.0242, "step": 24840 }, { "epoch": 22.006200177147917, "grad_norm": 0.2525843679904938, "learning_rate": 1e-05, "loss": 1.0189, "step": 24845 }, { "epoch": 22.01062887511072, "grad_norm": 0.28382664918899536, "learning_rate": 1e-05, "loss": 1.0151, "step": 24850 }, { "epoch": 22.015057573073516, "grad_norm": 0.23374539613723755, "learning_rate": 1e-05, "loss": 0.9557, "step": 24855 }, { "epoch": 22.019486271036314, "grad_norm": 0.2440481185913086, "learning_rate": 1e-05, "loss": 1.0003, "step": 24860 }, { "epoch": 22.023914968999115, "grad_norm": 0.24902604520320892, "learning_rate": 1e-05, "loss": 0.9985, "step": 24865 }, { "epoch": 22.028343666961913, "grad_norm": 0.2957521378993988, "learning_rate": 1e-05, "loss": 0.9606, "step": 24870 }, { "epoch": 22.03277236492471, "grad_norm": 0.22571849822998047, "learning_rate": 1e-05, "loss": 0.9899, "step": 24875 }, { "epoch": 22.03720106288751, "grad_norm": 0.24343352019786835, "learning_rate": 1e-05, "loss": 1.0588, "step": 24880 }, { "epoch": 22.04162976085031, "grad_norm": 0.23574069142341614, "learning_rate": 1e-05, "loss": 1.0383, "step": 24885 }, { "epoch": 22.04605845881311, "grad_norm": 0.24992144107818604, "learning_rate": 1e-05, "loss": 0.9638, "step": 24890 }, { "epoch": 22.050487156775908, "grad_norm": 0.28429991006851196, "learning_rate": 1e-05, "loss": 0.9682, "step": 24895 }, { "epoch": 22.054915854738706, "grad_norm": 0.2270321100950241, "learning_rate": 1e-05, "loss": 1.0012, "step": 24900 }, { "epoch": 22.059344552701507, "grad_norm": 0.22781485319137573, "learning_rate": 1e-05, "loss": 0.9653, "step": 24905 }, { "epoch": 22.063773250664305, "grad_norm": 0.25927671790122986, "learning_rate": 1e-05, "loss": 0.9955, "step": 24910 }, { "epoch": 22.068201948627102, "grad_norm": 0.22343440353870392, "learning_rate": 1e-05, "loss": 0.9601, "step": 24915 }, { "epoch": 22.072630646589904, "grad_norm": 0.25268563628196716, "learning_rate": 1e-05, "loss": 0.9787, "step": 24920 }, { "epoch": 22.0770593445527, "grad_norm": 0.22193363308906555, "learning_rate": 1e-05, "loss": 1.0321, "step": 24925 }, { "epoch": 22.0814880425155, "grad_norm": 0.30172044038772583, "learning_rate": 1e-05, "loss": 1.0355, "step": 24930 }, { "epoch": 22.0859167404783, "grad_norm": 0.21960721909999847, "learning_rate": 1e-05, "loss": 0.9653, "step": 24935 }, { "epoch": 22.090345438441098, "grad_norm": 0.23669998347759247, "learning_rate": 1e-05, "loss": 0.9829, "step": 24940 }, { "epoch": 22.0947741364039, "grad_norm": 0.31388846039772034, "learning_rate": 1e-05, "loss": 0.9818, "step": 24945 }, { "epoch": 22.099202834366697, "grad_norm": 0.30553311109542847, "learning_rate": 1e-05, "loss": 1.0016, "step": 24950 }, { "epoch": 22.103631532329494, "grad_norm": 0.29542359709739685, "learning_rate": 1e-05, "loss": 0.9797, "step": 24955 }, { "epoch": 22.108060230292296, "grad_norm": 0.21039451658725739, "learning_rate": 1e-05, "loss": 0.9974, "step": 24960 }, { "epoch": 22.112488928255093, "grad_norm": 0.23354622721672058, "learning_rate": 1e-05, "loss": 0.9658, "step": 24965 }, { "epoch": 22.11691762621789, "grad_norm": 0.257804811000824, "learning_rate": 1e-05, "loss": 0.9887, "step": 24970 }, { "epoch": 22.121346324180692, "grad_norm": 0.2120206207036972, "learning_rate": 1e-05, "loss": 1.0069, "step": 24975 }, { "epoch": 22.12577502214349, "grad_norm": 0.26745209097862244, "learning_rate": 1e-05, "loss": 0.9196, "step": 24980 }, { "epoch": 22.130203720106287, "grad_norm": 0.23815174400806427, "learning_rate": 1e-05, "loss": 0.9965, "step": 24985 }, { "epoch": 22.13463241806909, "grad_norm": 0.2787434756755829, "learning_rate": 1e-05, "loss": 1.0029, "step": 24990 }, { "epoch": 22.139061116031886, "grad_norm": 0.3281756639480591, "learning_rate": 1e-05, "loss": 1.0125, "step": 24995 }, { "epoch": 22.143489813994684, "grad_norm": 0.24894240498542786, "learning_rate": 1e-05, "loss": 1.0271, "step": 25000 }, { "epoch": 22.147918511957485, "grad_norm": 0.26528269052505493, "learning_rate": 1e-05, "loss": 0.9923, "step": 25005 }, { "epoch": 22.152347209920283, "grad_norm": 0.243265300989151, "learning_rate": 1e-05, "loss": 1.0, "step": 25010 }, { "epoch": 22.156775907883084, "grad_norm": 0.2229057103395462, "learning_rate": 1e-05, "loss": 1.0074, "step": 25015 }, { "epoch": 22.16120460584588, "grad_norm": 0.269471675157547, "learning_rate": 1e-05, "loss": 0.9714, "step": 25020 }, { "epoch": 22.16563330380868, "grad_norm": 0.24262364208698273, "learning_rate": 1e-05, "loss": 0.9464, "step": 25025 }, { "epoch": 22.17006200177148, "grad_norm": 0.21221621334552765, "learning_rate": 1e-05, "loss": 1.0349, "step": 25030 }, { "epoch": 22.174490699734278, "grad_norm": 0.24311906099319458, "learning_rate": 1e-05, "loss": 1.0526, "step": 25035 }, { "epoch": 22.178919397697076, "grad_norm": 0.28096672892570496, "learning_rate": 1e-05, "loss": 0.9469, "step": 25040 }, { "epoch": 22.183348095659877, "grad_norm": 0.25694939494132996, "learning_rate": 1e-05, "loss": 0.9959, "step": 25045 }, { "epoch": 22.187776793622675, "grad_norm": 0.27143892645835876, "learning_rate": 1e-05, "loss": 0.9809, "step": 25050 }, { "epoch": 22.192205491585472, "grad_norm": 0.30177608132362366, "learning_rate": 1e-05, "loss": 0.9326, "step": 25055 }, { "epoch": 22.196634189548273, "grad_norm": 0.25788095593452454, "learning_rate": 1e-05, "loss": 1.0314, "step": 25060 }, { "epoch": 22.20106288751107, "grad_norm": 0.2544099986553192, "learning_rate": 1e-05, "loss": 0.9988, "step": 25065 }, { "epoch": 22.205491585473872, "grad_norm": 0.19766612350940704, "learning_rate": 1e-05, "loss": 0.9944, "step": 25070 }, { "epoch": 22.20992028343667, "grad_norm": 0.2524663507938385, "learning_rate": 1e-05, "loss": 0.9967, "step": 25075 }, { "epoch": 22.214348981399468, "grad_norm": 0.2551048696041107, "learning_rate": 1e-05, "loss": 0.94, "step": 25080 }, { "epoch": 22.21877767936227, "grad_norm": 0.28066185116767883, "learning_rate": 1e-05, "loss": 1.0417, "step": 25085 }, { "epoch": 22.223206377325067, "grad_norm": 0.2449391633272171, "learning_rate": 1e-05, "loss": 1.0037, "step": 25090 }, { "epoch": 22.227635075287864, "grad_norm": 0.26418206095695496, "learning_rate": 1e-05, "loss": 0.9842, "step": 25095 }, { "epoch": 22.232063773250665, "grad_norm": 0.31985703110694885, "learning_rate": 1e-05, "loss": 0.9587, "step": 25100 }, { "epoch": 22.236492471213463, "grad_norm": 0.2525765895843506, "learning_rate": 1e-05, "loss": 1.0166, "step": 25105 }, { "epoch": 22.24092116917626, "grad_norm": 0.23749807476997375, "learning_rate": 1e-05, "loss": 1.0121, "step": 25110 }, { "epoch": 22.245349867139062, "grad_norm": 0.23759925365447998, "learning_rate": 1e-05, "loss": 1.0201, "step": 25115 }, { "epoch": 22.24977856510186, "grad_norm": 0.24245911836624146, "learning_rate": 1e-05, "loss": 1.0402, "step": 25120 }, { "epoch": 22.254207263064657, "grad_norm": 0.2141829878091812, "learning_rate": 1e-05, "loss": 1.0024, "step": 25125 }, { "epoch": 22.25863596102746, "grad_norm": 0.33661559224128723, "learning_rate": 1e-05, "loss": 0.99, "step": 25130 }, { "epoch": 22.263064658990256, "grad_norm": 0.2650635242462158, "learning_rate": 1e-05, "loss": 0.9685, "step": 25135 }, { "epoch": 22.267493356953057, "grad_norm": 0.29339638352394104, "learning_rate": 1e-05, "loss": 0.9753, "step": 25140 }, { "epoch": 22.271922054915855, "grad_norm": 0.24675299227237701, "learning_rate": 1e-05, "loss": 0.947, "step": 25145 }, { "epoch": 22.276350752878653, "grad_norm": 0.26497048139572144, "learning_rate": 1e-05, "loss": 0.9863, "step": 25150 }, { "epoch": 22.280779450841454, "grad_norm": 0.29946672916412354, "learning_rate": 1e-05, "loss": 0.9576, "step": 25155 }, { "epoch": 22.28520814880425, "grad_norm": 0.2353670448064804, "learning_rate": 1e-05, "loss": 1.0286, "step": 25160 }, { "epoch": 22.28963684676705, "grad_norm": 0.24496451020240784, "learning_rate": 1e-05, "loss": 0.996, "step": 25165 }, { "epoch": 22.29406554472985, "grad_norm": 0.22993889451026917, "learning_rate": 1e-05, "loss": 1.003, "step": 25170 }, { "epoch": 22.298494242692648, "grad_norm": 0.23675395548343658, "learning_rate": 1e-05, "loss": 0.9753, "step": 25175 }, { "epoch": 22.302922940655446, "grad_norm": 0.23097476363182068, "learning_rate": 1e-05, "loss": 0.9805, "step": 25180 }, { "epoch": 22.307351638618247, "grad_norm": 0.2836301326751709, "learning_rate": 1e-05, "loss": 1.0221, "step": 25185 }, { "epoch": 22.311780336581045, "grad_norm": 0.2484685629606247, "learning_rate": 1e-05, "loss": 0.9854, "step": 25190 }, { "epoch": 22.316209034543846, "grad_norm": 0.25793904066085815, "learning_rate": 1e-05, "loss": 0.9971, "step": 25195 }, { "epoch": 22.320637732506643, "grad_norm": 0.21760345995426178, "learning_rate": 1e-05, "loss": 1.0026, "step": 25200 }, { "epoch": 22.32506643046944, "grad_norm": 0.1999516636133194, "learning_rate": 1e-05, "loss": 1.0012, "step": 25205 }, { "epoch": 22.329495128432242, "grad_norm": 0.2596040666103363, "learning_rate": 1e-05, "loss": 0.9782, "step": 25210 }, { "epoch": 22.33392382639504, "grad_norm": 0.24976402521133423, "learning_rate": 1e-05, "loss": 1.0108, "step": 25215 }, { "epoch": 22.338352524357838, "grad_norm": 0.2822668254375458, "learning_rate": 1e-05, "loss": 0.9892, "step": 25220 }, { "epoch": 22.34278122232064, "grad_norm": 0.22268618643283844, "learning_rate": 1e-05, "loss": 0.9626, "step": 25225 }, { "epoch": 22.347209920283436, "grad_norm": 0.24922682344913483, "learning_rate": 1e-05, "loss": 0.9878, "step": 25230 }, { "epoch": 22.351638618246234, "grad_norm": 0.2675280272960663, "learning_rate": 1e-05, "loss": 1.0375, "step": 25235 }, { "epoch": 22.356067316209035, "grad_norm": 0.3110574185848236, "learning_rate": 1e-05, "loss": 0.952, "step": 25240 }, { "epoch": 22.360496014171833, "grad_norm": 0.30851760506629944, "learning_rate": 1e-05, "loss": 1.0076, "step": 25245 }, { "epoch": 22.36492471213463, "grad_norm": 0.27786239981651306, "learning_rate": 1e-05, "loss": 1.024, "step": 25250 }, { "epoch": 22.369353410097432, "grad_norm": 0.2358911782503128, "learning_rate": 1e-05, "loss": 1.0001, "step": 25255 }, { "epoch": 22.37378210806023, "grad_norm": 0.23460493981838226, "learning_rate": 1e-05, "loss": 0.956, "step": 25260 }, { "epoch": 22.37821080602303, "grad_norm": 0.2363087385892868, "learning_rate": 1e-05, "loss": 1.0448, "step": 25265 }, { "epoch": 22.38263950398583, "grad_norm": 0.2724899351596832, "learning_rate": 1e-05, "loss": 0.9887, "step": 25270 }, { "epoch": 22.387068201948626, "grad_norm": 0.2441389113664627, "learning_rate": 1e-05, "loss": 1.0316, "step": 25275 }, { "epoch": 22.391496899911427, "grad_norm": 0.2583214044570923, "learning_rate": 1e-05, "loss": 1.0316, "step": 25280 }, { "epoch": 22.395925597874225, "grad_norm": 0.2398071140050888, "learning_rate": 1e-05, "loss": 1.0022, "step": 25285 }, { "epoch": 22.400354295837023, "grad_norm": 0.23686298727989197, "learning_rate": 1e-05, "loss": 0.9964, "step": 25290 }, { "epoch": 22.404782993799824, "grad_norm": 0.25865161418914795, "learning_rate": 1e-05, "loss": 1.0109, "step": 25295 }, { "epoch": 22.40921169176262, "grad_norm": 0.30793774127960205, "learning_rate": 1e-05, "loss": 1.0211, "step": 25300 }, { "epoch": 22.41364038972542, "grad_norm": 0.2522560954093933, "learning_rate": 1e-05, "loss": 0.9752, "step": 25305 }, { "epoch": 22.41806908768822, "grad_norm": 0.23933781683444977, "learning_rate": 1e-05, "loss": 1.016, "step": 25310 }, { "epoch": 22.422497785651018, "grad_norm": 0.23642078042030334, "learning_rate": 1e-05, "loss": 0.9665, "step": 25315 }, { "epoch": 22.42692648361382, "grad_norm": 0.23104454576969147, "learning_rate": 1e-05, "loss": 1.014, "step": 25320 }, { "epoch": 22.431355181576617, "grad_norm": 0.20739449560642242, "learning_rate": 1e-05, "loss": 1.006, "step": 25325 }, { "epoch": 22.435783879539414, "grad_norm": 0.2163451761007309, "learning_rate": 1e-05, "loss": 0.9906, "step": 25330 }, { "epoch": 22.440212577502216, "grad_norm": 0.25509095191955566, "learning_rate": 1e-05, "loss": 1.0079, "step": 25335 }, { "epoch": 22.444641275465013, "grad_norm": 0.24274949729442596, "learning_rate": 1e-05, "loss": 0.986, "step": 25340 }, { "epoch": 22.44906997342781, "grad_norm": 0.2529536485671997, "learning_rate": 1e-05, "loss": 1.0139, "step": 25345 }, { "epoch": 22.453498671390612, "grad_norm": 0.2632298767566681, "learning_rate": 1e-05, "loss": 0.9808, "step": 25350 }, { "epoch": 22.45792736935341, "grad_norm": 0.24525117874145508, "learning_rate": 1e-05, "loss": 1.0269, "step": 25355 }, { "epoch": 22.462356067316207, "grad_norm": 0.2170303463935852, "learning_rate": 1e-05, "loss": 0.9847, "step": 25360 }, { "epoch": 22.46678476527901, "grad_norm": 0.26130905747413635, "learning_rate": 1e-05, "loss": 0.9507, "step": 25365 }, { "epoch": 22.471213463241806, "grad_norm": 0.23054029047489166, "learning_rate": 1e-05, "loss": 0.9666, "step": 25370 }, { "epoch": 22.475642161204608, "grad_norm": 0.2737879157066345, "learning_rate": 1e-05, "loss": 1.0221, "step": 25375 }, { "epoch": 22.480070859167405, "grad_norm": 0.21854202449321747, "learning_rate": 1e-05, "loss": 0.9812, "step": 25380 }, { "epoch": 22.484499557130203, "grad_norm": 0.2287951409816742, "learning_rate": 1e-05, "loss": 1.0079, "step": 25385 }, { "epoch": 22.488928255093004, "grad_norm": 0.3347814679145813, "learning_rate": 1e-05, "loss": 0.9838, "step": 25390 }, { "epoch": 22.4933569530558, "grad_norm": 0.22403250634670258, "learning_rate": 1e-05, "loss": 1.0374, "step": 25395 }, { "epoch": 22.4977856510186, "grad_norm": 0.21907730400562286, "learning_rate": 1e-05, "loss": 0.9402, "step": 25400 }, { "epoch": 22.5022143489814, "grad_norm": 0.21052448451519012, "learning_rate": 1e-05, "loss": 1.0022, "step": 25405 }, { "epoch": 22.5066430469442, "grad_norm": 0.24547868967056274, "learning_rate": 1e-05, "loss": 0.9926, "step": 25410 }, { "epoch": 22.511071744906996, "grad_norm": 0.2388623058795929, "learning_rate": 1e-05, "loss": 0.9971, "step": 25415 }, { "epoch": 22.515500442869797, "grad_norm": 0.2339182198047638, "learning_rate": 1e-05, "loss": 0.957, "step": 25420 }, { "epoch": 22.519929140832595, "grad_norm": 0.28296196460723877, "learning_rate": 1e-05, "loss": 0.9832, "step": 25425 }, { "epoch": 22.524357838795392, "grad_norm": 0.2755812406539917, "learning_rate": 1e-05, "loss": 0.987, "step": 25430 }, { "epoch": 22.528786536758194, "grad_norm": 0.20562784373760223, "learning_rate": 1e-05, "loss": 1.0522, "step": 25435 }, { "epoch": 22.53321523472099, "grad_norm": 0.24974970519542694, "learning_rate": 1e-05, "loss": 0.9965, "step": 25440 }, { "epoch": 22.537643932683793, "grad_norm": 0.2414158582687378, "learning_rate": 1e-05, "loss": 1.0106, "step": 25445 }, { "epoch": 22.54207263064659, "grad_norm": 0.2662864923477173, "learning_rate": 1e-05, "loss": 0.9931, "step": 25450 }, { "epoch": 22.546501328609388, "grad_norm": 0.21896205842494965, "learning_rate": 1e-05, "loss": 1.0012, "step": 25455 }, { "epoch": 22.55093002657219, "grad_norm": 0.24726009368896484, "learning_rate": 1e-05, "loss": 0.9783, "step": 25460 }, { "epoch": 22.555358724534987, "grad_norm": 0.24781477451324463, "learning_rate": 1e-05, "loss": 1.005, "step": 25465 }, { "epoch": 22.559787422497784, "grad_norm": 0.26446297764778137, "learning_rate": 1e-05, "loss": 1.0157, "step": 25470 }, { "epoch": 22.564216120460586, "grad_norm": 0.23708654940128326, "learning_rate": 1e-05, "loss": 0.9921, "step": 25475 }, { "epoch": 22.568644818423383, "grad_norm": 0.2327011376619339, "learning_rate": 1e-05, "loss": 0.9839, "step": 25480 }, { "epoch": 22.57307351638618, "grad_norm": 0.24683986604213715, "learning_rate": 1e-05, "loss": 0.9764, "step": 25485 }, { "epoch": 22.577502214348982, "grad_norm": 0.29164671897888184, "learning_rate": 1e-05, "loss": 0.9714, "step": 25490 }, { "epoch": 22.58193091231178, "grad_norm": 0.2717437148094177, "learning_rate": 1e-05, "loss": 0.9556, "step": 25495 }, { "epoch": 22.58635961027458, "grad_norm": 0.26747655868530273, "learning_rate": 1e-05, "loss": 0.9748, "step": 25500 }, { "epoch": 22.59078830823738, "grad_norm": 0.3003387153148651, "learning_rate": 1e-05, "loss": 0.9913, "step": 25505 }, { "epoch": 22.595217006200176, "grad_norm": 0.2480490356683731, "learning_rate": 1e-05, "loss": 0.9968, "step": 25510 }, { "epoch": 22.599645704162977, "grad_norm": 0.2751440107822418, "learning_rate": 1e-05, "loss": 0.9896, "step": 25515 }, { "epoch": 22.604074402125775, "grad_norm": 0.25806310772895813, "learning_rate": 1e-05, "loss": 0.9287, "step": 25520 }, { "epoch": 22.608503100088573, "grad_norm": 0.23098978400230408, "learning_rate": 1e-05, "loss": 1.0187, "step": 25525 }, { "epoch": 22.612931798051374, "grad_norm": 0.25656262040138245, "learning_rate": 1e-05, "loss": 0.94, "step": 25530 }, { "epoch": 22.61736049601417, "grad_norm": 0.26678669452667236, "learning_rate": 1e-05, "loss": 0.9572, "step": 25535 }, { "epoch": 22.62178919397697, "grad_norm": 0.23283767700195312, "learning_rate": 1e-05, "loss": 0.9959, "step": 25540 }, { "epoch": 22.62621789193977, "grad_norm": 0.3079436719417572, "learning_rate": 1e-05, "loss": 1.0117, "step": 25545 }, { "epoch": 22.630646589902568, "grad_norm": 0.25933048129081726, "learning_rate": 1e-05, "loss": 1.0048, "step": 25550 }, { "epoch": 22.63507528786537, "grad_norm": 0.27540329098701477, "learning_rate": 1e-05, "loss": 1.0215, "step": 25555 }, { "epoch": 22.639503985828167, "grad_norm": 0.25537315011024475, "learning_rate": 1e-05, "loss": 0.9841, "step": 25560 }, { "epoch": 22.643932683790965, "grad_norm": 0.28954339027404785, "learning_rate": 1e-05, "loss": 0.9689, "step": 25565 }, { "epoch": 22.648361381753766, "grad_norm": 0.24918121099472046, "learning_rate": 1e-05, "loss": 1.0299, "step": 25570 }, { "epoch": 22.652790079716564, "grad_norm": 0.31572872400283813, "learning_rate": 1e-05, "loss": 0.9578, "step": 25575 }, { "epoch": 22.65721877767936, "grad_norm": 0.23251767456531525, "learning_rate": 1e-05, "loss": 1.0201, "step": 25580 }, { "epoch": 22.661647475642162, "grad_norm": 0.23751093447208405, "learning_rate": 1e-05, "loss": 0.9838, "step": 25585 }, { "epoch": 22.66607617360496, "grad_norm": 0.24201689660549164, "learning_rate": 1e-05, "loss": 0.9884, "step": 25590 }, { "epoch": 22.670504871567758, "grad_norm": 0.28515568375587463, "learning_rate": 1e-05, "loss": 0.9683, "step": 25595 }, { "epoch": 22.67493356953056, "grad_norm": 0.2828501760959625, "learning_rate": 1e-05, "loss": 1.0325, "step": 25600 }, { "epoch": 22.679362267493357, "grad_norm": 0.2390662580728531, "learning_rate": 1e-05, "loss": 0.9836, "step": 25605 }, { "epoch": 22.683790965456154, "grad_norm": 0.22238297760486603, "learning_rate": 1e-05, "loss": 0.9522, "step": 25610 }, { "epoch": 22.688219663418955, "grad_norm": 0.24652238190174103, "learning_rate": 1e-05, "loss": 0.9777, "step": 25615 }, { "epoch": 22.692648361381753, "grad_norm": 0.20941878855228424, "learning_rate": 1e-05, "loss": 0.9933, "step": 25620 }, { "epoch": 22.697077059344554, "grad_norm": 0.24829435348510742, "learning_rate": 1e-05, "loss": 1.0082, "step": 25625 }, { "epoch": 22.701505757307352, "grad_norm": 0.2920697331428528, "learning_rate": 1e-05, "loss": 0.979, "step": 25630 }, { "epoch": 22.70593445527015, "grad_norm": 0.2333025187253952, "learning_rate": 1e-05, "loss": 1.0033, "step": 25635 }, { "epoch": 22.71036315323295, "grad_norm": 0.27829641103744507, "learning_rate": 1e-05, "loss": 0.973, "step": 25640 }, { "epoch": 22.71479185119575, "grad_norm": 0.28581103682518005, "learning_rate": 1e-05, "loss": 1.0147, "step": 25645 }, { "epoch": 22.719220549158546, "grad_norm": 0.21458880603313446, "learning_rate": 1e-05, "loss": 0.9775, "step": 25650 }, { "epoch": 22.723649247121347, "grad_norm": 0.26263871788978577, "learning_rate": 1e-05, "loss": 0.9844, "step": 25655 }, { "epoch": 22.728077945084145, "grad_norm": 0.2184339314699173, "learning_rate": 1e-05, "loss": 0.9576, "step": 25660 }, { "epoch": 22.732506643046943, "grad_norm": 0.23649661242961884, "learning_rate": 1e-05, "loss": 0.9658, "step": 25665 }, { "epoch": 22.736935341009744, "grad_norm": 0.22391146421432495, "learning_rate": 1e-05, "loss": 0.9821, "step": 25670 }, { "epoch": 22.74136403897254, "grad_norm": 0.2300713062286377, "learning_rate": 1e-05, "loss": 1.0082, "step": 25675 }, { "epoch": 22.745792736935343, "grad_norm": 0.25490954518318176, "learning_rate": 1e-05, "loss": 0.9786, "step": 25680 }, { "epoch": 22.75022143489814, "grad_norm": 0.23086613416671753, "learning_rate": 1e-05, "loss": 1.0069, "step": 25685 }, { "epoch": 22.754650132860938, "grad_norm": 0.2372780740261078, "learning_rate": 1e-05, "loss": 1.0229, "step": 25690 }, { "epoch": 22.75907883082374, "grad_norm": 0.19394199550151825, "learning_rate": 1e-05, "loss": 0.9496, "step": 25695 }, { "epoch": 22.763507528786537, "grad_norm": 0.18885891139507294, "learning_rate": 1e-05, "loss": 0.9489, "step": 25700 }, { "epoch": 22.767936226749335, "grad_norm": 0.32187581062316895, "learning_rate": 1e-05, "loss": 1.0287, "step": 25705 }, { "epoch": 22.772364924712136, "grad_norm": 0.27318906784057617, "learning_rate": 1e-05, "loss": 1.0339, "step": 25710 }, { "epoch": 22.776793622674933, "grad_norm": 0.2578780949115753, "learning_rate": 1e-05, "loss": 0.9668, "step": 25715 }, { "epoch": 22.78122232063773, "grad_norm": 0.25151413679122925, "learning_rate": 1e-05, "loss": 0.9951, "step": 25720 }, { "epoch": 22.785651018600532, "grad_norm": 0.27815499901771545, "learning_rate": 1e-05, "loss": 0.959, "step": 25725 }, { "epoch": 22.79007971656333, "grad_norm": 0.22998100519180298, "learning_rate": 1e-05, "loss": 0.9877, "step": 25730 }, { "epoch": 22.794508414526128, "grad_norm": 0.2738983631134033, "learning_rate": 1e-05, "loss": 0.9798, "step": 25735 }, { "epoch": 22.79893711248893, "grad_norm": 0.24925392866134644, "learning_rate": 1e-05, "loss": 0.9691, "step": 25740 }, { "epoch": 22.803365810451727, "grad_norm": 0.26162779331207275, "learning_rate": 1e-05, "loss": 0.9705, "step": 25745 }, { "epoch": 22.807794508414528, "grad_norm": 0.24740613996982574, "learning_rate": 1e-05, "loss": 0.9673, "step": 25750 }, { "epoch": 22.812223206377325, "grad_norm": 0.25766319036483765, "learning_rate": 1e-05, "loss": 1.0604, "step": 25755 }, { "epoch": 22.816651904340123, "grad_norm": 0.270840585231781, "learning_rate": 1e-05, "loss": 0.9713, "step": 25760 }, { "epoch": 22.821080602302924, "grad_norm": 0.26112550497055054, "learning_rate": 1e-05, "loss": 0.9727, "step": 25765 }, { "epoch": 22.825509300265722, "grad_norm": 0.2253311723470688, "learning_rate": 1e-05, "loss": 0.9732, "step": 25770 }, { "epoch": 22.82993799822852, "grad_norm": 0.25054895877838135, "learning_rate": 1e-05, "loss": 0.9726, "step": 25775 }, { "epoch": 22.83436669619132, "grad_norm": 0.2464187890291214, "learning_rate": 1e-05, "loss": 1.0316, "step": 25780 }, { "epoch": 22.83879539415412, "grad_norm": 0.2638521194458008, "learning_rate": 1e-05, "loss": 0.9705, "step": 25785 }, { "epoch": 22.843224092116916, "grad_norm": 0.27301034331321716, "learning_rate": 1e-05, "loss": 1.0017, "step": 25790 }, { "epoch": 22.847652790079717, "grad_norm": 0.22551095485687256, "learning_rate": 1e-05, "loss": 0.9947, "step": 25795 }, { "epoch": 22.852081488042515, "grad_norm": 0.28099676966667175, "learning_rate": 1e-05, "loss": 0.972, "step": 25800 }, { "epoch": 22.856510186005316, "grad_norm": 0.2262679636478424, "learning_rate": 1e-05, "loss": 1.0458, "step": 25805 }, { "epoch": 22.860938883968114, "grad_norm": 0.2315719574689865, "learning_rate": 1e-05, "loss": 1.0074, "step": 25810 }, { "epoch": 22.86536758193091, "grad_norm": 0.19639046490192413, "learning_rate": 1e-05, "loss": 0.967, "step": 25815 }, { "epoch": 22.869796279893713, "grad_norm": 0.2535834014415741, "learning_rate": 1e-05, "loss": 1.0018, "step": 25820 }, { "epoch": 22.87422497785651, "grad_norm": 0.25161483883857727, "learning_rate": 1e-05, "loss": 0.9642, "step": 25825 }, { "epoch": 22.878653675819308, "grad_norm": 0.2458488792181015, "learning_rate": 1e-05, "loss": 1.0065, "step": 25830 }, { "epoch": 22.88308237378211, "grad_norm": 0.27616190910339355, "learning_rate": 1e-05, "loss": 0.9847, "step": 25835 }, { "epoch": 22.887511071744907, "grad_norm": 0.22954954206943512, "learning_rate": 1e-05, "loss": 1.0064, "step": 25840 }, { "epoch": 22.891939769707704, "grad_norm": 0.2157122641801834, "learning_rate": 1e-05, "loss": 0.9711, "step": 25845 }, { "epoch": 22.896368467670506, "grad_norm": 0.3065982162952423, "learning_rate": 1e-05, "loss": 0.9938, "step": 25850 }, { "epoch": 22.900797165633303, "grad_norm": 0.28051337599754333, "learning_rate": 1e-05, "loss": 0.9844, "step": 25855 }, { "epoch": 22.9052258635961, "grad_norm": 0.3243139684200287, "learning_rate": 1e-05, "loss": 0.9824, "step": 25860 }, { "epoch": 22.909654561558902, "grad_norm": 0.25547072291374207, "learning_rate": 1e-05, "loss": 0.9618, "step": 25865 }, { "epoch": 22.9140832595217, "grad_norm": 0.28228655457496643, "learning_rate": 1e-05, "loss": 0.9891, "step": 25870 }, { "epoch": 22.9185119574845, "grad_norm": 0.3086131811141968, "learning_rate": 1e-05, "loss": 0.9956, "step": 25875 }, { "epoch": 22.9229406554473, "grad_norm": 0.25552958250045776, "learning_rate": 1e-05, "loss": 1.007, "step": 25880 }, { "epoch": 22.927369353410096, "grad_norm": 0.2505502998828888, "learning_rate": 1e-05, "loss": 0.9799, "step": 25885 }, { "epoch": 22.931798051372898, "grad_norm": 0.24610909819602966, "learning_rate": 1e-05, "loss": 1.0271, "step": 25890 }, { "epoch": 22.936226749335695, "grad_norm": 0.240942120552063, "learning_rate": 1e-05, "loss": 0.9586, "step": 25895 }, { "epoch": 22.940655447298493, "grad_norm": 0.2842324674129486, "learning_rate": 1e-05, "loss": 0.9725, "step": 25900 }, { "epoch": 22.945084145261294, "grad_norm": 0.252953439950943, "learning_rate": 1e-05, "loss": 0.9782, "step": 25905 }, { "epoch": 22.949512843224092, "grad_norm": 0.23336157202720642, "learning_rate": 1e-05, "loss": 1.0012, "step": 25910 }, { "epoch": 22.95394154118689, "grad_norm": 0.28841614723205566, "learning_rate": 1e-05, "loss": 0.9621, "step": 25915 }, { "epoch": 22.95837023914969, "grad_norm": 0.2475043088197708, "learning_rate": 1e-05, "loss": 0.9851, "step": 25920 }, { "epoch": 22.96279893711249, "grad_norm": 0.20819233357906342, "learning_rate": 1e-05, "loss": 1.0152, "step": 25925 }, { "epoch": 22.96722763507529, "grad_norm": 0.27295488119125366, "learning_rate": 1e-05, "loss": 0.9808, "step": 25930 }, { "epoch": 22.971656333038087, "grad_norm": 0.2852492928504944, "learning_rate": 1e-05, "loss": 1.0578, "step": 25935 }, { "epoch": 22.976085031000885, "grad_norm": 0.23579590022563934, "learning_rate": 1e-05, "loss": 1.0002, "step": 25940 }, { "epoch": 22.980513728963686, "grad_norm": 0.28827714920043945, "learning_rate": 1e-05, "loss": 1.0298, "step": 25945 }, { "epoch": 22.984942426926484, "grad_norm": 0.29914212226867676, "learning_rate": 1e-05, "loss": 0.97, "step": 25950 }, { "epoch": 22.98937112488928, "grad_norm": 0.22872090339660645, "learning_rate": 1e-05, "loss": 0.9964, "step": 25955 }, { "epoch": 22.993799822852083, "grad_norm": 0.22265173494815826, "learning_rate": 1e-05, "loss": 0.9644, "step": 25960 }, { "epoch": 22.99822852081488, "grad_norm": 0.3121262192726135, "learning_rate": 1e-05, "loss": 1.0313, "step": 25965 }, { "epoch": 23.002657218777678, "grad_norm": 0.26447778940200806, "learning_rate": 1e-05, "loss": 1.0328, "step": 25970 }, { "epoch": 23.00708591674048, "grad_norm": 0.2370622158050537, "learning_rate": 1e-05, "loss": 0.9439, "step": 25975 }, { "epoch": 23.011514614703277, "grad_norm": 0.2635786831378937, "learning_rate": 1e-05, "loss": 0.9517, "step": 25980 }, { "epoch": 23.015943312666074, "grad_norm": 0.28557124733924866, "learning_rate": 1e-05, "loss": 0.9687, "step": 25985 }, { "epoch": 23.020372010628876, "grad_norm": 0.237621009349823, "learning_rate": 1e-05, "loss": 0.9854, "step": 25990 }, { "epoch": 23.024800708591673, "grad_norm": 0.22556810081005096, "learning_rate": 1e-05, "loss": 0.9711, "step": 25995 }, { "epoch": 23.029229406554474, "grad_norm": 0.2727847099304199, "learning_rate": 1e-05, "loss": 0.9868, "step": 26000 }, { "epoch": 23.033658104517272, "grad_norm": 0.22376036643981934, "learning_rate": 1e-05, "loss": 0.9758, "step": 26005 }, { "epoch": 23.03808680248007, "grad_norm": 0.2665121853351593, "learning_rate": 1e-05, "loss": 1.0376, "step": 26010 }, { "epoch": 23.04251550044287, "grad_norm": 0.2564091980457306, "learning_rate": 1e-05, "loss": 0.9399, "step": 26015 }, { "epoch": 23.04694419840567, "grad_norm": 0.21694253385066986, "learning_rate": 1e-05, "loss": 0.9676, "step": 26020 }, { "epoch": 23.051372896368466, "grad_norm": 0.25690361857414246, "learning_rate": 1e-05, "loss": 0.9886, "step": 26025 }, { "epoch": 23.055801594331268, "grad_norm": 0.2262437343597412, "learning_rate": 1e-05, "loss": 0.9422, "step": 26030 }, { "epoch": 23.060230292294065, "grad_norm": 0.28153860569000244, "learning_rate": 1e-05, "loss": 1.0009, "step": 26035 }, { "epoch": 23.064658990256863, "grad_norm": 0.22980055212974548, "learning_rate": 1e-05, "loss": 1.0168, "step": 26040 }, { "epoch": 23.069087688219664, "grad_norm": 0.22773821651935577, "learning_rate": 1e-05, "loss": 0.9288, "step": 26045 }, { "epoch": 23.07351638618246, "grad_norm": 0.25532132387161255, "learning_rate": 1e-05, "loss": 0.9545, "step": 26050 }, { "epoch": 23.077945084145263, "grad_norm": 0.2534859776496887, "learning_rate": 1e-05, "loss": 0.9463, "step": 26055 }, { "epoch": 23.08237378210806, "grad_norm": 0.23883554339408875, "learning_rate": 1e-05, "loss": 1.0309, "step": 26060 }, { "epoch": 23.086802480070858, "grad_norm": 0.26781368255615234, "learning_rate": 1e-05, "loss": 1.0114, "step": 26065 }, { "epoch": 23.09123117803366, "grad_norm": 0.26020872592926025, "learning_rate": 1e-05, "loss": 0.954, "step": 26070 }, { "epoch": 23.095659875996457, "grad_norm": 0.2362697422504425, "learning_rate": 1e-05, "loss": 0.9895, "step": 26075 }, { "epoch": 23.100088573959255, "grad_norm": 0.21506093442440033, "learning_rate": 1e-05, "loss": 1.0409, "step": 26080 }, { "epoch": 23.104517271922056, "grad_norm": 0.2723117470741272, "learning_rate": 1e-05, "loss": 1.0065, "step": 26085 }, { "epoch": 23.108945969884854, "grad_norm": 0.2622233033180237, "learning_rate": 1e-05, "loss": 0.9316, "step": 26090 }, { "epoch": 23.11337466784765, "grad_norm": 0.21874207258224487, "learning_rate": 1e-05, "loss": 0.9893, "step": 26095 }, { "epoch": 23.117803365810452, "grad_norm": 0.2519303262233734, "learning_rate": 1e-05, "loss": 1.0597, "step": 26100 }, { "epoch": 23.12223206377325, "grad_norm": 0.2639300227165222, "learning_rate": 1e-05, "loss": 1.007, "step": 26105 }, { "epoch": 23.12666076173605, "grad_norm": 0.21876296401023865, "learning_rate": 1e-05, "loss": 0.9931, "step": 26110 }, { "epoch": 23.13108945969885, "grad_norm": 0.29018235206604004, "learning_rate": 1e-05, "loss": 0.9712, "step": 26115 }, { "epoch": 23.135518157661647, "grad_norm": 0.27658510208129883, "learning_rate": 1e-05, "loss": 0.9677, "step": 26120 }, { "epoch": 23.139946855624448, "grad_norm": 0.26128217577934265, "learning_rate": 1e-05, "loss": 0.9819, "step": 26125 }, { "epoch": 23.144375553587246, "grad_norm": 0.21736447513103485, "learning_rate": 1e-05, "loss": 0.9969, "step": 26130 }, { "epoch": 23.148804251550043, "grad_norm": 0.28176945447921753, "learning_rate": 1e-05, "loss": 0.9332, "step": 26135 }, { "epoch": 23.153232949512844, "grad_norm": 0.2477501779794693, "learning_rate": 1e-05, "loss": 0.9808, "step": 26140 }, { "epoch": 23.157661647475642, "grad_norm": 0.26699912548065186, "learning_rate": 1e-05, "loss": 0.9511, "step": 26145 }, { "epoch": 23.16209034543844, "grad_norm": 0.2769632041454315, "learning_rate": 1e-05, "loss": 0.993, "step": 26150 }, { "epoch": 23.16651904340124, "grad_norm": 0.24970702826976776, "learning_rate": 1e-05, "loss": 0.9915, "step": 26155 }, { "epoch": 23.17094774136404, "grad_norm": 0.19275449216365814, "learning_rate": 1e-05, "loss": 0.9487, "step": 26160 }, { "epoch": 23.175376439326836, "grad_norm": 0.25007882714271545, "learning_rate": 1e-05, "loss": 0.9622, "step": 26165 }, { "epoch": 23.179805137289637, "grad_norm": 0.25314250588417053, "learning_rate": 1e-05, "loss": 1.0154, "step": 26170 }, { "epoch": 23.184233835252435, "grad_norm": 0.2311410754919052, "learning_rate": 1e-05, "loss": 1.0029, "step": 26175 }, { "epoch": 23.188662533215236, "grad_norm": 0.2643571197986603, "learning_rate": 1e-05, "loss": 1.0143, "step": 26180 }, { "epoch": 23.193091231178034, "grad_norm": 0.240512877702713, "learning_rate": 1e-05, "loss": 1.0088, "step": 26185 }, { "epoch": 23.19751992914083, "grad_norm": 0.273680180311203, "learning_rate": 1e-05, "loss": 0.9437, "step": 26190 }, { "epoch": 23.201948627103633, "grad_norm": 0.25345948338508606, "learning_rate": 1e-05, "loss": 0.9953, "step": 26195 }, { "epoch": 23.20637732506643, "grad_norm": 0.27781733870506287, "learning_rate": 1e-05, "loss": 1.0163, "step": 26200 }, { "epoch": 23.210806023029228, "grad_norm": 0.26070523262023926, "learning_rate": 1e-05, "loss": 0.9973, "step": 26205 }, { "epoch": 23.21523472099203, "grad_norm": 0.23446990549564362, "learning_rate": 1e-05, "loss": 1.0359, "step": 26210 }, { "epoch": 23.219663418954827, "grad_norm": 0.23746508359909058, "learning_rate": 1e-05, "loss": 0.9426, "step": 26215 }, { "epoch": 23.224092116917625, "grad_norm": 0.2397100180387497, "learning_rate": 1e-05, "loss": 1.0564, "step": 26220 }, { "epoch": 23.228520814880426, "grad_norm": 0.23432020843029022, "learning_rate": 1e-05, "loss": 0.9709, "step": 26225 }, { "epoch": 23.232949512843224, "grad_norm": 0.26136159896850586, "learning_rate": 1e-05, "loss": 0.9619, "step": 26230 }, { "epoch": 23.237378210806025, "grad_norm": 0.2443007081747055, "learning_rate": 1e-05, "loss": 1.0139, "step": 26235 }, { "epoch": 23.241806908768822, "grad_norm": 0.2665751278400421, "learning_rate": 1e-05, "loss": 0.9956, "step": 26240 }, { "epoch": 23.24623560673162, "grad_norm": 0.2399630844593048, "learning_rate": 1e-05, "loss": 0.9312, "step": 26245 }, { "epoch": 23.25066430469442, "grad_norm": 0.26037630438804626, "learning_rate": 1e-05, "loss": 1.0022, "step": 26250 }, { "epoch": 23.25509300265722, "grad_norm": 0.2658340036869049, "learning_rate": 1e-05, "loss": 0.996, "step": 26255 }, { "epoch": 23.259521700620017, "grad_norm": 0.23740796744823456, "learning_rate": 1e-05, "loss": 0.9547, "step": 26260 }, { "epoch": 23.263950398582818, "grad_norm": 0.28052619099617004, "learning_rate": 1e-05, "loss": 0.9596, "step": 26265 }, { "epoch": 23.268379096545615, "grad_norm": 0.2324618250131607, "learning_rate": 1e-05, "loss": 1.0464, "step": 26270 }, { "epoch": 23.272807794508413, "grad_norm": 0.23283697664737701, "learning_rate": 1e-05, "loss": 0.904, "step": 26275 }, { "epoch": 23.277236492471214, "grad_norm": 0.25780996680259705, "learning_rate": 1e-05, "loss": 0.9553, "step": 26280 }, { "epoch": 23.281665190434012, "grad_norm": 0.2611989378929138, "learning_rate": 1e-05, "loss": 1.0288, "step": 26285 }, { "epoch": 23.28609388839681, "grad_norm": 0.2390451729297638, "learning_rate": 1e-05, "loss": 0.9718, "step": 26290 }, { "epoch": 23.29052258635961, "grad_norm": 0.28081047534942627, "learning_rate": 1e-05, "loss": 0.9498, "step": 26295 }, { "epoch": 23.29495128432241, "grad_norm": 0.2402516007423401, "learning_rate": 1e-05, "loss": 0.9753, "step": 26300 }, { "epoch": 23.29937998228521, "grad_norm": 0.30495649576187134, "learning_rate": 1e-05, "loss": 0.97, "step": 26305 }, { "epoch": 23.303808680248007, "grad_norm": 0.24615336954593658, "learning_rate": 1e-05, "loss": 1.0057, "step": 26310 }, { "epoch": 23.308237378210805, "grad_norm": 0.23095014691352844, "learning_rate": 1e-05, "loss": 0.9827, "step": 26315 }, { "epoch": 23.312666076173606, "grad_norm": 0.23776906728744507, "learning_rate": 1e-05, "loss": 0.9644, "step": 26320 }, { "epoch": 23.317094774136404, "grad_norm": 0.24809081852436066, "learning_rate": 1e-05, "loss": 1.0128, "step": 26325 }, { "epoch": 23.3215234720992, "grad_norm": 0.2409420609474182, "learning_rate": 1e-05, "loss": 0.9668, "step": 26330 }, { "epoch": 23.325952170062003, "grad_norm": 0.2269292175769806, "learning_rate": 1e-05, "loss": 0.9488, "step": 26335 }, { "epoch": 23.3303808680248, "grad_norm": 0.22666223347187042, "learning_rate": 1e-05, "loss": 0.966, "step": 26340 }, { "epoch": 23.334809565987598, "grad_norm": 0.25741687417030334, "learning_rate": 1e-05, "loss": 0.9582, "step": 26345 }, { "epoch": 23.3392382639504, "grad_norm": 0.23735107481479645, "learning_rate": 1e-05, "loss": 0.9942, "step": 26350 }, { "epoch": 23.343666961913197, "grad_norm": 0.24654647707939148, "learning_rate": 1e-05, "loss": 0.9333, "step": 26355 }, { "epoch": 23.348095659875998, "grad_norm": 0.2872961759567261, "learning_rate": 1e-05, "loss": 0.9575, "step": 26360 }, { "epoch": 23.352524357838796, "grad_norm": 0.204074427485466, "learning_rate": 1e-05, "loss": 0.9691, "step": 26365 }, { "epoch": 23.356953055801593, "grad_norm": 0.2723001539707184, "learning_rate": 1e-05, "loss": 0.9961, "step": 26370 }, { "epoch": 23.361381753764395, "grad_norm": 0.22920246422290802, "learning_rate": 1e-05, "loss": 0.9757, "step": 26375 }, { "epoch": 23.365810451727192, "grad_norm": 0.2627025842666626, "learning_rate": 1e-05, "loss": 1.055, "step": 26380 }, { "epoch": 23.37023914968999, "grad_norm": 0.222743421792984, "learning_rate": 1e-05, "loss": 0.959, "step": 26385 }, { "epoch": 23.37466784765279, "grad_norm": 0.24090582132339478, "learning_rate": 1e-05, "loss": 1.0313, "step": 26390 }, { "epoch": 23.37909654561559, "grad_norm": 0.24897602200508118, "learning_rate": 1e-05, "loss": 0.995, "step": 26395 }, { "epoch": 23.383525243578386, "grad_norm": 0.20534689724445343, "learning_rate": 1e-05, "loss": 1.0292, "step": 26400 }, { "epoch": 23.387953941541188, "grad_norm": 0.2780756652355194, "learning_rate": 1e-05, "loss": 0.9598, "step": 26405 }, { "epoch": 23.392382639503985, "grad_norm": 0.22269535064697266, "learning_rate": 1e-05, "loss": 0.9124, "step": 26410 }, { "epoch": 23.396811337466787, "grad_norm": 0.24354100227355957, "learning_rate": 1e-05, "loss": 0.9742, "step": 26415 }, { "epoch": 23.401240035429584, "grad_norm": 0.2916207015514374, "learning_rate": 1e-05, "loss": 1.0298, "step": 26420 }, { "epoch": 23.405668733392382, "grad_norm": 0.24010638892650604, "learning_rate": 1e-05, "loss": 0.8943, "step": 26425 }, { "epoch": 23.410097431355183, "grad_norm": 0.2418605238199234, "learning_rate": 1e-05, "loss": 0.9242, "step": 26430 }, { "epoch": 23.41452612931798, "grad_norm": 0.2563546299934387, "learning_rate": 1e-05, "loss": 1.0039, "step": 26435 }, { "epoch": 23.41895482728078, "grad_norm": 0.2244047373533249, "learning_rate": 1e-05, "loss": 1.0009, "step": 26440 }, { "epoch": 23.42338352524358, "grad_norm": 0.27586227655410767, "learning_rate": 1e-05, "loss": 0.9631, "step": 26445 }, { "epoch": 23.427812223206377, "grad_norm": 0.3002455234527588, "learning_rate": 1e-05, "loss": 1.0401, "step": 26450 }, { "epoch": 23.432240921169175, "grad_norm": 0.26960045099258423, "learning_rate": 1e-05, "loss": 0.9588, "step": 26455 }, { "epoch": 23.436669619131976, "grad_norm": 0.22955931723117828, "learning_rate": 1e-05, "loss": 0.9666, "step": 26460 }, { "epoch": 23.441098317094774, "grad_norm": 0.2313987910747528, "learning_rate": 1e-05, "loss": 0.9773, "step": 26465 }, { "epoch": 23.44552701505757, "grad_norm": 0.26987579464912415, "learning_rate": 1e-05, "loss": 1.0134, "step": 26470 }, { "epoch": 23.449955713020373, "grad_norm": 0.23977801203727722, "learning_rate": 1e-05, "loss": 0.9852, "step": 26475 }, { "epoch": 23.45438441098317, "grad_norm": 0.26626765727996826, "learning_rate": 1e-05, "loss": 1.0075, "step": 26480 }, { "epoch": 23.45881310894597, "grad_norm": 0.22593915462493896, "learning_rate": 1e-05, "loss": 0.9515, "step": 26485 }, { "epoch": 23.46324180690877, "grad_norm": 0.28595519065856934, "learning_rate": 1e-05, "loss": 0.9733, "step": 26490 }, { "epoch": 23.467670504871567, "grad_norm": 0.3095269203186035, "learning_rate": 1e-05, "loss": 1.0227, "step": 26495 }, { "epoch": 23.472099202834368, "grad_norm": 0.3001960217952728, "learning_rate": 1e-05, "loss": 1.0061, "step": 26500 }, { "epoch": 23.476527900797166, "grad_norm": 0.24239255487918854, "learning_rate": 1e-05, "loss": 0.9962, "step": 26505 }, { "epoch": 23.480956598759963, "grad_norm": 0.2582003176212311, "learning_rate": 1e-05, "loss": 0.9887, "step": 26510 }, { "epoch": 23.485385296722765, "grad_norm": 0.2729235291481018, "learning_rate": 1e-05, "loss": 0.98, "step": 26515 }, { "epoch": 23.489813994685562, "grad_norm": 0.3105107545852661, "learning_rate": 1e-05, "loss": 1.0541, "step": 26520 }, { "epoch": 23.49424269264836, "grad_norm": 0.29212746024131775, "learning_rate": 1e-05, "loss": 1.0159, "step": 26525 }, { "epoch": 23.49867139061116, "grad_norm": 0.21326103806495667, "learning_rate": 1e-05, "loss": 0.9834, "step": 26530 }, { "epoch": 23.50310008857396, "grad_norm": 0.2569035589694977, "learning_rate": 1e-05, "loss": 0.9602, "step": 26535 }, { "epoch": 23.50752878653676, "grad_norm": 0.23314517736434937, "learning_rate": 1e-05, "loss": 0.9702, "step": 26540 }, { "epoch": 23.511957484499558, "grad_norm": 0.24351991713047028, "learning_rate": 1e-05, "loss": 1.0429, "step": 26545 }, { "epoch": 23.516386182462355, "grad_norm": 0.23723074793815613, "learning_rate": 1e-05, "loss": 1.0061, "step": 26550 }, { "epoch": 23.520814880425156, "grad_norm": 0.2797541320323944, "learning_rate": 1e-05, "loss": 0.9954, "step": 26555 }, { "epoch": 23.525243578387954, "grad_norm": 0.2910453677177429, "learning_rate": 1e-05, "loss": 0.9853, "step": 26560 }, { "epoch": 23.52967227635075, "grad_norm": 0.23377171158790588, "learning_rate": 1e-05, "loss": 0.9982, "step": 26565 }, { "epoch": 23.534100974313553, "grad_norm": 0.24713104963302612, "learning_rate": 1e-05, "loss": 1.0055, "step": 26570 }, { "epoch": 23.53852967227635, "grad_norm": 0.24373966455459595, "learning_rate": 1e-05, "loss": 0.9539, "step": 26575 }, { "epoch": 23.54295837023915, "grad_norm": 0.3163232207298279, "learning_rate": 1e-05, "loss": 0.9967, "step": 26580 }, { "epoch": 23.54738706820195, "grad_norm": 0.2568933963775635, "learning_rate": 1e-05, "loss": 1.003, "step": 26585 }, { "epoch": 23.551815766164747, "grad_norm": 0.2469266951084137, "learning_rate": 1e-05, "loss": 0.9925, "step": 26590 }, { "epoch": 23.556244464127545, "grad_norm": 0.2674744129180908, "learning_rate": 1e-05, "loss": 0.9472, "step": 26595 }, { "epoch": 23.560673162090346, "grad_norm": 0.2934143543243408, "learning_rate": 1e-05, "loss": 0.9862, "step": 26600 }, { "epoch": 23.565101860053144, "grad_norm": 0.22752077877521515, "learning_rate": 1e-05, "loss": 1.0004, "step": 26605 }, { "epoch": 23.569530558015945, "grad_norm": 0.21492210030555725, "learning_rate": 1e-05, "loss": 0.9448, "step": 26610 }, { "epoch": 23.573959255978743, "grad_norm": 0.23824001848697662, "learning_rate": 1e-05, "loss": 1.036, "step": 26615 }, { "epoch": 23.57838795394154, "grad_norm": 0.25493600964546204, "learning_rate": 1e-05, "loss": 1.0313, "step": 26620 }, { "epoch": 23.58281665190434, "grad_norm": 0.2807728052139282, "learning_rate": 1e-05, "loss": 1.002, "step": 26625 }, { "epoch": 23.58724534986714, "grad_norm": 0.2720225155353546, "learning_rate": 1e-05, "loss": 1.0042, "step": 26630 }, { "epoch": 23.591674047829937, "grad_norm": 0.3188835382461548, "learning_rate": 1e-05, "loss": 1.0067, "step": 26635 }, { "epoch": 23.596102745792738, "grad_norm": 0.22400449216365814, "learning_rate": 1e-05, "loss": 0.974, "step": 26640 }, { "epoch": 23.600531443755536, "grad_norm": 0.30098628997802734, "learning_rate": 1e-05, "loss": 0.9976, "step": 26645 }, { "epoch": 23.604960141718333, "grad_norm": 0.2980384826660156, "learning_rate": 1e-05, "loss": 0.9941, "step": 26650 }, { "epoch": 23.609388839681134, "grad_norm": 0.2819817364215851, "learning_rate": 1e-05, "loss": 0.9487, "step": 26655 }, { "epoch": 23.613817537643932, "grad_norm": 0.2706122398376465, "learning_rate": 1e-05, "loss": 0.9953, "step": 26660 }, { "epoch": 23.618246235606733, "grad_norm": 0.22448629140853882, "learning_rate": 1e-05, "loss": 0.9995, "step": 26665 }, { "epoch": 23.62267493356953, "grad_norm": 0.2219371348619461, "learning_rate": 1e-05, "loss": 0.98, "step": 26670 }, { "epoch": 23.62710363153233, "grad_norm": 0.2392638772726059, "learning_rate": 1e-05, "loss": 0.9662, "step": 26675 }, { "epoch": 23.63153232949513, "grad_norm": 0.25862807035446167, "learning_rate": 1e-05, "loss": 0.9736, "step": 26680 }, { "epoch": 23.635961027457927, "grad_norm": 0.25160324573516846, "learning_rate": 1e-05, "loss": 0.9262, "step": 26685 }, { "epoch": 23.640389725420725, "grad_norm": 0.22142229974269867, "learning_rate": 1e-05, "loss": 0.9717, "step": 26690 }, { "epoch": 23.644818423383526, "grad_norm": 0.24858084321022034, "learning_rate": 1e-05, "loss": 1.0403, "step": 26695 }, { "epoch": 23.649247121346324, "grad_norm": 0.2293468862771988, "learning_rate": 1e-05, "loss": 1.0052, "step": 26700 }, { "epoch": 23.65367581930912, "grad_norm": 0.2500147521495819, "learning_rate": 1e-05, "loss": 1.0336, "step": 26705 }, { "epoch": 23.658104517271923, "grad_norm": 0.25640490651130676, "learning_rate": 1e-05, "loss": 1.0098, "step": 26710 }, { "epoch": 23.66253321523472, "grad_norm": 0.25516122579574585, "learning_rate": 1e-05, "loss": 0.9103, "step": 26715 }, { "epoch": 23.666961913197518, "grad_norm": 0.23220662772655487, "learning_rate": 1e-05, "loss": 1.0207, "step": 26720 }, { "epoch": 23.67139061116032, "grad_norm": 0.2558310925960541, "learning_rate": 1e-05, "loss": 1.0237, "step": 26725 }, { "epoch": 23.675819309123117, "grad_norm": 0.2426174432039261, "learning_rate": 1e-05, "loss": 1.0005, "step": 26730 }, { "epoch": 23.68024800708592, "grad_norm": 0.20205755531787872, "learning_rate": 1e-05, "loss": 0.9982, "step": 26735 }, { "epoch": 23.684676705048716, "grad_norm": 0.2598371207714081, "learning_rate": 1e-05, "loss": 0.9658, "step": 26740 }, { "epoch": 23.689105403011514, "grad_norm": 0.2728954553604126, "learning_rate": 1e-05, "loss": 0.9806, "step": 26745 }, { "epoch": 23.693534100974315, "grad_norm": 0.2757314145565033, "learning_rate": 1e-05, "loss": 0.9592, "step": 26750 }, { "epoch": 23.697962798937112, "grad_norm": 0.2727743089199066, "learning_rate": 1e-05, "loss": 0.9999, "step": 26755 }, { "epoch": 23.70239149689991, "grad_norm": 0.2958875000476837, "learning_rate": 1e-05, "loss": 0.9902, "step": 26760 }, { "epoch": 23.70682019486271, "grad_norm": 0.25355657935142517, "learning_rate": 1e-05, "loss": 0.9704, "step": 26765 }, { "epoch": 23.71124889282551, "grad_norm": 0.24307024478912354, "learning_rate": 1e-05, "loss": 0.9721, "step": 26770 }, { "epoch": 23.715677590788307, "grad_norm": 0.23818360269069672, "learning_rate": 1e-05, "loss": 0.9959, "step": 26775 }, { "epoch": 23.720106288751108, "grad_norm": 0.18384967744350433, "learning_rate": 1e-05, "loss": 0.9937, "step": 26780 }, { "epoch": 23.724534986713905, "grad_norm": 0.253822386264801, "learning_rate": 1e-05, "loss": 1.039, "step": 26785 }, { "epoch": 23.728963684676707, "grad_norm": 0.26108798384666443, "learning_rate": 1e-05, "loss": 1.0323, "step": 26790 }, { "epoch": 23.733392382639504, "grad_norm": 0.2243223637342453, "learning_rate": 1e-05, "loss": 0.9786, "step": 26795 }, { "epoch": 23.737821080602302, "grad_norm": 0.2187839150428772, "learning_rate": 1e-05, "loss": 0.993, "step": 26800 }, { "epoch": 23.742249778565103, "grad_norm": 0.28664642572402954, "learning_rate": 1e-05, "loss": 0.9625, "step": 26805 }, { "epoch": 23.7466784765279, "grad_norm": 0.31537994742393494, "learning_rate": 1e-05, "loss": 0.962, "step": 26810 }, { "epoch": 23.7511071744907, "grad_norm": 0.28046277165412903, "learning_rate": 1e-05, "loss": 0.9863, "step": 26815 }, { "epoch": 23.7555358724535, "grad_norm": 0.23828428983688354, "learning_rate": 1e-05, "loss": 1.0157, "step": 26820 }, { "epoch": 23.759964570416297, "grad_norm": 0.2972710132598877, "learning_rate": 1e-05, "loss": 0.9796, "step": 26825 }, { "epoch": 23.764393268379095, "grad_norm": 0.266639769077301, "learning_rate": 1e-05, "loss": 0.9976, "step": 26830 }, { "epoch": 23.768821966341896, "grad_norm": 0.21086819469928741, "learning_rate": 1e-05, "loss": 0.97, "step": 26835 }, { "epoch": 23.773250664304694, "grad_norm": 0.303099125623703, "learning_rate": 1e-05, "loss": 1.0193, "step": 26840 }, { "epoch": 23.77767936226749, "grad_norm": 0.2552616596221924, "learning_rate": 1e-05, "loss": 1.0156, "step": 26845 }, { "epoch": 23.782108060230293, "grad_norm": 0.21964603662490845, "learning_rate": 1e-05, "loss": 0.9613, "step": 26850 }, { "epoch": 23.78653675819309, "grad_norm": 0.23729275166988373, "learning_rate": 1e-05, "loss": 0.9734, "step": 26855 }, { "epoch": 23.79096545615589, "grad_norm": 0.26136288046836853, "learning_rate": 1e-05, "loss": 0.9955, "step": 26860 }, { "epoch": 23.79539415411869, "grad_norm": 0.2399042397737503, "learning_rate": 1e-05, "loss": 1.0261, "step": 26865 }, { "epoch": 23.799822852081487, "grad_norm": 0.25391727685928345, "learning_rate": 1e-05, "loss": 0.9903, "step": 26870 }, { "epoch": 23.804251550044288, "grad_norm": 0.280653178691864, "learning_rate": 1e-05, "loss": 1.0162, "step": 26875 }, { "epoch": 23.808680248007086, "grad_norm": 0.27748042345046997, "learning_rate": 1e-05, "loss": 1.0059, "step": 26880 }, { "epoch": 23.813108945969883, "grad_norm": 0.2171100676059723, "learning_rate": 1e-05, "loss": 0.9591, "step": 26885 }, { "epoch": 23.817537643932685, "grad_norm": 0.26859402656555176, "learning_rate": 1e-05, "loss": 1.0109, "step": 26890 }, { "epoch": 23.821966341895482, "grad_norm": 0.2783423066139221, "learning_rate": 1e-05, "loss": 0.9704, "step": 26895 }, { "epoch": 23.82639503985828, "grad_norm": 0.3145160377025604, "learning_rate": 1e-05, "loss": 1.0251, "step": 26900 }, { "epoch": 23.83082373782108, "grad_norm": 0.26615825295448303, "learning_rate": 1e-05, "loss": 0.9924, "step": 26905 }, { "epoch": 23.83525243578388, "grad_norm": 0.26348012685775757, "learning_rate": 1e-05, "loss": 0.9234, "step": 26910 }, { "epoch": 23.83968113374668, "grad_norm": 0.25390365719795227, "learning_rate": 1e-05, "loss": 0.9818, "step": 26915 }, { "epoch": 23.844109831709478, "grad_norm": 0.22636613249778748, "learning_rate": 1e-05, "loss": 0.9888, "step": 26920 }, { "epoch": 23.848538529672275, "grad_norm": 0.24785874783992767, "learning_rate": 1e-05, "loss": 0.9902, "step": 26925 }, { "epoch": 23.852967227635077, "grad_norm": 0.23465396463871002, "learning_rate": 1e-05, "loss": 1.0359, "step": 26930 }, { "epoch": 23.857395925597874, "grad_norm": 0.2235262095928192, "learning_rate": 1e-05, "loss": 0.9991, "step": 26935 }, { "epoch": 23.861824623560672, "grad_norm": 0.24740758538246155, "learning_rate": 1e-05, "loss": 0.9913, "step": 26940 }, { "epoch": 23.866253321523473, "grad_norm": 0.26221954822540283, "learning_rate": 1e-05, "loss": 1.0154, "step": 26945 }, { "epoch": 23.87068201948627, "grad_norm": 0.23343157768249512, "learning_rate": 1e-05, "loss": 0.9031, "step": 26950 }, { "epoch": 23.87511071744907, "grad_norm": 0.24470555782318115, "learning_rate": 1e-05, "loss": 1.0282, "step": 26955 }, { "epoch": 23.87953941541187, "grad_norm": 0.24176916480064392, "learning_rate": 1e-05, "loss": 1.0157, "step": 26960 }, { "epoch": 23.883968113374667, "grad_norm": 0.2797878086566925, "learning_rate": 1e-05, "loss": 0.9906, "step": 26965 }, { "epoch": 23.888396811337465, "grad_norm": 0.2574193775653839, "learning_rate": 1e-05, "loss": 1.0255, "step": 26970 }, { "epoch": 23.892825509300266, "grad_norm": 0.2504587769508362, "learning_rate": 1e-05, "loss": 0.9221, "step": 26975 }, { "epoch": 23.897254207263064, "grad_norm": 0.20924057066440582, "learning_rate": 1e-05, "loss": 0.9668, "step": 26980 }, { "epoch": 23.901682905225865, "grad_norm": 0.2794031500816345, "learning_rate": 1e-05, "loss": 1.048, "step": 26985 }, { "epoch": 23.906111603188663, "grad_norm": 0.28351205587387085, "learning_rate": 1e-05, "loss": 1.0178, "step": 26990 }, { "epoch": 23.91054030115146, "grad_norm": 0.26499977707862854, "learning_rate": 1e-05, "loss": 0.9502, "step": 26995 }, { "epoch": 23.91496899911426, "grad_norm": 0.2845896780490875, "learning_rate": 1e-05, "loss": 1.0139, "step": 27000 }, { "epoch": 23.91939769707706, "grad_norm": 0.23948568105697632, "learning_rate": 1e-05, "loss": 0.9647, "step": 27005 }, { "epoch": 23.923826395039857, "grad_norm": 0.22813214361667633, "learning_rate": 1e-05, "loss": 0.9919, "step": 27010 }, { "epoch": 23.928255093002658, "grad_norm": 0.2147350162267685, "learning_rate": 1e-05, "loss": 0.973, "step": 27015 }, { "epoch": 23.932683790965456, "grad_norm": 0.2525482177734375, "learning_rate": 1e-05, "loss": 0.971, "step": 27020 }, { "epoch": 23.937112488928253, "grad_norm": 0.226675882935524, "learning_rate": 1e-05, "loss": 0.9418, "step": 27025 }, { "epoch": 23.941541186891055, "grad_norm": 0.25734806060791016, "learning_rate": 1e-05, "loss": 0.9437, "step": 27030 }, { "epoch": 23.945969884853852, "grad_norm": 0.27355778217315674, "learning_rate": 1e-05, "loss": 1.0494, "step": 27035 }, { "epoch": 23.950398582816653, "grad_norm": 0.24465897679328918, "learning_rate": 1e-05, "loss": 0.9701, "step": 27040 }, { "epoch": 23.95482728077945, "grad_norm": 0.24087879061698914, "learning_rate": 1e-05, "loss": 0.9451, "step": 27045 }, { "epoch": 23.95925597874225, "grad_norm": 0.23881228268146515, "learning_rate": 1e-05, "loss": 1.0382, "step": 27050 }, { "epoch": 23.96368467670505, "grad_norm": 0.2686094343662262, "learning_rate": 1e-05, "loss": 0.9762, "step": 27055 }, { "epoch": 23.968113374667848, "grad_norm": 0.28021782636642456, "learning_rate": 1e-05, "loss": 0.9955, "step": 27060 }, { "epoch": 23.972542072630645, "grad_norm": 0.29325753450393677, "learning_rate": 1e-05, "loss": 0.9816, "step": 27065 }, { "epoch": 23.976970770593447, "grad_norm": 0.2697674632072449, "learning_rate": 1e-05, "loss": 0.9965, "step": 27070 }, { "epoch": 23.981399468556244, "grad_norm": 0.30482274293899536, "learning_rate": 1e-05, "loss": 0.9457, "step": 27075 }, { "epoch": 23.985828166519042, "grad_norm": 0.26910868287086487, "learning_rate": 1e-05, "loss": 0.9552, "step": 27080 }, { "epoch": 23.990256864481843, "grad_norm": 0.3090648055076599, "learning_rate": 1e-05, "loss": 0.9891, "step": 27085 }, { "epoch": 23.99468556244464, "grad_norm": 0.2467624545097351, "learning_rate": 1e-05, "loss": 0.9954, "step": 27090 }, { "epoch": 23.999114260407442, "grad_norm": 0.2364131510257721, "learning_rate": 1e-05, "loss": 0.966, "step": 27095 }, { "epoch": 24.00354295837024, "grad_norm": 0.2566116750240326, "learning_rate": 1e-05, "loss": 0.9675, "step": 27100 }, { "epoch": 24.007971656333037, "grad_norm": 0.22930893301963806, "learning_rate": 1e-05, "loss": 0.9978, "step": 27105 }, { "epoch": 24.01240035429584, "grad_norm": 0.2578718364238739, "learning_rate": 1e-05, "loss": 0.9722, "step": 27110 }, { "epoch": 24.016829052258636, "grad_norm": 0.26853352785110474, "learning_rate": 1e-05, "loss": 0.9948, "step": 27115 }, { "epoch": 24.021257750221434, "grad_norm": 0.20939262211322784, "learning_rate": 1e-05, "loss": 0.9977, "step": 27120 }, { "epoch": 24.025686448184235, "grad_norm": 0.2888830304145813, "learning_rate": 1e-05, "loss": 0.9807, "step": 27125 }, { "epoch": 24.030115146147033, "grad_norm": 0.29009345173835754, "learning_rate": 1e-05, "loss": 0.9808, "step": 27130 }, { "epoch": 24.03454384410983, "grad_norm": 0.274711936712265, "learning_rate": 1e-05, "loss": 0.9799, "step": 27135 }, { "epoch": 24.03897254207263, "grad_norm": 0.26842740178108215, "learning_rate": 1e-05, "loss": 0.9587, "step": 27140 }, { "epoch": 24.04340124003543, "grad_norm": 0.279020756483078, "learning_rate": 1e-05, "loss": 0.9554, "step": 27145 }, { "epoch": 24.047829937998227, "grad_norm": 0.2553081214427948, "learning_rate": 1e-05, "loss": 0.9695, "step": 27150 }, { "epoch": 24.052258635961028, "grad_norm": 0.30284035205841064, "learning_rate": 1e-05, "loss": 1.0402, "step": 27155 }, { "epoch": 24.056687333923826, "grad_norm": 0.2558462917804718, "learning_rate": 1e-05, "loss": 0.9837, "step": 27160 }, { "epoch": 24.061116031886627, "grad_norm": 0.2524622678756714, "learning_rate": 1e-05, "loss": 0.9624, "step": 27165 }, { "epoch": 24.065544729849424, "grad_norm": 0.24786490201950073, "learning_rate": 1e-05, "loss": 0.9888, "step": 27170 }, { "epoch": 24.069973427812222, "grad_norm": 0.3078889548778534, "learning_rate": 1e-05, "loss": 0.9606, "step": 27175 }, { "epoch": 24.074402125775023, "grad_norm": 0.24278715252876282, "learning_rate": 1e-05, "loss": 0.9946, "step": 27180 }, { "epoch": 24.07883082373782, "grad_norm": 0.22713172435760498, "learning_rate": 1e-05, "loss": 0.9748, "step": 27185 }, { "epoch": 24.08325952170062, "grad_norm": 0.2256680727005005, "learning_rate": 1e-05, "loss": 1.0314, "step": 27190 }, { "epoch": 24.08768821966342, "grad_norm": 0.24600300192832947, "learning_rate": 1e-05, "loss": 0.9644, "step": 27195 }, { "epoch": 24.092116917626218, "grad_norm": 0.26029497385025024, "learning_rate": 1e-05, "loss": 0.9776, "step": 27200 }, { "epoch": 24.096545615589015, "grad_norm": 0.28288689255714417, "learning_rate": 1e-05, "loss": 0.9958, "step": 27205 }, { "epoch": 24.100974313551816, "grad_norm": 0.26191774010658264, "learning_rate": 1e-05, "loss": 0.9686, "step": 27210 }, { "epoch": 24.105403011514614, "grad_norm": 0.23438803851604462, "learning_rate": 1e-05, "loss": 0.8922, "step": 27215 }, { "epoch": 24.109831709477415, "grad_norm": 0.23090486228466034, "learning_rate": 1e-05, "loss": 1.0033, "step": 27220 }, { "epoch": 24.114260407440213, "grad_norm": 0.26005062460899353, "learning_rate": 1e-05, "loss": 0.9689, "step": 27225 }, { "epoch": 24.11868910540301, "grad_norm": 0.2271093875169754, "learning_rate": 1e-05, "loss": 1.0015, "step": 27230 }, { "epoch": 24.123117803365812, "grad_norm": 0.2458283007144928, "learning_rate": 1e-05, "loss": 0.9642, "step": 27235 }, { "epoch": 24.12754650132861, "grad_norm": 0.2611621022224426, "learning_rate": 1e-05, "loss": 0.9877, "step": 27240 }, { "epoch": 24.131975199291407, "grad_norm": 0.22730910778045654, "learning_rate": 1e-05, "loss": 0.9812, "step": 27245 }, { "epoch": 24.13640389725421, "grad_norm": 0.23347064852714539, "learning_rate": 1e-05, "loss": 0.9876, "step": 27250 }, { "epoch": 24.140832595217006, "grad_norm": 0.2405199259519577, "learning_rate": 1e-05, "loss": 1.003, "step": 27255 }, { "epoch": 24.145261293179804, "grad_norm": 0.29748761653900146, "learning_rate": 1e-05, "loss": 0.9968, "step": 27260 }, { "epoch": 24.149689991142605, "grad_norm": 0.29782795906066895, "learning_rate": 1e-05, "loss": 0.9799, "step": 27265 }, { "epoch": 24.154118689105402, "grad_norm": 0.2286754995584488, "learning_rate": 1e-05, "loss": 1.0049, "step": 27270 }, { "epoch": 24.158547387068204, "grad_norm": 0.26479247212409973, "learning_rate": 1e-05, "loss": 0.947, "step": 27275 }, { "epoch": 24.162976085031, "grad_norm": 0.2673901319503784, "learning_rate": 1e-05, "loss": 1.0313, "step": 27280 }, { "epoch": 24.1674047829938, "grad_norm": 0.267720103263855, "learning_rate": 1e-05, "loss": 0.958, "step": 27285 }, { "epoch": 24.1718334809566, "grad_norm": 0.23425528407096863, "learning_rate": 1e-05, "loss": 0.968, "step": 27290 }, { "epoch": 24.176262178919398, "grad_norm": 0.27703917026519775, "learning_rate": 1e-05, "loss": 0.9849, "step": 27295 }, { "epoch": 24.180690876882196, "grad_norm": 0.25991371273994446, "learning_rate": 1e-05, "loss": 0.9796, "step": 27300 }, { "epoch": 24.185119574844997, "grad_norm": 0.27498453855514526, "learning_rate": 1e-05, "loss": 0.9598, "step": 27305 }, { "epoch": 24.189548272807794, "grad_norm": 0.25150948762893677, "learning_rate": 1e-05, "loss": 1.023, "step": 27310 }, { "epoch": 24.193976970770592, "grad_norm": 0.22732007503509521, "learning_rate": 1e-05, "loss": 1.0033, "step": 27315 }, { "epoch": 24.198405668733393, "grad_norm": 0.21285602450370789, "learning_rate": 1e-05, "loss": 0.9839, "step": 27320 }, { "epoch": 24.20283436669619, "grad_norm": 0.2449236959218979, "learning_rate": 1e-05, "loss": 0.9612, "step": 27325 }, { "epoch": 24.20726306465899, "grad_norm": 0.24172784388065338, "learning_rate": 1e-05, "loss": 0.9622, "step": 27330 }, { "epoch": 24.21169176262179, "grad_norm": 0.3550632894039154, "learning_rate": 1e-05, "loss": 0.9997, "step": 27335 }, { "epoch": 24.216120460584587, "grad_norm": 0.30615052580833435, "learning_rate": 1e-05, "loss": 0.9928, "step": 27340 }, { "epoch": 24.22054915854739, "grad_norm": 0.2539338767528534, "learning_rate": 1e-05, "loss": 0.9455, "step": 27345 }, { "epoch": 24.224977856510186, "grad_norm": 0.28723493218421936, "learning_rate": 1e-05, "loss": 0.9851, "step": 27350 }, { "epoch": 24.229406554472984, "grad_norm": 0.2576487958431244, "learning_rate": 1e-05, "loss": 0.9734, "step": 27355 }, { "epoch": 24.233835252435785, "grad_norm": 0.2898792624473572, "learning_rate": 1e-05, "loss": 0.9581, "step": 27360 }, { "epoch": 24.238263950398583, "grad_norm": 0.2693891227245331, "learning_rate": 1e-05, "loss": 1.0249, "step": 27365 }, { "epoch": 24.24269264836138, "grad_norm": 0.24380403757095337, "learning_rate": 1e-05, "loss": 0.9986, "step": 27370 }, { "epoch": 24.24712134632418, "grad_norm": 0.26417067646980286, "learning_rate": 1e-05, "loss": 1.0023, "step": 27375 }, { "epoch": 24.25155004428698, "grad_norm": 0.2408972829580307, "learning_rate": 1e-05, "loss": 1.0046, "step": 27380 }, { "epoch": 24.255978742249777, "grad_norm": 0.24388913810253143, "learning_rate": 1e-05, "loss": 1.01, "step": 27385 }, { "epoch": 24.260407440212578, "grad_norm": 0.26220259070396423, "learning_rate": 1e-05, "loss": 0.9821, "step": 27390 }, { "epoch": 24.264836138175376, "grad_norm": 0.2537146806716919, "learning_rate": 1e-05, "loss": 1.017, "step": 27395 }, { "epoch": 24.269264836138177, "grad_norm": 0.2604083716869354, "learning_rate": 1e-05, "loss": 0.9791, "step": 27400 }, { "epoch": 24.273693534100975, "grad_norm": 0.21949341893196106, "learning_rate": 1e-05, "loss": 0.963, "step": 27405 }, { "epoch": 24.278122232063772, "grad_norm": 0.22510309517383575, "learning_rate": 1e-05, "loss": 0.9561, "step": 27410 }, { "epoch": 24.282550930026574, "grad_norm": 0.2591549754142761, "learning_rate": 1e-05, "loss": 0.9981, "step": 27415 }, { "epoch": 24.28697962798937, "grad_norm": 0.2637147605419159, "learning_rate": 1e-05, "loss": 0.9928, "step": 27420 }, { "epoch": 24.29140832595217, "grad_norm": 0.22815559804439545, "learning_rate": 1e-05, "loss": 0.9868, "step": 27425 }, { "epoch": 24.29583702391497, "grad_norm": 0.23618504405021667, "learning_rate": 1e-05, "loss": 0.9889, "step": 27430 }, { "epoch": 24.300265721877768, "grad_norm": 0.25723832845687866, "learning_rate": 1e-05, "loss": 0.9216, "step": 27435 }, { "epoch": 24.304694419840565, "grad_norm": 0.2191542685031891, "learning_rate": 1e-05, "loss": 0.9611, "step": 27440 }, { "epoch": 24.309123117803367, "grad_norm": 0.2241484522819519, "learning_rate": 1e-05, "loss": 0.9732, "step": 27445 }, { "epoch": 24.313551815766164, "grad_norm": 0.2691168487071991, "learning_rate": 1e-05, "loss": 1.026, "step": 27450 }, { "epoch": 24.317980513728962, "grad_norm": 0.21560446918010712, "learning_rate": 1e-05, "loss": 0.9947, "step": 27455 }, { "epoch": 24.322409211691763, "grad_norm": 0.23158149421215057, "learning_rate": 1e-05, "loss": 0.9618, "step": 27460 }, { "epoch": 24.32683790965456, "grad_norm": 0.24868138134479523, "learning_rate": 1e-05, "loss": 0.9724, "step": 27465 }, { "epoch": 24.331266607617362, "grad_norm": 0.219271719455719, "learning_rate": 1e-05, "loss": 0.9967, "step": 27470 }, { "epoch": 24.33569530558016, "grad_norm": 0.25831377506256104, "learning_rate": 1e-05, "loss": 1.0129, "step": 27475 }, { "epoch": 24.340124003542957, "grad_norm": 0.24598956108093262, "learning_rate": 1e-05, "loss": 0.9715, "step": 27480 }, { "epoch": 24.34455270150576, "grad_norm": 0.25745877623558044, "learning_rate": 1e-05, "loss": 0.9863, "step": 27485 }, { "epoch": 24.348981399468556, "grad_norm": 0.23913982510566711, "learning_rate": 1e-05, "loss": 0.9955, "step": 27490 }, { "epoch": 24.353410097431354, "grad_norm": 0.28351959586143494, "learning_rate": 1e-05, "loss": 0.9893, "step": 27495 }, { "epoch": 24.357838795394155, "grad_norm": 0.24848684668540955, "learning_rate": 1e-05, "loss": 0.9695, "step": 27500 }, { "epoch": 24.362267493356953, "grad_norm": 0.20931607484817505, "learning_rate": 1e-05, "loss": 0.9719, "step": 27505 }, { "epoch": 24.36669619131975, "grad_norm": 0.23935669660568237, "learning_rate": 1e-05, "loss": 1.0215, "step": 27510 }, { "epoch": 24.37112488928255, "grad_norm": 0.2865438163280487, "learning_rate": 1e-05, "loss": 0.9955, "step": 27515 }, { "epoch": 24.37555358724535, "grad_norm": 0.25080606341362, "learning_rate": 1e-05, "loss": 0.9873, "step": 27520 }, { "epoch": 24.37998228520815, "grad_norm": 0.22788484394550323, "learning_rate": 1e-05, "loss": 1.0043, "step": 27525 }, { "epoch": 24.384410983170948, "grad_norm": 0.2690378725528717, "learning_rate": 1e-05, "loss": 0.9387, "step": 27530 }, { "epoch": 24.388839681133746, "grad_norm": 0.2662249803543091, "learning_rate": 1e-05, "loss": 0.9971, "step": 27535 }, { "epoch": 24.393268379096547, "grad_norm": 0.25115519762039185, "learning_rate": 1e-05, "loss": 0.9295, "step": 27540 }, { "epoch": 24.397697077059345, "grad_norm": 0.27152177691459656, "learning_rate": 1e-05, "loss": 0.9463, "step": 27545 }, { "epoch": 24.402125775022142, "grad_norm": 0.2977381646633148, "learning_rate": 1e-05, "loss": 0.9823, "step": 27550 }, { "epoch": 24.406554472984944, "grad_norm": 0.2588966488838196, "learning_rate": 1e-05, "loss": 0.9761, "step": 27555 }, { "epoch": 24.41098317094774, "grad_norm": 0.22470641136169434, "learning_rate": 1e-05, "loss": 0.9846, "step": 27560 }, { "epoch": 24.41541186891054, "grad_norm": 0.27989575266838074, "learning_rate": 1e-05, "loss": 0.9736, "step": 27565 }, { "epoch": 24.41984056687334, "grad_norm": 0.2517945468425751, "learning_rate": 1e-05, "loss": 1.0279, "step": 27570 }, { "epoch": 24.424269264836138, "grad_norm": 0.23654067516326904, "learning_rate": 1e-05, "loss": 1.0133, "step": 27575 }, { "epoch": 24.428697962798935, "grad_norm": 0.23164743185043335, "learning_rate": 1e-05, "loss": 0.9719, "step": 27580 }, { "epoch": 24.433126660761737, "grad_norm": 0.24988412857055664, "learning_rate": 1e-05, "loss": 0.973, "step": 27585 }, { "epoch": 24.437555358724534, "grad_norm": 0.27342689037323, "learning_rate": 1e-05, "loss": 0.9573, "step": 27590 }, { "epoch": 24.441984056687335, "grad_norm": 0.22409027814865112, "learning_rate": 1e-05, "loss": 0.9362, "step": 27595 }, { "epoch": 24.446412754650133, "grad_norm": 0.21091631054878235, "learning_rate": 1e-05, "loss": 0.9727, "step": 27600 }, { "epoch": 24.45084145261293, "grad_norm": 0.20956432819366455, "learning_rate": 1e-05, "loss": 0.9924, "step": 27605 }, { "epoch": 24.455270150575732, "grad_norm": 0.257333368062973, "learning_rate": 1e-05, "loss": 0.9868, "step": 27610 }, { "epoch": 24.45969884853853, "grad_norm": 0.2603904604911804, "learning_rate": 1e-05, "loss": 0.9765, "step": 27615 }, { "epoch": 24.464127546501327, "grad_norm": 0.24283568561077118, "learning_rate": 1e-05, "loss": 0.9842, "step": 27620 }, { "epoch": 24.46855624446413, "grad_norm": 0.26459357142448425, "learning_rate": 1e-05, "loss": 0.9714, "step": 27625 }, { "epoch": 24.472984942426926, "grad_norm": 0.23536963760852814, "learning_rate": 1e-05, "loss": 0.9817, "step": 27630 }, { "epoch": 24.477413640389724, "grad_norm": 0.26077204942703247, "learning_rate": 1e-05, "loss": 0.9281, "step": 27635 }, { "epoch": 24.481842338352525, "grad_norm": 0.19926729798316956, "learning_rate": 1e-05, "loss": 0.9815, "step": 27640 }, { "epoch": 24.486271036315323, "grad_norm": 0.24146565794944763, "learning_rate": 1e-05, "loss": 0.9637, "step": 27645 }, { "epoch": 24.490699734278124, "grad_norm": 0.24525809288024902, "learning_rate": 1e-05, "loss": 1.0108, "step": 27650 }, { "epoch": 24.49512843224092, "grad_norm": 0.275239497423172, "learning_rate": 1e-05, "loss": 0.9851, "step": 27655 }, { "epoch": 24.49955713020372, "grad_norm": 0.39674660563468933, "learning_rate": 1e-05, "loss": 0.9784, "step": 27660 }, { "epoch": 24.50398582816652, "grad_norm": 0.25674673914909363, "learning_rate": 1e-05, "loss": 0.9873, "step": 27665 }, { "epoch": 24.508414526129318, "grad_norm": 0.27644819021224976, "learning_rate": 1e-05, "loss": 0.9701, "step": 27670 }, { "epoch": 24.512843224092116, "grad_norm": 0.22909076511859894, "learning_rate": 1e-05, "loss": 0.9799, "step": 27675 }, { "epoch": 24.517271922054917, "grad_norm": 0.2632433772087097, "learning_rate": 1e-05, "loss": 0.9517, "step": 27680 }, { "epoch": 24.521700620017715, "grad_norm": 0.24753408133983612, "learning_rate": 1e-05, "loss": 0.9459, "step": 27685 }, { "epoch": 24.526129317980512, "grad_norm": 0.21823808550834656, "learning_rate": 1e-05, "loss": 0.9655, "step": 27690 }, { "epoch": 24.530558015943313, "grad_norm": 0.25986164808273315, "learning_rate": 1e-05, "loss": 1.0138, "step": 27695 }, { "epoch": 24.53498671390611, "grad_norm": 0.2600781321525574, "learning_rate": 1e-05, "loss": 0.9802, "step": 27700 }, { "epoch": 24.53941541186891, "grad_norm": 0.2880846858024597, "learning_rate": 1e-05, "loss": 0.9884, "step": 27705 }, { "epoch": 24.54384410983171, "grad_norm": 0.27464109659194946, "learning_rate": 1e-05, "loss": 0.9467, "step": 27710 }, { "epoch": 24.548272807794508, "grad_norm": 0.26930370926856995, "learning_rate": 1e-05, "loss": 0.9653, "step": 27715 }, { "epoch": 24.55270150575731, "grad_norm": 0.23947246372699738, "learning_rate": 1e-05, "loss": 1.0088, "step": 27720 }, { "epoch": 24.557130203720106, "grad_norm": 0.23891033232212067, "learning_rate": 1e-05, "loss": 0.9878, "step": 27725 }, { "epoch": 24.561558901682904, "grad_norm": 0.21519169211387634, "learning_rate": 1e-05, "loss": 0.9879, "step": 27730 }, { "epoch": 24.565987599645705, "grad_norm": 0.22921505570411682, "learning_rate": 1e-05, "loss": 1.0007, "step": 27735 }, { "epoch": 24.570416297608503, "grad_norm": 0.2791348099708557, "learning_rate": 1e-05, "loss": 0.9652, "step": 27740 }, { "epoch": 24.5748449955713, "grad_norm": 0.293082594871521, "learning_rate": 1e-05, "loss": 0.9712, "step": 27745 }, { "epoch": 24.579273693534102, "grad_norm": 0.2287125140428543, "learning_rate": 1e-05, "loss": 1.0203, "step": 27750 }, { "epoch": 24.5837023914969, "grad_norm": 0.23439842462539673, "learning_rate": 1e-05, "loss": 0.9283, "step": 27755 }, { "epoch": 24.588131089459697, "grad_norm": 0.2438921481370926, "learning_rate": 1e-05, "loss": 0.9732, "step": 27760 }, { "epoch": 24.5925597874225, "grad_norm": 0.27476295828819275, "learning_rate": 1e-05, "loss": 0.9986, "step": 27765 }, { "epoch": 24.596988485385296, "grad_norm": 0.23507197201251984, "learning_rate": 1e-05, "loss": 0.9388, "step": 27770 }, { "epoch": 24.601417183348097, "grad_norm": 0.28731539845466614, "learning_rate": 1e-05, "loss": 1.033, "step": 27775 }, { "epoch": 24.605845881310895, "grad_norm": 0.24496515095233917, "learning_rate": 1e-05, "loss": 1.0075, "step": 27780 }, { "epoch": 24.610274579273693, "grad_norm": 0.2469015121459961, "learning_rate": 1e-05, "loss": 0.9914, "step": 27785 }, { "epoch": 24.614703277236494, "grad_norm": 0.23915302753448486, "learning_rate": 1e-05, "loss": 0.917, "step": 27790 }, { "epoch": 24.61913197519929, "grad_norm": 0.22897915542125702, "learning_rate": 1e-05, "loss": 1.0074, "step": 27795 }, { "epoch": 24.62356067316209, "grad_norm": 0.2170686423778534, "learning_rate": 1e-05, "loss": 0.9838, "step": 27800 }, { "epoch": 24.62798937112489, "grad_norm": 0.26316043734550476, "learning_rate": 1e-05, "loss": 0.9999, "step": 27805 }, { "epoch": 24.632418069087688, "grad_norm": 0.25438329577445984, "learning_rate": 1e-05, "loss": 0.9584, "step": 27810 }, { "epoch": 24.636846767050486, "grad_norm": 0.25038447976112366, "learning_rate": 1e-05, "loss": 0.9611, "step": 27815 }, { "epoch": 24.641275465013287, "grad_norm": 0.20531101524829865, "learning_rate": 1e-05, "loss": 0.9681, "step": 27820 }, { "epoch": 24.645704162976084, "grad_norm": 0.24842555820941925, "learning_rate": 1e-05, "loss": 1.0317, "step": 27825 }, { "epoch": 24.650132860938886, "grad_norm": 0.25941750407218933, "learning_rate": 1e-05, "loss": 1.042, "step": 27830 }, { "epoch": 24.654561558901683, "grad_norm": 0.24607211351394653, "learning_rate": 1e-05, "loss": 0.9851, "step": 27835 }, { "epoch": 24.65899025686448, "grad_norm": 0.22874706983566284, "learning_rate": 1e-05, "loss": 0.9494, "step": 27840 }, { "epoch": 24.663418954827282, "grad_norm": 0.2419596016407013, "learning_rate": 1e-05, "loss": 0.9603, "step": 27845 }, { "epoch": 24.66784765279008, "grad_norm": 0.273303359746933, "learning_rate": 1e-05, "loss": 1.0131, "step": 27850 }, { "epoch": 24.672276350752878, "grad_norm": 0.2364151030778885, "learning_rate": 1e-05, "loss": 0.9529, "step": 27855 }, { "epoch": 24.67670504871568, "grad_norm": 0.2614619731903076, "learning_rate": 1e-05, "loss": 1.0484, "step": 27860 }, { "epoch": 24.681133746678476, "grad_norm": 0.2701669931411743, "learning_rate": 1e-05, "loss": 0.9668, "step": 27865 }, { "epoch": 24.685562444641274, "grad_norm": 0.2146516740322113, "learning_rate": 1e-05, "loss": 0.9912, "step": 27870 }, { "epoch": 24.689991142604075, "grad_norm": 0.23337946832180023, "learning_rate": 1e-05, "loss": 1.05, "step": 27875 }, { "epoch": 24.694419840566873, "grad_norm": 0.20593266189098358, "learning_rate": 1e-05, "loss": 0.9494, "step": 27880 }, { "epoch": 24.698848538529674, "grad_norm": 0.2650032639503479, "learning_rate": 1e-05, "loss": 1.0063, "step": 27885 }, { "epoch": 24.70327723649247, "grad_norm": 0.26032283902168274, "learning_rate": 1e-05, "loss": 1.0688, "step": 27890 }, { "epoch": 24.70770593445527, "grad_norm": 0.24456565082073212, "learning_rate": 1e-05, "loss": 0.9405, "step": 27895 }, { "epoch": 24.71213463241807, "grad_norm": 0.23210467398166656, "learning_rate": 1e-05, "loss": 0.9242, "step": 27900 }, { "epoch": 24.71656333038087, "grad_norm": 0.2659805715084076, "learning_rate": 1e-05, "loss": 1.0241, "step": 27905 }, { "epoch": 24.720992028343666, "grad_norm": 0.2703510522842407, "learning_rate": 1e-05, "loss": 0.9945, "step": 27910 }, { "epoch": 24.725420726306467, "grad_norm": 0.2797582447528839, "learning_rate": 1e-05, "loss": 0.9755, "step": 27915 }, { "epoch": 24.729849424269265, "grad_norm": 0.22575101256370544, "learning_rate": 1e-05, "loss": 1.0073, "step": 27920 }, { "epoch": 24.734278122232062, "grad_norm": 0.22218960523605347, "learning_rate": 1e-05, "loss": 0.9688, "step": 27925 }, { "epoch": 24.738706820194864, "grad_norm": 0.2900409400463104, "learning_rate": 1e-05, "loss": 1.0614, "step": 27930 }, { "epoch": 24.74313551815766, "grad_norm": 0.28105300664901733, "learning_rate": 1e-05, "loss": 0.9946, "step": 27935 }, { "epoch": 24.74756421612046, "grad_norm": 0.22506463527679443, "learning_rate": 1e-05, "loss": 1.0244, "step": 27940 }, { "epoch": 24.75199291408326, "grad_norm": 0.2369133085012436, "learning_rate": 1e-05, "loss": 1.0117, "step": 27945 }, { "epoch": 24.756421612046058, "grad_norm": 0.24252861738204956, "learning_rate": 1e-05, "loss": 0.9944, "step": 27950 }, { "epoch": 24.76085031000886, "grad_norm": 0.2130277007818222, "learning_rate": 1e-05, "loss": 0.9789, "step": 27955 }, { "epoch": 24.765279007971657, "grad_norm": 0.2696848511695862, "learning_rate": 1e-05, "loss": 0.9622, "step": 27960 }, { "epoch": 24.769707705934454, "grad_norm": 0.28770482540130615, "learning_rate": 1e-05, "loss": 1.0187, "step": 27965 }, { "epoch": 24.774136403897256, "grad_norm": 0.2645743489265442, "learning_rate": 1e-05, "loss": 0.9433, "step": 27970 }, { "epoch": 24.778565101860053, "grad_norm": 0.31868138909339905, "learning_rate": 1e-05, "loss": 0.9409, "step": 27975 }, { "epoch": 24.78299379982285, "grad_norm": 0.2536707818508148, "learning_rate": 1e-05, "loss": 0.9817, "step": 27980 }, { "epoch": 24.787422497785652, "grad_norm": 0.2427499145269394, "learning_rate": 1e-05, "loss": 0.9003, "step": 27985 }, { "epoch": 24.79185119574845, "grad_norm": 0.2844630479812622, "learning_rate": 1e-05, "loss": 1.0215, "step": 27990 }, { "epoch": 24.796279893711247, "grad_norm": 0.34206822514533997, "learning_rate": 1e-05, "loss": 0.9336, "step": 27995 }, { "epoch": 24.80070859167405, "grad_norm": 0.2651706337928772, "learning_rate": 1e-05, "loss": 0.9738, "step": 28000 }, { "epoch": 24.805137289636846, "grad_norm": 0.243279829621315, "learning_rate": 1e-05, "loss": 0.9702, "step": 28005 }, { "epoch": 24.809565987599647, "grad_norm": 0.25350362062454224, "learning_rate": 1e-05, "loss": 1.0274, "step": 28010 }, { "epoch": 24.813994685562445, "grad_norm": 0.219048872590065, "learning_rate": 1e-05, "loss": 0.9589, "step": 28015 }, { "epoch": 24.818423383525243, "grad_norm": 0.24372893571853638, "learning_rate": 1e-05, "loss": 0.981, "step": 28020 }, { "epoch": 24.822852081488044, "grad_norm": 0.2852746844291687, "learning_rate": 1e-05, "loss": 1.0375, "step": 28025 }, { "epoch": 24.82728077945084, "grad_norm": 0.22398953139781952, "learning_rate": 1e-05, "loss": 0.9498, "step": 28030 }, { "epoch": 24.83170947741364, "grad_norm": 0.2317638099193573, "learning_rate": 1e-05, "loss": 1.0511, "step": 28035 }, { "epoch": 24.83613817537644, "grad_norm": 0.258124977350235, "learning_rate": 1e-05, "loss": 1.0077, "step": 28040 }, { "epoch": 24.840566873339238, "grad_norm": 0.25679975748062134, "learning_rate": 1e-05, "loss": 1.0077, "step": 28045 }, { "epoch": 24.844995571302036, "grad_norm": 0.22252784669399261, "learning_rate": 1e-05, "loss": 0.9657, "step": 28050 }, { "epoch": 24.849424269264837, "grad_norm": 0.24934500455856323, "learning_rate": 1e-05, "loss": 0.9899, "step": 28055 }, { "epoch": 24.853852967227635, "grad_norm": 0.2803652584552765, "learning_rate": 1e-05, "loss": 0.9734, "step": 28060 }, { "epoch": 24.858281665190432, "grad_norm": 0.21808862686157227, "learning_rate": 1e-05, "loss": 1.0203, "step": 28065 }, { "epoch": 24.862710363153234, "grad_norm": 0.22926940023899078, "learning_rate": 1e-05, "loss": 0.9498, "step": 28070 }, { "epoch": 24.86713906111603, "grad_norm": 0.2412165403366089, "learning_rate": 1e-05, "loss": 1.0588, "step": 28075 }, { "epoch": 24.871567759078832, "grad_norm": 0.24139633774757385, "learning_rate": 1e-05, "loss": 0.9821, "step": 28080 }, { "epoch": 24.87599645704163, "grad_norm": 0.24621202051639557, "learning_rate": 1e-05, "loss": 0.9621, "step": 28085 }, { "epoch": 24.880425155004428, "grad_norm": 0.24362148344516754, "learning_rate": 1e-05, "loss": 1.0373, "step": 28090 }, { "epoch": 24.88485385296723, "grad_norm": 0.24921147525310516, "learning_rate": 1e-05, "loss": 1.0197, "step": 28095 }, { "epoch": 24.889282550930027, "grad_norm": 0.2551196217536926, "learning_rate": 1e-05, "loss": 0.9868, "step": 28100 }, { "epoch": 24.893711248892824, "grad_norm": 0.23885582387447357, "learning_rate": 1e-05, "loss": 0.986, "step": 28105 }, { "epoch": 24.898139946855625, "grad_norm": 0.21594491600990295, "learning_rate": 1e-05, "loss": 0.9453, "step": 28110 }, { "epoch": 24.902568644818423, "grad_norm": 0.2223508208990097, "learning_rate": 1e-05, "loss": 0.9543, "step": 28115 }, { "epoch": 24.90699734278122, "grad_norm": 0.2156670242547989, "learning_rate": 1e-05, "loss": 0.9653, "step": 28120 }, { "epoch": 24.911426040744022, "grad_norm": 0.2858951687812805, "learning_rate": 1e-05, "loss": 0.9614, "step": 28125 }, { "epoch": 24.91585473870682, "grad_norm": 0.2882167398929596, "learning_rate": 1e-05, "loss": 1.0159, "step": 28130 }, { "epoch": 24.92028343666962, "grad_norm": 0.2706875801086426, "learning_rate": 1e-05, "loss": 0.9973, "step": 28135 }, { "epoch": 24.92471213463242, "grad_norm": 0.23093226552009583, "learning_rate": 1e-05, "loss": 0.9889, "step": 28140 }, { "epoch": 24.929140832595216, "grad_norm": 0.2316305786371231, "learning_rate": 1e-05, "loss": 0.9636, "step": 28145 }, { "epoch": 24.933569530558017, "grad_norm": 0.21752788126468658, "learning_rate": 1e-05, "loss": 0.9395, "step": 28150 }, { "epoch": 24.937998228520815, "grad_norm": 0.19479702413082123, "learning_rate": 1e-05, "loss": 1.0063, "step": 28155 }, { "epoch": 24.942426926483613, "grad_norm": 0.2344137579202652, "learning_rate": 1e-05, "loss": 0.9518, "step": 28160 }, { "epoch": 24.946855624446414, "grad_norm": 0.23480965197086334, "learning_rate": 1e-05, "loss": 0.9913, "step": 28165 }, { "epoch": 24.95128432240921, "grad_norm": 0.25966697931289673, "learning_rate": 1e-05, "loss": 0.9762, "step": 28170 }, { "epoch": 24.95571302037201, "grad_norm": 0.2760307490825653, "learning_rate": 1e-05, "loss": 0.9703, "step": 28175 }, { "epoch": 24.96014171833481, "grad_norm": 0.25913602113723755, "learning_rate": 1e-05, "loss": 0.9808, "step": 28180 }, { "epoch": 24.964570416297608, "grad_norm": 0.24504348635673523, "learning_rate": 1e-05, "loss": 1.0109, "step": 28185 }, { "epoch": 24.968999114260406, "grad_norm": 0.2237614244222641, "learning_rate": 1e-05, "loss": 0.9554, "step": 28190 }, { "epoch": 24.973427812223207, "grad_norm": 0.31712502241134644, "learning_rate": 1e-05, "loss": 0.9781, "step": 28195 }, { "epoch": 24.977856510186005, "grad_norm": 0.23510074615478516, "learning_rate": 1e-05, "loss": 1.0628, "step": 28200 }, { "epoch": 24.982285208148806, "grad_norm": 0.3259398341178894, "learning_rate": 1e-05, "loss": 0.9657, "step": 28205 }, { "epoch": 24.986713906111603, "grad_norm": 0.24236249923706055, "learning_rate": 1e-05, "loss": 0.9783, "step": 28210 }, { "epoch": 24.9911426040744, "grad_norm": 0.23763632774353027, "learning_rate": 1e-05, "loss": 1.0069, "step": 28215 }, { "epoch": 24.995571302037202, "grad_norm": 0.22181124985218048, "learning_rate": 1e-05, "loss": 0.9757, "step": 28220 }, { "epoch": 25.0, "grad_norm": 0.26796409487724304, "learning_rate": 1e-05, "loss": 0.9875, "step": 28225 }, { "epoch": 25.004428697962798, "grad_norm": 0.3231574296951294, "learning_rate": 1e-05, "loss": 0.9929, "step": 28230 }, { "epoch": 25.0088573959256, "grad_norm": 0.2569660246372223, "learning_rate": 1e-05, "loss": 0.98, "step": 28235 }, { "epoch": 25.013286093888397, "grad_norm": 0.26533979177474976, "learning_rate": 1e-05, "loss": 0.9954, "step": 28240 }, { "epoch": 25.017714791851194, "grad_norm": 0.22557643055915833, "learning_rate": 1e-05, "loss": 0.965, "step": 28245 }, { "epoch": 25.022143489813995, "grad_norm": 0.30640771985054016, "learning_rate": 1e-05, "loss": 0.9793, "step": 28250 }, { "epoch": 25.026572187776793, "grad_norm": 0.29978352785110474, "learning_rate": 1e-05, "loss": 0.9682, "step": 28255 }, { "epoch": 25.031000885739594, "grad_norm": 0.27134472131729126, "learning_rate": 1e-05, "loss": 1.0151, "step": 28260 }, { "epoch": 25.035429583702392, "grad_norm": 0.2424149513244629, "learning_rate": 1e-05, "loss": 0.9996, "step": 28265 }, { "epoch": 25.03985828166519, "grad_norm": 0.22151607275009155, "learning_rate": 1e-05, "loss": 1.0154, "step": 28270 }, { "epoch": 25.04428697962799, "grad_norm": 0.27944135665893555, "learning_rate": 1e-05, "loss": 1.0036, "step": 28275 }, { "epoch": 25.04871567759079, "grad_norm": 0.2413647174835205, "learning_rate": 1e-05, "loss": 0.9319, "step": 28280 }, { "epoch": 25.053144375553586, "grad_norm": 0.3381389081478119, "learning_rate": 1e-05, "loss": 0.9848, "step": 28285 }, { "epoch": 25.057573073516387, "grad_norm": 0.2824645936489105, "learning_rate": 1e-05, "loss": 1.0087, "step": 28290 }, { "epoch": 25.062001771479185, "grad_norm": 0.26023802161216736, "learning_rate": 1e-05, "loss": 0.9539, "step": 28295 }, { "epoch": 25.066430469441983, "grad_norm": 0.2313794046640396, "learning_rate": 1e-05, "loss": 0.9909, "step": 28300 }, { "epoch": 25.070859167404784, "grad_norm": 0.24783073365688324, "learning_rate": 1e-05, "loss": 0.9884, "step": 28305 }, { "epoch": 25.07528786536758, "grad_norm": 0.2511592209339142, "learning_rate": 1e-05, "loss": 0.9455, "step": 28310 }, { "epoch": 25.07971656333038, "grad_norm": 0.22317899763584137, "learning_rate": 1e-05, "loss": 0.9356, "step": 28315 }, { "epoch": 25.08414526129318, "grad_norm": 0.3170972764492035, "learning_rate": 1e-05, "loss": 0.9801, "step": 28320 }, { "epoch": 25.088573959255978, "grad_norm": 0.2434535026550293, "learning_rate": 1e-05, "loss": 0.9887, "step": 28325 }, { "epoch": 25.09300265721878, "grad_norm": 0.2370983362197876, "learning_rate": 1e-05, "loss": 0.9854, "step": 28330 }, { "epoch": 25.097431355181577, "grad_norm": 0.23729123175144196, "learning_rate": 1e-05, "loss": 0.9091, "step": 28335 }, { "epoch": 25.101860053144375, "grad_norm": 0.2768557369709015, "learning_rate": 1e-05, "loss": 0.9943, "step": 28340 }, { "epoch": 25.106288751107176, "grad_norm": 0.22914046049118042, "learning_rate": 1e-05, "loss": 0.9656, "step": 28345 }, { "epoch": 25.110717449069973, "grad_norm": 0.2848317623138428, "learning_rate": 1e-05, "loss": 0.9853, "step": 28350 }, { "epoch": 25.11514614703277, "grad_norm": 0.24439536035060883, "learning_rate": 1e-05, "loss": 0.9999, "step": 28355 }, { "epoch": 25.119574844995572, "grad_norm": 0.24731579422950745, "learning_rate": 1e-05, "loss": 0.9605, "step": 28360 }, { "epoch": 25.12400354295837, "grad_norm": 0.2253962755203247, "learning_rate": 1e-05, "loss": 0.9655, "step": 28365 }, { "epoch": 25.128432240921168, "grad_norm": 0.23305702209472656, "learning_rate": 1e-05, "loss": 0.9704, "step": 28370 }, { "epoch": 25.13286093888397, "grad_norm": 0.2980017364025116, "learning_rate": 1e-05, "loss": 1.0031, "step": 28375 }, { "epoch": 25.137289636846766, "grad_norm": 0.23258838057518005, "learning_rate": 1e-05, "loss": 0.99, "step": 28380 }, { "epoch": 25.141718334809568, "grad_norm": 0.2909734845161438, "learning_rate": 1e-05, "loss": 1.0273, "step": 28385 }, { "epoch": 25.146147032772365, "grad_norm": 0.2500873804092407, "learning_rate": 1e-05, "loss": 1.0091, "step": 28390 }, { "epoch": 25.150575730735163, "grad_norm": 0.22985301911830902, "learning_rate": 1e-05, "loss": 1.0276, "step": 28395 }, { "epoch": 25.155004428697964, "grad_norm": 0.2257264405488968, "learning_rate": 1e-05, "loss": 1.0225, "step": 28400 }, { "epoch": 25.159433126660762, "grad_norm": 0.23549792170524597, "learning_rate": 1e-05, "loss": 0.9438, "step": 28405 }, { "epoch": 25.16386182462356, "grad_norm": 0.2427898645401001, "learning_rate": 1e-05, "loss": 0.9916, "step": 28410 }, { "epoch": 25.16829052258636, "grad_norm": 0.2809029221534729, "learning_rate": 1e-05, "loss": 0.9688, "step": 28415 }, { "epoch": 25.17271922054916, "grad_norm": 0.21593166887760162, "learning_rate": 1e-05, "loss": 0.9823, "step": 28420 }, { "epoch": 25.177147918511956, "grad_norm": 0.27121323347091675, "learning_rate": 1e-05, "loss": 0.9694, "step": 28425 }, { "epoch": 25.181576616474757, "grad_norm": 0.24392178654670715, "learning_rate": 1e-05, "loss": 0.9786, "step": 28430 }, { "epoch": 25.186005314437555, "grad_norm": 0.21362219750881195, "learning_rate": 1e-05, "loss": 0.9975, "step": 28435 }, { "epoch": 25.190434012400353, "grad_norm": 0.23220038414001465, "learning_rate": 1e-05, "loss": 1.0037, "step": 28440 }, { "epoch": 25.194862710363154, "grad_norm": 0.20844097435474396, "learning_rate": 1e-05, "loss": 0.9855, "step": 28445 }, { "epoch": 25.19929140832595, "grad_norm": 0.22274212539196014, "learning_rate": 1e-05, "loss": 0.979, "step": 28450 }, { "epoch": 25.203720106288753, "grad_norm": 0.213849276304245, "learning_rate": 1e-05, "loss": 0.9541, "step": 28455 }, { "epoch": 25.20814880425155, "grad_norm": 0.22715076804161072, "learning_rate": 1e-05, "loss": 0.9527, "step": 28460 }, { "epoch": 25.212577502214348, "grad_norm": 0.25142112374305725, "learning_rate": 1e-05, "loss": 1.0021, "step": 28465 }, { "epoch": 25.21700620017715, "grad_norm": 0.23326852917671204, "learning_rate": 1e-05, "loss": 0.9621, "step": 28470 }, { "epoch": 25.221434898139947, "grad_norm": 0.26229164004325867, "learning_rate": 1e-05, "loss": 1.0084, "step": 28475 }, { "epoch": 25.225863596102744, "grad_norm": 0.2494177520275116, "learning_rate": 1e-05, "loss": 0.9594, "step": 28480 }, { "epoch": 25.230292294065546, "grad_norm": 0.27019643783569336, "learning_rate": 1e-05, "loss": 0.9554, "step": 28485 }, { "epoch": 25.234720992028343, "grad_norm": 0.2603653073310852, "learning_rate": 1e-05, "loss": 0.9623, "step": 28490 }, { "epoch": 25.23914968999114, "grad_norm": 0.2675544023513794, "learning_rate": 1e-05, "loss": 0.9677, "step": 28495 }, { "epoch": 25.243578387953942, "grad_norm": 0.24533584713935852, "learning_rate": 1e-05, "loss": 0.9739, "step": 28500 }, { "epoch": 25.24800708591674, "grad_norm": 0.22727340459823608, "learning_rate": 1e-05, "loss": 0.9738, "step": 28505 }, { "epoch": 25.25243578387954, "grad_norm": 0.22361747920513153, "learning_rate": 1e-05, "loss": 0.9901, "step": 28510 }, { "epoch": 25.25686448184234, "grad_norm": 0.24781891703605652, "learning_rate": 1e-05, "loss": 0.9893, "step": 28515 }, { "epoch": 25.261293179805136, "grad_norm": 0.2413349747657776, "learning_rate": 1e-05, "loss": 1.0345, "step": 28520 }, { "epoch": 25.265721877767938, "grad_norm": 0.25148409605026245, "learning_rate": 1e-05, "loss": 0.9841, "step": 28525 }, { "epoch": 25.270150575730735, "grad_norm": 0.2741221785545349, "learning_rate": 1e-05, "loss": 0.9613, "step": 28530 }, { "epoch": 25.274579273693533, "grad_norm": 0.21583959460258484, "learning_rate": 1e-05, "loss": 0.9632, "step": 28535 }, { "epoch": 25.279007971656334, "grad_norm": 0.22502276301383972, "learning_rate": 1e-05, "loss": 1.0111, "step": 28540 }, { "epoch": 25.28343666961913, "grad_norm": 0.23255762457847595, "learning_rate": 1e-05, "loss": 1.0065, "step": 28545 }, { "epoch": 25.28786536758193, "grad_norm": 0.257028728723526, "learning_rate": 1e-05, "loss": 0.9593, "step": 28550 }, { "epoch": 25.29229406554473, "grad_norm": 0.22574196755886078, "learning_rate": 1e-05, "loss": 1.0158, "step": 28555 }, { "epoch": 25.29672276350753, "grad_norm": 0.24432362616062164, "learning_rate": 1e-05, "loss": 1.0081, "step": 28560 }, { "epoch": 25.30115146147033, "grad_norm": 0.2594378888607025, "learning_rate": 1e-05, "loss": 0.9932, "step": 28565 }, { "epoch": 25.305580159433127, "grad_norm": 0.23691865801811218, "learning_rate": 1e-05, "loss": 1.0283, "step": 28570 }, { "epoch": 25.310008857395925, "grad_norm": 0.28134483098983765, "learning_rate": 1e-05, "loss": 1.0227, "step": 28575 }, { "epoch": 25.314437555358726, "grad_norm": 0.2507847845554352, "learning_rate": 1e-05, "loss": 0.9573, "step": 28580 }, { "epoch": 25.318866253321524, "grad_norm": 0.23688143491744995, "learning_rate": 1e-05, "loss": 0.997, "step": 28585 }, { "epoch": 25.32329495128432, "grad_norm": 0.26104605197906494, "learning_rate": 1e-05, "loss": 0.9421, "step": 28590 }, { "epoch": 25.327723649247122, "grad_norm": 0.23105032742023468, "learning_rate": 1e-05, "loss": 1.0102, "step": 28595 }, { "epoch": 25.33215234720992, "grad_norm": 0.2216823548078537, "learning_rate": 1e-05, "loss": 0.9768, "step": 28600 }, { "epoch": 25.336581045172718, "grad_norm": 0.27124977111816406, "learning_rate": 1e-05, "loss": 0.9639, "step": 28605 }, { "epoch": 25.34100974313552, "grad_norm": 0.2161342352628708, "learning_rate": 1e-05, "loss": 0.9644, "step": 28610 }, { "epoch": 25.345438441098317, "grad_norm": 0.26108431816101074, "learning_rate": 1e-05, "loss": 0.9681, "step": 28615 }, { "epoch": 25.349867139061114, "grad_norm": 0.25465962290763855, "learning_rate": 1e-05, "loss": 0.9669, "step": 28620 }, { "epoch": 25.354295837023916, "grad_norm": 0.28791970014572144, "learning_rate": 1e-05, "loss": 0.9622, "step": 28625 }, { "epoch": 25.358724534986713, "grad_norm": 0.26569464802742004, "learning_rate": 1e-05, "loss": 1.0099, "step": 28630 }, { "epoch": 25.363153232949514, "grad_norm": 0.24875789880752563, "learning_rate": 1e-05, "loss": 1.0475, "step": 28635 }, { "epoch": 25.367581930912312, "grad_norm": 0.20327898859977722, "learning_rate": 1e-05, "loss": 1.0117, "step": 28640 }, { "epoch": 25.37201062887511, "grad_norm": 0.2711411416530609, "learning_rate": 1e-05, "loss": 0.9752, "step": 28645 }, { "epoch": 25.37643932683791, "grad_norm": 0.20519310235977173, "learning_rate": 1e-05, "loss": 0.9835, "step": 28650 }, { "epoch": 25.38086802480071, "grad_norm": 0.2470201849937439, "learning_rate": 1e-05, "loss": 0.9548, "step": 28655 }, { "epoch": 25.385296722763506, "grad_norm": 0.2653522789478302, "learning_rate": 1e-05, "loss": 0.9966, "step": 28660 }, { "epoch": 25.389725420726307, "grad_norm": 0.2669956386089325, "learning_rate": 1e-05, "loss": 0.9865, "step": 28665 }, { "epoch": 25.394154118689105, "grad_norm": 0.2532452344894409, "learning_rate": 1e-05, "loss": 0.9816, "step": 28670 }, { "epoch": 25.398582816651903, "grad_norm": 0.2404826581478119, "learning_rate": 1e-05, "loss": 0.9728, "step": 28675 }, { "epoch": 25.403011514614704, "grad_norm": 0.21702100336551666, "learning_rate": 1e-05, "loss": 1.0061, "step": 28680 }, { "epoch": 25.4074402125775, "grad_norm": 0.24474535882472992, "learning_rate": 1e-05, "loss": 1.0147, "step": 28685 }, { "epoch": 25.411868910540303, "grad_norm": 0.28809377551078796, "learning_rate": 1e-05, "loss": 1.0199, "step": 28690 }, { "epoch": 25.4162976085031, "grad_norm": 0.27852576971054077, "learning_rate": 1e-05, "loss": 0.9452, "step": 28695 }, { "epoch": 25.420726306465898, "grad_norm": 0.20958438515663147, "learning_rate": 1e-05, "loss": 0.9937, "step": 28700 }, { "epoch": 25.4251550044287, "grad_norm": 0.20069308578968048, "learning_rate": 1e-05, "loss": 0.9308, "step": 28705 }, { "epoch": 25.429583702391497, "grad_norm": 0.2399550825357437, "learning_rate": 1e-05, "loss": 0.9273, "step": 28710 }, { "epoch": 25.434012400354295, "grad_norm": 0.2795233428478241, "learning_rate": 1e-05, "loss": 1.0279, "step": 28715 }, { "epoch": 25.438441098317096, "grad_norm": 0.22888731956481934, "learning_rate": 1e-05, "loss": 0.9782, "step": 28720 }, { "epoch": 25.442869796279894, "grad_norm": 0.23873721063137054, "learning_rate": 1e-05, "loss": 0.9708, "step": 28725 }, { "epoch": 25.44729849424269, "grad_norm": 0.25387635827064514, "learning_rate": 1e-05, "loss": 1.0079, "step": 28730 }, { "epoch": 25.451727192205492, "grad_norm": 0.30345356464385986, "learning_rate": 1e-05, "loss": 0.9362, "step": 28735 }, { "epoch": 25.45615589016829, "grad_norm": 0.29723215103149414, "learning_rate": 1e-05, "loss": 0.9999, "step": 28740 }, { "epoch": 25.460584588131088, "grad_norm": 0.24982304871082306, "learning_rate": 1e-05, "loss": 1.0278, "step": 28745 }, { "epoch": 25.46501328609389, "grad_norm": 0.3130345344543457, "learning_rate": 1e-05, "loss": 1.0473, "step": 28750 }, { "epoch": 25.469441984056687, "grad_norm": 0.20515911281108856, "learning_rate": 1e-05, "loss": 0.9612, "step": 28755 }, { "epoch": 25.473870682019488, "grad_norm": 0.23972462117671967, "learning_rate": 1e-05, "loss": 0.9706, "step": 28760 }, { "epoch": 25.478299379982285, "grad_norm": 0.2404281198978424, "learning_rate": 1e-05, "loss": 0.9391, "step": 28765 }, { "epoch": 25.482728077945083, "grad_norm": 0.23902878165245056, "learning_rate": 1e-05, "loss": 0.9621, "step": 28770 }, { "epoch": 25.487156775907884, "grad_norm": 0.25248533487319946, "learning_rate": 1e-05, "loss": 0.9832, "step": 28775 }, { "epoch": 25.491585473870682, "grad_norm": 0.23704902827739716, "learning_rate": 1e-05, "loss": 0.9698, "step": 28780 }, { "epoch": 25.49601417183348, "grad_norm": 0.2655435800552368, "learning_rate": 1e-05, "loss": 1.0153, "step": 28785 }, { "epoch": 25.50044286979628, "grad_norm": 0.23425692319869995, "learning_rate": 1e-05, "loss": 0.9661, "step": 28790 }, { "epoch": 25.50487156775908, "grad_norm": 0.2840118110179901, "learning_rate": 1e-05, "loss": 1.0216, "step": 28795 }, { "epoch": 25.509300265721876, "grad_norm": 0.31624239683151245, "learning_rate": 1e-05, "loss": 0.9805, "step": 28800 }, { "epoch": 25.513728963684677, "grad_norm": 0.24195078015327454, "learning_rate": 1e-05, "loss": 1.0034, "step": 28805 }, { "epoch": 25.518157661647475, "grad_norm": 0.24833817780017853, "learning_rate": 1e-05, "loss": 0.9688, "step": 28810 }, { "epoch": 25.522586359610276, "grad_norm": 0.22840385138988495, "learning_rate": 1e-05, "loss": 1.014, "step": 28815 }, { "epoch": 25.527015057573074, "grad_norm": 0.2724335193634033, "learning_rate": 1e-05, "loss": 0.9514, "step": 28820 }, { "epoch": 25.53144375553587, "grad_norm": 0.31222543120384216, "learning_rate": 1e-05, "loss": 0.9799, "step": 28825 }, { "epoch": 25.535872453498673, "grad_norm": 0.2051672637462616, "learning_rate": 1e-05, "loss": 1.02, "step": 28830 }, { "epoch": 25.54030115146147, "grad_norm": 0.2828862965106964, "learning_rate": 1e-05, "loss": 0.9742, "step": 28835 }, { "epoch": 25.544729849424268, "grad_norm": 0.22247949242591858, "learning_rate": 1e-05, "loss": 0.974, "step": 28840 }, { "epoch": 25.54915854738707, "grad_norm": 0.2899076044559479, "learning_rate": 1e-05, "loss": 1.0108, "step": 28845 }, { "epoch": 25.553587245349867, "grad_norm": 0.2460116147994995, "learning_rate": 1e-05, "loss": 0.9536, "step": 28850 }, { "epoch": 25.558015943312665, "grad_norm": 0.25456270575523376, "learning_rate": 1e-05, "loss": 0.9158, "step": 28855 }, { "epoch": 25.562444641275466, "grad_norm": 0.23259401321411133, "learning_rate": 1e-05, "loss": 0.9437, "step": 28860 }, { "epoch": 25.566873339238263, "grad_norm": 0.22429171204566956, "learning_rate": 1e-05, "loss": 0.9914, "step": 28865 }, { "epoch": 25.571302037201065, "grad_norm": 0.21937568485736847, "learning_rate": 1e-05, "loss": 1.009, "step": 28870 }, { "epoch": 25.575730735163862, "grad_norm": 0.20632602274417877, "learning_rate": 1e-05, "loss": 0.9799, "step": 28875 }, { "epoch": 25.58015943312666, "grad_norm": 0.25110140442848206, "learning_rate": 1e-05, "loss": 1.002, "step": 28880 }, { "epoch": 25.58458813108946, "grad_norm": 0.2793775200843811, "learning_rate": 1e-05, "loss": 1.0092, "step": 28885 }, { "epoch": 25.58901682905226, "grad_norm": 0.28905123472213745, "learning_rate": 1e-05, "loss": 0.9939, "step": 28890 }, { "epoch": 25.593445527015056, "grad_norm": 0.25348004698753357, "learning_rate": 1e-05, "loss": 1.034, "step": 28895 }, { "epoch": 25.597874224977858, "grad_norm": 0.3241104483604431, "learning_rate": 1e-05, "loss": 0.989, "step": 28900 }, { "epoch": 25.602302922940655, "grad_norm": 0.2509087324142456, "learning_rate": 1e-05, "loss": 1.0168, "step": 28905 }, { "epoch": 25.606731620903453, "grad_norm": 0.2558797001838684, "learning_rate": 1e-05, "loss": 0.9982, "step": 28910 }, { "epoch": 25.611160318866254, "grad_norm": 0.25341325998306274, "learning_rate": 1e-05, "loss": 0.9787, "step": 28915 }, { "epoch": 25.615589016829052, "grad_norm": 0.24359464645385742, "learning_rate": 1e-05, "loss": 1.0067, "step": 28920 }, { "epoch": 25.62001771479185, "grad_norm": 0.23044449090957642, "learning_rate": 1e-05, "loss": 0.9922, "step": 28925 }, { "epoch": 25.62444641275465, "grad_norm": 0.23174871504306793, "learning_rate": 1e-05, "loss": 0.972, "step": 28930 }, { "epoch": 25.62887511071745, "grad_norm": 0.25438809394836426, "learning_rate": 1e-05, "loss": 1.0116, "step": 28935 }, { "epoch": 25.63330380868025, "grad_norm": 0.2380950003862381, "learning_rate": 1e-05, "loss": 1.0165, "step": 28940 }, { "epoch": 25.637732506643047, "grad_norm": 0.26295799016952515, "learning_rate": 1e-05, "loss": 0.9888, "step": 28945 }, { "epoch": 25.642161204605845, "grad_norm": 0.2500235140323639, "learning_rate": 1e-05, "loss": 0.9771, "step": 28950 }, { "epoch": 25.646589902568646, "grad_norm": 0.25703057646751404, "learning_rate": 1e-05, "loss": 1.0289, "step": 28955 }, { "epoch": 25.651018600531444, "grad_norm": 0.23264075815677643, "learning_rate": 1e-05, "loss": 0.9699, "step": 28960 }, { "epoch": 25.65544729849424, "grad_norm": 0.25241145491600037, "learning_rate": 1e-05, "loss": 1.0229, "step": 28965 }, { "epoch": 25.659875996457043, "grad_norm": 0.2852046489715576, "learning_rate": 1e-05, "loss": 0.9527, "step": 28970 }, { "epoch": 25.66430469441984, "grad_norm": 0.3080235421657562, "learning_rate": 1e-05, "loss": 1.0164, "step": 28975 }, { "epoch": 25.668733392382638, "grad_norm": 0.22542570531368256, "learning_rate": 1e-05, "loss": 0.9897, "step": 28980 }, { "epoch": 25.67316209034544, "grad_norm": 0.22936002910137177, "learning_rate": 1e-05, "loss": 0.9273, "step": 28985 }, { "epoch": 25.677590788308237, "grad_norm": 0.23263134062290192, "learning_rate": 1e-05, "loss": 0.9339, "step": 28990 }, { "epoch": 25.682019486271038, "grad_norm": 0.21266630291938782, "learning_rate": 1e-05, "loss": 0.9286, "step": 28995 }, { "epoch": 25.686448184233836, "grad_norm": 0.23370856046676636, "learning_rate": 1e-05, "loss": 0.9638, "step": 29000 }, { "epoch": 25.690876882196633, "grad_norm": 0.21539267897605896, "learning_rate": 1e-05, "loss": 0.9401, "step": 29005 }, { "epoch": 25.695305580159435, "grad_norm": 0.2574823796749115, "learning_rate": 1e-05, "loss": 0.9804, "step": 29010 }, { "epoch": 25.699734278122232, "grad_norm": 0.24346426129341125, "learning_rate": 1e-05, "loss": 1.0067, "step": 29015 }, { "epoch": 25.70416297608503, "grad_norm": 0.2353721708059311, "learning_rate": 1e-05, "loss": 1.0255, "step": 29020 }, { "epoch": 25.70859167404783, "grad_norm": 0.24475936591625214, "learning_rate": 1e-05, "loss": 0.9769, "step": 29025 }, { "epoch": 25.71302037201063, "grad_norm": 0.24502651393413544, "learning_rate": 1e-05, "loss": 1.0313, "step": 29030 }, { "epoch": 25.717449069973426, "grad_norm": 0.24546490609645844, "learning_rate": 1e-05, "loss": 1.0171, "step": 29035 }, { "epoch": 25.721877767936228, "grad_norm": 0.23093603551387787, "learning_rate": 1e-05, "loss": 0.9682, "step": 29040 }, { "epoch": 25.726306465899025, "grad_norm": 0.22885775566101074, "learning_rate": 1e-05, "loss": 0.9912, "step": 29045 }, { "epoch": 25.730735163861823, "grad_norm": 0.2644462585449219, "learning_rate": 1e-05, "loss": 1.0103, "step": 29050 }, { "epoch": 25.735163861824624, "grad_norm": 0.2482924908399582, "learning_rate": 1e-05, "loss": 0.9717, "step": 29055 }, { "epoch": 25.739592559787422, "grad_norm": 0.27061980962753296, "learning_rate": 1e-05, "loss": 0.9711, "step": 29060 }, { "epoch": 25.744021257750223, "grad_norm": 0.2516295909881592, "learning_rate": 1e-05, "loss": 0.984, "step": 29065 }, { "epoch": 25.74844995571302, "grad_norm": 0.2653324007987976, "learning_rate": 1e-05, "loss": 0.9326, "step": 29070 }, { "epoch": 25.75287865367582, "grad_norm": 0.2714124619960785, "learning_rate": 1e-05, "loss": 0.9792, "step": 29075 }, { "epoch": 25.75730735163862, "grad_norm": 0.28832945227622986, "learning_rate": 1e-05, "loss": 1.0179, "step": 29080 }, { "epoch": 25.761736049601417, "grad_norm": 0.3036993443965912, "learning_rate": 1e-05, "loss": 1.0387, "step": 29085 }, { "epoch": 25.766164747564215, "grad_norm": 0.269141286611557, "learning_rate": 1e-05, "loss": 1.0162, "step": 29090 }, { "epoch": 25.770593445527016, "grad_norm": 0.27736595273017883, "learning_rate": 1e-05, "loss": 0.9843, "step": 29095 }, { "epoch": 25.775022143489814, "grad_norm": 0.2391963005065918, "learning_rate": 1e-05, "loss": 0.9947, "step": 29100 }, { "epoch": 25.77945084145261, "grad_norm": 0.25906017422676086, "learning_rate": 1e-05, "loss": 0.9684, "step": 29105 }, { "epoch": 25.783879539415413, "grad_norm": 0.24674907326698303, "learning_rate": 1e-05, "loss": 0.9648, "step": 29110 }, { "epoch": 25.78830823737821, "grad_norm": 0.24169249832630157, "learning_rate": 1e-05, "loss": 0.9988, "step": 29115 }, { "epoch": 25.79273693534101, "grad_norm": 0.22263750433921814, "learning_rate": 1e-05, "loss": 0.983, "step": 29120 }, { "epoch": 25.79716563330381, "grad_norm": 0.24689054489135742, "learning_rate": 1e-05, "loss": 1.0227, "step": 29125 }, { "epoch": 25.801594331266607, "grad_norm": 0.2252499759197235, "learning_rate": 1e-05, "loss": 0.9799, "step": 29130 }, { "epoch": 25.806023029229408, "grad_norm": 0.21843624114990234, "learning_rate": 1e-05, "loss": 0.9776, "step": 29135 }, { "epoch": 25.810451727192206, "grad_norm": 0.23872670531272888, "learning_rate": 1e-05, "loss": 0.9932, "step": 29140 }, { "epoch": 25.814880425155003, "grad_norm": 0.23328737914562225, "learning_rate": 1e-05, "loss": 0.9562, "step": 29145 }, { "epoch": 25.819309123117804, "grad_norm": 0.2513371706008911, "learning_rate": 1e-05, "loss": 1.0043, "step": 29150 }, { "epoch": 25.823737821080602, "grad_norm": 0.24121609330177307, "learning_rate": 1e-05, "loss": 0.9614, "step": 29155 }, { "epoch": 25.8281665190434, "grad_norm": 0.2561293840408325, "learning_rate": 1e-05, "loss": 0.9337, "step": 29160 }, { "epoch": 25.8325952170062, "grad_norm": 0.2826625108718872, "learning_rate": 1e-05, "loss": 0.9984, "step": 29165 }, { "epoch": 25.837023914969, "grad_norm": 0.22753024101257324, "learning_rate": 1e-05, "loss": 0.9763, "step": 29170 }, { "epoch": 25.841452612931796, "grad_norm": 0.28123176097869873, "learning_rate": 1e-05, "loss": 0.9363, "step": 29175 }, { "epoch": 25.845881310894598, "grad_norm": 0.25102218985557556, "learning_rate": 1e-05, "loss": 0.975, "step": 29180 }, { "epoch": 25.850310008857395, "grad_norm": 0.24924074113368988, "learning_rate": 1e-05, "loss": 0.9417, "step": 29185 }, { "epoch": 25.854738706820196, "grad_norm": 0.2137407809495926, "learning_rate": 1e-05, "loss": 0.9697, "step": 29190 }, { "epoch": 25.859167404782994, "grad_norm": 0.2659122347831726, "learning_rate": 1e-05, "loss": 0.9878, "step": 29195 }, { "epoch": 25.86359610274579, "grad_norm": 0.235785573720932, "learning_rate": 1e-05, "loss": 1.0008, "step": 29200 }, { "epoch": 25.868024800708593, "grad_norm": 0.20222915709018707, "learning_rate": 1e-05, "loss": 0.9393, "step": 29205 }, { "epoch": 25.87245349867139, "grad_norm": 0.22771041095256805, "learning_rate": 1e-05, "loss": 1.0067, "step": 29210 }, { "epoch": 25.876882196634188, "grad_norm": 0.2526921331882477, "learning_rate": 1e-05, "loss": 1.0313, "step": 29215 }, { "epoch": 25.88131089459699, "grad_norm": 0.24338418245315552, "learning_rate": 1e-05, "loss": 0.946, "step": 29220 }, { "epoch": 25.885739592559787, "grad_norm": 0.25345486402511597, "learning_rate": 1e-05, "loss": 0.99, "step": 29225 }, { "epoch": 25.890168290522585, "grad_norm": 0.21984684467315674, "learning_rate": 1e-05, "loss": 1.0044, "step": 29230 }, { "epoch": 25.894596988485386, "grad_norm": 0.23430711030960083, "learning_rate": 1e-05, "loss": 1.01, "step": 29235 }, { "epoch": 25.899025686448184, "grad_norm": 0.24225330352783203, "learning_rate": 1e-05, "loss": 1.035, "step": 29240 }, { "epoch": 25.903454384410985, "grad_norm": 0.2422517091035843, "learning_rate": 1e-05, "loss": 0.9951, "step": 29245 }, { "epoch": 25.907883082373782, "grad_norm": 0.23553062975406647, "learning_rate": 1e-05, "loss": 1.0135, "step": 29250 }, { "epoch": 25.91231178033658, "grad_norm": 0.32093924283981323, "learning_rate": 1e-05, "loss": 0.9949, "step": 29255 }, { "epoch": 25.91674047829938, "grad_norm": 0.23774869740009308, "learning_rate": 1e-05, "loss": 0.9851, "step": 29260 }, { "epoch": 25.92116917626218, "grad_norm": 0.216340571641922, "learning_rate": 1e-05, "loss": 0.9823, "step": 29265 }, { "epoch": 25.925597874224977, "grad_norm": 0.20214806497097015, "learning_rate": 1e-05, "loss": 0.9212, "step": 29270 }, { "epoch": 25.930026572187778, "grad_norm": 0.21292157471179962, "learning_rate": 1e-05, "loss": 0.9741, "step": 29275 }, { "epoch": 25.934455270150576, "grad_norm": 0.22030073404312134, "learning_rate": 1e-05, "loss": 0.991, "step": 29280 }, { "epoch": 25.938883968113373, "grad_norm": 0.2435506135225296, "learning_rate": 1e-05, "loss": 0.9465, "step": 29285 }, { "epoch": 25.943312666076174, "grad_norm": 0.2213594764471054, "learning_rate": 1e-05, "loss": 1.0103, "step": 29290 }, { "epoch": 25.947741364038972, "grad_norm": 0.2633857727050781, "learning_rate": 1e-05, "loss": 0.9876, "step": 29295 }, { "epoch": 25.95217006200177, "grad_norm": 0.23637594282627106, "learning_rate": 1e-05, "loss": 0.9876, "step": 29300 }, { "epoch": 25.95659875996457, "grad_norm": 0.2753964960575104, "learning_rate": 1e-05, "loss": 0.9572, "step": 29305 }, { "epoch": 25.96102745792737, "grad_norm": 0.22327548265457153, "learning_rate": 1e-05, "loss": 0.989, "step": 29310 }, { "epoch": 25.96545615589017, "grad_norm": 0.2317304164171219, "learning_rate": 1e-05, "loss": 1.0227, "step": 29315 }, { "epoch": 25.969884853852967, "grad_norm": 0.24318106472492218, "learning_rate": 1e-05, "loss": 1.033, "step": 29320 }, { "epoch": 25.974313551815765, "grad_norm": 0.2720038592815399, "learning_rate": 1e-05, "loss": 0.9649, "step": 29325 }, { "epoch": 25.978742249778566, "grad_norm": 0.26555389165878296, "learning_rate": 1e-05, "loss": 1.0017, "step": 29330 }, { "epoch": 25.983170947741364, "grad_norm": 0.2475748062133789, "learning_rate": 1e-05, "loss": 0.9809, "step": 29335 }, { "epoch": 25.98759964570416, "grad_norm": 0.25087156891822815, "learning_rate": 1e-05, "loss": 0.9556, "step": 29340 }, { "epoch": 25.992028343666963, "grad_norm": 0.24291250109672546, "learning_rate": 1e-05, "loss": 0.9579, "step": 29345 }, { "epoch": 25.99645704162976, "grad_norm": 0.25244009494781494, "learning_rate": 1e-05, "loss": 0.9771, "step": 29350 }, { "epoch": 26.000885739592558, "grad_norm": 0.19791699945926666, "learning_rate": 1e-05, "loss": 0.9837, "step": 29355 }, { "epoch": 26.00531443755536, "grad_norm": 0.244821235537529, "learning_rate": 1e-05, "loss": 0.9893, "step": 29360 }, { "epoch": 26.009743135518157, "grad_norm": 0.28714078664779663, "learning_rate": 1e-05, "loss": 1.0206, "step": 29365 }, { "epoch": 26.014171833480958, "grad_norm": 0.2341163009405136, "learning_rate": 1e-05, "loss": 0.9514, "step": 29370 }, { "epoch": 26.018600531443756, "grad_norm": 0.2755536735057831, "learning_rate": 1e-05, "loss": 1.0148, "step": 29375 }, { "epoch": 26.023029229406553, "grad_norm": 0.24109947681427002, "learning_rate": 1e-05, "loss": 1.0037, "step": 29380 }, { "epoch": 26.027457927369355, "grad_norm": 0.23363451659679413, "learning_rate": 1e-05, "loss": 0.9404, "step": 29385 }, { "epoch": 26.031886625332152, "grad_norm": 0.24639639258384705, "learning_rate": 1e-05, "loss": 0.9944, "step": 29390 }, { "epoch": 26.03631532329495, "grad_norm": 0.27804136276245117, "learning_rate": 1e-05, "loss": 0.9802, "step": 29395 }, { "epoch": 26.04074402125775, "grad_norm": 0.27713873982429504, "learning_rate": 1e-05, "loss": 1.0192, "step": 29400 }, { "epoch": 26.04517271922055, "grad_norm": 0.27685269713401794, "learning_rate": 1e-05, "loss": 0.9563, "step": 29405 }, { "epoch": 26.049601417183347, "grad_norm": 0.2663515508174896, "learning_rate": 1e-05, "loss": 0.9972, "step": 29410 }, { "epoch": 26.054030115146148, "grad_norm": 0.24985182285308838, "learning_rate": 1e-05, "loss": 1.0405, "step": 29415 }, { "epoch": 26.058458813108945, "grad_norm": 0.2278592586517334, "learning_rate": 1e-05, "loss": 0.9762, "step": 29420 }, { "epoch": 26.062887511071747, "grad_norm": 0.21478793025016785, "learning_rate": 1e-05, "loss": 0.9486, "step": 29425 }, { "epoch": 26.067316209034544, "grad_norm": 0.24094019830226898, "learning_rate": 1e-05, "loss": 0.9629, "step": 29430 }, { "epoch": 26.071744906997342, "grad_norm": 0.3006095588207245, "learning_rate": 1e-05, "loss": 1.0507, "step": 29435 }, { "epoch": 26.076173604960143, "grad_norm": 0.23204229772090912, "learning_rate": 1e-05, "loss": 1.0561, "step": 29440 }, { "epoch": 26.08060230292294, "grad_norm": 0.24658440053462982, "learning_rate": 1e-05, "loss": 1.0016, "step": 29445 }, { "epoch": 26.08503100088574, "grad_norm": 0.26472967863082886, "learning_rate": 1e-05, "loss": 0.9603, "step": 29450 }, { "epoch": 26.08945969884854, "grad_norm": 0.22890281677246094, "learning_rate": 1e-05, "loss": 0.9649, "step": 29455 }, { "epoch": 26.093888396811337, "grad_norm": 0.2702520787715912, "learning_rate": 1e-05, "loss": 0.9722, "step": 29460 }, { "epoch": 26.098317094774135, "grad_norm": 0.2833285629749298, "learning_rate": 1e-05, "loss": 1.0355, "step": 29465 }, { "epoch": 26.102745792736936, "grad_norm": 0.29363709688186646, "learning_rate": 1e-05, "loss": 0.9926, "step": 29470 }, { "epoch": 26.107174490699734, "grad_norm": 0.19844835996627808, "learning_rate": 1e-05, "loss": 0.9631, "step": 29475 }, { "epoch": 26.11160318866253, "grad_norm": 0.20662382245063782, "learning_rate": 1e-05, "loss": 1.0188, "step": 29480 }, { "epoch": 26.116031886625333, "grad_norm": 0.2563677728176117, "learning_rate": 1e-05, "loss": 0.9626, "step": 29485 }, { "epoch": 26.12046058458813, "grad_norm": 0.2552192807197571, "learning_rate": 1e-05, "loss": 1.033, "step": 29490 }, { "epoch": 26.12488928255093, "grad_norm": 0.24195873737335205, "learning_rate": 1e-05, "loss": 0.9663, "step": 29495 }, { "epoch": 26.12931798051373, "grad_norm": 0.2592959702014923, "learning_rate": 1e-05, "loss": 1.0076, "step": 29500 }, { "epoch": 26.133746678476527, "grad_norm": 0.2350163459777832, "learning_rate": 1e-05, "loss": 0.9973, "step": 29505 }, { "epoch": 26.138175376439328, "grad_norm": 0.25672343373298645, "learning_rate": 1e-05, "loss": 0.9359, "step": 29510 }, { "epoch": 26.142604074402126, "grad_norm": 0.20711758732795715, "learning_rate": 1e-05, "loss": 1.0013, "step": 29515 }, { "epoch": 26.147032772364923, "grad_norm": 0.27545568346977234, "learning_rate": 1e-05, "loss": 1.0198, "step": 29520 }, { "epoch": 26.151461470327725, "grad_norm": 0.2389090359210968, "learning_rate": 1e-05, "loss": 0.9847, "step": 29525 }, { "epoch": 26.155890168290522, "grad_norm": 0.21873405575752258, "learning_rate": 1e-05, "loss": 0.9756, "step": 29530 }, { "epoch": 26.16031886625332, "grad_norm": 0.25126391649246216, "learning_rate": 1e-05, "loss": 0.9567, "step": 29535 }, { "epoch": 26.16474756421612, "grad_norm": 0.2871840000152588, "learning_rate": 1e-05, "loss": 0.973, "step": 29540 }, { "epoch": 26.16917626217892, "grad_norm": 0.22746191918849945, "learning_rate": 1e-05, "loss": 1.0209, "step": 29545 }, { "epoch": 26.17360496014172, "grad_norm": 0.23948286473751068, "learning_rate": 1e-05, "loss": 0.9426, "step": 29550 }, { "epoch": 26.178033658104518, "grad_norm": 0.23553849756717682, "learning_rate": 1e-05, "loss": 0.9841, "step": 29555 }, { "epoch": 26.182462356067315, "grad_norm": 0.2790740132331848, "learning_rate": 1e-05, "loss": 0.9588, "step": 29560 }, { "epoch": 26.186891054030117, "grad_norm": 0.24037010967731476, "learning_rate": 1e-05, "loss": 0.9086, "step": 29565 }, { "epoch": 26.191319751992914, "grad_norm": 0.22348038852214813, "learning_rate": 1e-05, "loss": 1.0121, "step": 29570 }, { "epoch": 26.195748449955712, "grad_norm": 0.25426185131073, "learning_rate": 1e-05, "loss": 1.02, "step": 29575 }, { "epoch": 26.200177147918513, "grad_norm": 0.31084033846855164, "learning_rate": 1e-05, "loss": 1.0086, "step": 29580 }, { "epoch": 26.20460584588131, "grad_norm": 0.2768554389476776, "learning_rate": 1e-05, "loss": 0.9607, "step": 29585 }, { "epoch": 26.20903454384411, "grad_norm": 0.22014443576335907, "learning_rate": 1e-05, "loss": 0.9711, "step": 29590 }, { "epoch": 26.21346324180691, "grad_norm": 0.2207522690296173, "learning_rate": 1e-05, "loss": 0.9867, "step": 29595 }, { "epoch": 26.217891939769707, "grad_norm": 0.2294379025697708, "learning_rate": 1e-05, "loss": 0.9485, "step": 29600 }, { "epoch": 26.22232063773251, "grad_norm": 0.26175636053085327, "learning_rate": 1e-05, "loss": 0.9993, "step": 29605 }, { "epoch": 26.226749335695306, "grad_norm": 0.22617003321647644, "learning_rate": 1e-05, "loss": 0.9393, "step": 29610 }, { "epoch": 26.231178033658104, "grad_norm": 0.2585447132587433, "learning_rate": 1e-05, "loss": 0.9723, "step": 29615 }, { "epoch": 26.235606731620905, "grad_norm": 0.18052157759666443, "learning_rate": 1e-05, "loss": 0.9734, "step": 29620 }, { "epoch": 26.240035429583703, "grad_norm": 0.22431935369968414, "learning_rate": 1e-05, "loss": 0.9271, "step": 29625 }, { "epoch": 26.2444641275465, "grad_norm": 0.22433078289031982, "learning_rate": 1e-05, "loss": 1.0149, "step": 29630 }, { "epoch": 26.2488928255093, "grad_norm": 0.285879522562027, "learning_rate": 1e-05, "loss": 1.0044, "step": 29635 }, { "epoch": 26.2533215234721, "grad_norm": 0.2589271366596222, "learning_rate": 1e-05, "loss": 0.9535, "step": 29640 }, { "epoch": 26.257750221434897, "grad_norm": 0.23681409657001495, "learning_rate": 1e-05, "loss": 1.0169, "step": 29645 }, { "epoch": 26.262178919397698, "grad_norm": 0.2588694095611572, "learning_rate": 1e-05, "loss": 1.052, "step": 29650 }, { "epoch": 26.266607617360496, "grad_norm": 0.2883615791797638, "learning_rate": 1e-05, "loss": 0.9605, "step": 29655 }, { "epoch": 26.271036315323293, "grad_norm": 0.25754493474960327, "learning_rate": 1e-05, "loss": 0.9633, "step": 29660 }, { "epoch": 26.275465013286095, "grad_norm": 0.25957587361335754, "learning_rate": 1e-05, "loss": 0.9886, "step": 29665 }, { "epoch": 26.279893711248892, "grad_norm": 0.32041653990745544, "learning_rate": 1e-05, "loss": 1.0325, "step": 29670 }, { "epoch": 26.284322409211693, "grad_norm": 0.23580865561962128, "learning_rate": 1e-05, "loss": 1.0189, "step": 29675 }, { "epoch": 26.28875110717449, "grad_norm": 0.25689220428466797, "learning_rate": 1e-05, "loss": 1.0137, "step": 29680 }, { "epoch": 26.29317980513729, "grad_norm": 0.2997405230998993, "learning_rate": 1e-05, "loss": 0.9855, "step": 29685 }, { "epoch": 26.29760850310009, "grad_norm": 0.2702265679836273, "learning_rate": 1e-05, "loss": 0.9703, "step": 29690 }, { "epoch": 26.302037201062888, "grad_norm": 0.27100005745887756, "learning_rate": 1e-05, "loss": 0.9973, "step": 29695 }, { "epoch": 26.306465899025685, "grad_norm": 0.2557739019393921, "learning_rate": 1e-05, "loss": 0.9631, "step": 29700 }, { "epoch": 26.310894596988486, "grad_norm": 0.23851528763771057, "learning_rate": 1e-05, "loss": 0.9686, "step": 29705 }, { "epoch": 26.315323294951284, "grad_norm": 0.25407180190086365, "learning_rate": 1e-05, "loss": 1.0598, "step": 29710 }, { "epoch": 26.31975199291408, "grad_norm": 0.2696511447429657, "learning_rate": 1e-05, "loss": 0.996, "step": 29715 }, { "epoch": 26.324180690876883, "grad_norm": 0.23409490287303925, "learning_rate": 1e-05, "loss": 0.9988, "step": 29720 }, { "epoch": 26.32860938883968, "grad_norm": 0.20391660928726196, "learning_rate": 1e-05, "loss": 0.9321, "step": 29725 }, { "epoch": 26.333038086802482, "grad_norm": 0.23955675959587097, "learning_rate": 1e-05, "loss": 0.9295, "step": 29730 }, { "epoch": 26.33746678476528, "grad_norm": 0.25519853830337524, "learning_rate": 1e-05, "loss": 1.0211, "step": 29735 }, { "epoch": 26.341895482728077, "grad_norm": 0.3160719871520996, "learning_rate": 1e-05, "loss": 0.9678, "step": 29740 }, { "epoch": 26.34632418069088, "grad_norm": 0.2571757435798645, "learning_rate": 1e-05, "loss": 1.0137, "step": 29745 }, { "epoch": 26.350752878653676, "grad_norm": 0.25098538398742676, "learning_rate": 1e-05, "loss": 0.9345, "step": 29750 }, { "epoch": 26.355181576616474, "grad_norm": 0.2709645628929138, "learning_rate": 1e-05, "loss": 1.0077, "step": 29755 }, { "epoch": 26.359610274579275, "grad_norm": 0.3633810579776764, "learning_rate": 1e-05, "loss": 0.9883, "step": 29760 }, { "epoch": 26.364038972542073, "grad_norm": 0.273794949054718, "learning_rate": 1e-05, "loss": 1.0108, "step": 29765 }, { "epoch": 26.36846767050487, "grad_norm": 0.21977777779102325, "learning_rate": 1e-05, "loss": 1.013, "step": 29770 }, { "epoch": 26.37289636846767, "grad_norm": 0.24285535514354706, "learning_rate": 1e-05, "loss": 0.956, "step": 29775 }, { "epoch": 26.37732506643047, "grad_norm": 0.22680318355560303, "learning_rate": 1e-05, "loss": 1.0053, "step": 29780 }, { "epoch": 26.381753764393267, "grad_norm": 0.21320123970508575, "learning_rate": 1e-05, "loss": 1.0108, "step": 29785 }, { "epoch": 26.386182462356068, "grad_norm": 0.3039967715740204, "learning_rate": 1e-05, "loss": 0.9799, "step": 29790 }, { "epoch": 26.390611160318866, "grad_norm": 0.3038237392902374, "learning_rate": 1e-05, "loss": 0.9807, "step": 29795 }, { "epoch": 26.395039858281667, "grad_norm": 0.23035113513469696, "learning_rate": 1e-05, "loss": 0.9985, "step": 29800 }, { "epoch": 26.399468556244464, "grad_norm": 0.24465003609657288, "learning_rate": 1e-05, "loss": 0.9909, "step": 29805 }, { "epoch": 26.403897254207262, "grad_norm": 0.2605549395084381, "learning_rate": 1e-05, "loss": 0.9907, "step": 29810 }, { "epoch": 26.408325952170063, "grad_norm": 0.23271866142749786, "learning_rate": 1e-05, "loss": 0.9819, "step": 29815 }, { "epoch": 26.41275465013286, "grad_norm": 0.2813819944858551, "learning_rate": 1e-05, "loss": 0.9705, "step": 29820 }, { "epoch": 26.41718334809566, "grad_norm": 0.23463965952396393, "learning_rate": 1e-05, "loss": 0.9768, "step": 29825 }, { "epoch": 26.42161204605846, "grad_norm": 0.2190408855676651, "learning_rate": 1e-05, "loss": 0.9586, "step": 29830 }, { "epoch": 26.426040744021257, "grad_norm": 0.2265297770500183, "learning_rate": 1e-05, "loss": 0.9919, "step": 29835 }, { "epoch": 26.430469441984055, "grad_norm": 0.280929833650589, "learning_rate": 1e-05, "loss": 0.9998, "step": 29840 }, { "epoch": 26.434898139946856, "grad_norm": 0.2773053050041199, "learning_rate": 1e-05, "loss": 0.9989, "step": 29845 }, { "epoch": 26.439326837909654, "grad_norm": 0.2736125588417053, "learning_rate": 1e-05, "loss": 0.9784, "step": 29850 }, { "epoch": 26.443755535872455, "grad_norm": 0.21759001910686493, "learning_rate": 1e-05, "loss": 0.9813, "step": 29855 }, { "epoch": 26.448184233835253, "grad_norm": 0.2628740966320038, "learning_rate": 1e-05, "loss": 0.9671, "step": 29860 }, { "epoch": 26.45261293179805, "grad_norm": 0.23844057321548462, "learning_rate": 1e-05, "loss": 0.961, "step": 29865 }, { "epoch": 26.45704162976085, "grad_norm": 0.23006317019462585, "learning_rate": 1e-05, "loss": 1.0044, "step": 29870 }, { "epoch": 26.46147032772365, "grad_norm": 0.24007225036621094, "learning_rate": 1e-05, "loss": 1.0046, "step": 29875 }, { "epoch": 26.465899025686447, "grad_norm": 0.29693323373794556, "learning_rate": 1e-05, "loss": 0.9722, "step": 29880 }, { "epoch": 26.47032772364925, "grad_norm": 0.23353172838687897, "learning_rate": 1e-05, "loss": 0.9438, "step": 29885 }, { "epoch": 26.474756421612046, "grad_norm": 0.21376121044158936, "learning_rate": 1e-05, "loss": 0.9944, "step": 29890 }, { "epoch": 26.479185119574844, "grad_norm": 0.2312644124031067, "learning_rate": 1e-05, "loss": 1.0286, "step": 29895 }, { "epoch": 26.483613817537645, "grad_norm": 0.2439718395471573, "learning_rate": 1e-05, "loss": 0.9613, "step": 29900 }, { "epoch": 26.488042515500442, "grad_norm": 0.28455981612205505, "learning_rate": 1e-05, "loss": 0.9806, "step": 29905 }, { "epoch": 26.49247121346324, "grad_norm": 0.2266928255558014, "learning_rate": 1e-05, "loss": 0.9962, "step": 29910 }, { "epoch": 26.49689991142604, "grad_norm": 0.22934497892856598, "learning_rate": 1e-05, "loss": 1.0183, "step": 29915 }, { "epoch": 26.50132860938884, "grad_norm": 0.23625163733959198, "learning_rate": 1e-05, "loss": 0.9514, "step": 29920 }, { "epoch": 26.50575730735164, "grad_norm": 0.21642480790615082, "learning_rate": 1e-05, "loss": 0.9414, "step": 29925 }, { "epoch": 26.510186005314438, "grad_norm": 0.21227529644966125, "learning_rate": 1e-05, "loss": 0.9529, "step": 29930 }, { "epoch": 26.514614703277235, "grad_norm": 0.22259730100631714, "learning_rate": 1e-05, "loss": 1.0482, "step": 29935 }, { "epoch": 26.519043401240037, "grad_norm": 0.22608643770217896, "learning_rate": 1e-05, "loss": 0.9928, "step": 29940 }, { "epoch": 26.523472099202834, "grad_norm": 0.22166387736797333, "learning_rate": 1e-05, "loss": 1.0214, "step": 29945 }, { "epoch": 26.527900797165632, "grad_norm": 0.27043792605400085, "learning_rate": 1e-05, "loss": 0.9994, "step": 29950 }, { "epoch": 26.532329495128433, "grad_norm": 0.24039876461029053, "learning_rate": 1e-05, "loss": 0.9515, "step": 29955 }, { "epoch": 26.53675819309123, "grad_norm": 0.23946158587932587, "learning_rate": 1e-05, "loss": 0.9759, "step": 29960 }, { "epoch": 26.54118689105403, "grad_norm": 0.25826382637023926, "learning_rate": 1e-05, "loss": 0.9822, "step": 29965 }, { "epoch": 26.54561558901683, "grad_norm": 0.2664659023284912, "learning_rate": 1e-05, "loss": 0.9334, "step": 29970 }, { "epoch": 26.550044286979627, "grad_norm": 0.22836853563785553, "learning_rate": 1e-05, "loss": 0.956, "step": 29975 }, { "epoch": 26.55447298494243, "grad_norm": 0.2150292694568634, "learning_rate": 1e-05, "loss": 0.9904, "step": 29980 }, { "epoch": 26.558901682905226, "grad_norm": 0.28751879930496216, "learning_rate": 1e-05, "loss": 0.9565, "step": 29985 }, { "epoch": 26.563330380868024, "grad_norm": 0.26578569412231445, "learning_rate": 1e-05, "loss": 1.0244, "step": 29990 }, { "epoch": 26.567759078830825, "grad_norm": 0.2752525806427002, "learning_rate": 1e-05, "loss": 0.9975, "step": 29995 }, { "epoch": 26.572187776793623, "grad_norm": 0.20902705192565918, "learning_rate": 1e-05, "loss": 0.9718, "step": 30000 }, { "epoch": 26.57661647475642, "grad_norm": 0.247431218624115, "learning_rate": 1e-05, "loss": 0.995, "step": 30005 }, { "epoch": 26.58104517271922, "grad_norm": 0.2535596787929535, "learning_rate": 1e-05, "loss": 0.9771, "step": 30010 }, { "epoch": 26.58547387068202, "grad_norm": 0.30459001660346985, "learning_rate": 1e-05, "loss": 0.9239, "step": 30015 }, { "epoch": 26.589902568644817, "grad_norm": 0.24338839948177338, "learning_rate": 1e-05, "loss": 0.9491, "step": 30020 }, { "epoch": 26.594331266607618, "grad_norm": 0.21212758123874664, "learning_rate": 1e-05, "loss": 0.9919, "step": 30025 }, { "epoch": 26.598759964570416, "grad_norm": 0.27010592818260193, "learning_rate": 1e-05, "loss": 0.978, "step": 30030 }, { "epoch": 26.603188662533213, "grad_norm": 0.226255863904953, "learning_rate": 1e-05, "loss": 0.964, "step": 30035 }, { "epoch": 26.607617360496015, "grad_norm": 0.22579582035541534, "learning_rate": 1e-05, "loss": 1.0167, "step": 30040 }, { "epoch": 26.612046058458812, "grad_norm": 0.23832368850708008, "learning_rate": 1e-05, "loss": 0.9716, "step": 30045 }, { "epoch": 26.616474756421614, "grad_norm": 0.24104498326778412, "learning_rate": 1e-05, "loss": 1.0171, "step": 30050 }, { "epoch": 26.62090345438441, "grad_norm": 0.25107672810554504, "learning_rate": 1e-05, "loss": 1.0098, "step": 30055 }, { "epoch": 26.62533215234721, "grad_norm": 0.24267186224460602, "learning_rate": 1e-05, "loss": 0.9557, "step": 30060 }, { "epoch": 26.62976085031001, "grad_norm": 0.24306286871433258, "learning_rate": 1e-05, "loss": 1.0154, "step": 30065 }, { "epoch": 26.634189548272808, "grad_norm": 0.22851808369159698, "learning_rate": 1e-05, "loss": 0.9871, "step": 30070 }, { "epoch": 26.638618246235605, "grad_norm": 0.2540264427661896, "learning_rate": 1e-05, "loss": 1.0123, "step": 30075 }, { "epoch": 26.643046944198407, "grad_norm": 0.23075030744075775, "learning_rate": 1e-05, "loss": 0.9582, "step": 30080 }, { "epoch": 26.647475642161204, "grad_norm": 0.25420352816581726, "learning_rate": 1e-05, "loss": 0.975, "step": 30085 }, { "epoch": 26.651904340124002, "grad_norm": 0.23552055656909943, "learning_rate": 1e-05, "loss": 0.95, "step": 30090 }, { "epoch": 26.656333038086803, "grad_norm": 0.27110713720321655, "learning_rate": 1e-05, "loss": 0.9881, "step": 30095 }, { "epoch": 26.6607617360496, "grad_norm": 0.3211550712585449, "learning_rate": 1e-05, "loss": 0.961, "step": 30100 }, { "epoch": 26.665190434012402, "grad_norm": 0.28638020157814026, "learning_rate": 1e-05, "loss": 0.9753, "step": 30105 }, { "epoch": 26.6696191319752, "grad_norm": 0.2437484860420227, "learning_rate": 1e-05, "loss": 1.0003, "step": 30110 }, { "epoch": 26.674047829937997, "grad_norm": 0.23589135706424713, "learning_rate": 1e-05, "loss": 0.9749, "step": 30115 }, { "epoch": 26.6784765279008, "grad_norm": 0.2294664829969406, "learning_rate": 1e-05, "loss": 1.0037, "step": 30120 }, { "epoch": 26.682905225863596, "grad_norm": 0.2592249810695648, "learning_rate": 1e-05, "loss": 1.0193, "step": 30125 }, { "epoch": 26.687333923826394, "grad_norm": 0.2469903528690338, "learning_rate": 1e-05, "loss": 0.9951, "step": 30130 }, { "epoch": 26.691762621789195, "grad_norm": 0.23921608924865723, "learning_rate": 1e-05, "loss": 0.9543, "step": 30135 }, { "epoch": 26.696191319751993, "grad_norm": 0.24063198268413544, "learning_rate": 1e-05, "loss": 0.9964, "step": 30140 }, { "epoch": 26.70062001771479, "grad_norm": 0.2603273093700409, "learning_rate": 1e-05, "loss": 1.0077, "step": 30145 }, { "epoch": 26.70504871567759, "grad_norm": 0.2889840304851532, "learning_rate": 1e-05, "loss": 1.0324, "step": 30150 }, { "epoch": 26.70947741364039, "grad_norm": 0.2850040793418884, "learning_rate": 1e-05, "loss": 1.0045, "step": 30155 }, { "epoch": 26.713906111603187, "grad_norm": 0.2411225140094757, "learning_rate": 1e-05, "loss": 0.9429, "step": 30160 }, { "epoch": 26.718334809565988, "grad_norm": 0.2227754145860672, "learning_rate": 1e-05, "loss": 0.9886, "step": 30165 }, { "epoch": 26.722763507528786, "grad_norm": 0.2847329080104828, "learning_rate": 1e-05, "loss": 0.9962, "step": 30170 }, { "epoch": 26.727192205491587, "grad_norm": 0.2909183204174042, "learning_rate": 1e-05, "loss": 1.0192, "step": 30175 }, { "epoch": 26.731620903454385, "grad_norm": 0.23366843163967133, "learning_rate": 1e-05, "loss": 1.0485, "step": 30180 }, { "epoch": 26.736049601417182, "grad_norm": 0.25701847672462463, "learning_rate": 1e-05, "loss": 1.0325, "step": 30185 }, { "epoch": 26.740478299379983, "grad_norm": 0.241773322224617, "learning_rate": 1e-05, "loss": 0.9858, "step": 30190 }, { "epoch": 26.74490699734278, "grad_norm": 0.26165956258773804, "learning_rate": 1e-05, "loss": 0.9415, "step": 30195 }, { "epoch": 26.74933569530558, "grad_norm": 0.2630947232246399, "learning_rate": 1e-05, "loss": 0.9732, "step": 30200 }, { "epoch": 26.75376439326838, "grad_norm": 0.30287283658981323, "learning_rate": 1e-05, "loss": 0.9715, "step": 30205 }, { "epoch": 26.758193091231178, "grad_norm": 0.2831924855709076, "learning_rate": 1e-05, "loss": 0.9733, "step": 30210 }, { "epoch": 26.762621789193975, "grad_norm": 0.2565245032310486, "learning_rate": 1e-05, "loss": 0.9561, "step": 30215 }, { "epoch": 26.767050487156776, "grad_norm": 0.22565393149852753, "learning_rate": 1e-05, "loss": 0.9983, "step": 30220 }, { "epoch": 26.771479185119574, "grad_norm": 0.29895102977752686, "learning_rate": 1e-05, "loss": 0.9644, "step": 30225 }, { "epoch": 26.775907883082375, "grad_norm": 0.2604537904262543, "learning_rate": 1e-05, "loss": 0.9856, "step": 30230 }, { "epoch": 26.780336581045173, "grad_norm": 0.30843308568000793, "learning_rate": 1e-05, "loss": 0.9839, "step": 30235 }, { "epoch": 26.78476527900797, "grad_norm": 0.3403940200805664, "learning_rate": 1e-05, "loss": 0.9623, "step": 30240 }, { "epoch": 26.789193976970772, "grad_norm": 0.21627341210842133, "learning_rate": 1e-05, "loss": 0.9981, "step": 30245 }, { "epoch": 26.79362267493357, "grad_norm": 0.2071480005979538, "learning_rate": 1e-05, "loss": 0.9511, "step": 30250 }, { "epoch": 26.798051372896367, "grad_norm": 0.24159276485443115, "learning_rate": 1e-05, "loss": 1.0304, "step": 30255 }, { "epoch": 26.80248007085917, "grad_norm": 0.1980116367340088, "learning_rate": 1e-05, "loss": 0.9865, "step": 30260 }, { "epoch": 26.806908768821966, "grad_norm": 0.21964040398597717, "learning_rate": 1e-05, "loss": 0.9856, "step": 30265 }, { "epoch": 26.811337466784764, "grad_norm": 0.2412816286087036, "learning_rate": 1e-05, "loss": 1.009, "step": 30270 }, { "epoch": 26.815766164747565, "grad_norm": 0.261136531829834, "learning_rate": 1e-05, "loss": 1.0036, "step": 30275 }, { "epoch": 26.820194862710363, "grad_norm": 0.23315303027629852, "learning_rate": 1e-05, "loss": 0.9749, "step": 30280 }, { "epoch": 26.824623560673164, "grad_norm": 0.21400538086891174, "learning_rate": 1e-05, "loss": 1.0123, "step": 30285 }, { "epoch": 26.82905225863596, "grad_norm": 0.2653231620788574, "learning_rate": 1e-05, "loss": 0.9743, "step": 30290 }, { "epoch": 26.83348095659876, "grad_norm": 0.23129649460315704, "learning_rate": 1e-05, "loss": 1.0217, "step": 30295 }, { "epoch": 26.83790965456156, "grad_norm": 0.39349043369293213, "learning_rate": 1e-05, "loss": 0.975, "step": 30300 }, { "epoch": 26.842338352524358, "grad_norm": 0.21276351809501648, "learning_rate": 1e-05, "loss": 1.0039, "step": 30305 }, { "epoch": 26.846767050487156, "grad_norm": 0.2156832367181778, "learning_rate": 1e-05, "loss": 1.0458, "step": 30310 }, { "epoch": 26.851195748449957, "grad_norm": 0.2102944254875183, "learning_rate": 1e-05, "loss": 0.9973, "step": 30315 }, { "epoch": 26.855624446412754, "grad_norm": 0.30699586868286133, "learning_rate": 1e-05, "loss": 1.0105, "step": 30320 }, { "epoch": 26.860053144375552, "grad_norm": 0.24376645684242249, "learning_rate": 1e-05, "loss": 1.053, "step": 30325 }, { "epoch": 26.864481842338353, "grad_norm": 0.23732689023017883, "learning_rate": 1e-05, "loss": 0.999, "step": 30330 }, { "epoch": 26.86891054030115, "grad_norm": 0.24358725547790527, "learning_rate": 1e-05, "loss": 0.9888, "step": 30335 }, { "epoch": 26.873339238263952, "grad_norm": 0.22225724160671234, "learning_rate": 1e-05, "loss": 0.9479, "step": 30340 }, { "epoch": 26.87776793622675, "grad_norm": 0.3010594844818115, "learning_rate": 1e-05, "loss": 0.9966, "step": 30345 }, { "epoch": 26.882196634189548, "grad_norm": 0.21877768635749817, "learning_rate": 1e-05, "loss": 0.9438, "step": 30350 }, { "epoch": 26.88662533215235, "grad_norm": 0.29026171565055847, "learning_rate": 1e-05, "loss": 1.0364, "step": 30355 }, { "epoch": 26.891054030115146, "grad_norm": 0.25234198570251465, "learning_rate": 1e-05, "loss": 0.9656, "step": 30360 }, { "epoch": 26.895482728077944, "grad_norm": 0.22945880889892578, "learning_rate": 1e-05, "loss": 0.9971, "step": 30365 }, { "epoch": 26.899911426040745, "grad_norm": 0.25953271985054016, "learning_rate": 1e-05, "loss": 0.9853, "step": 30370 }, { "epoch": 26.904340124003543, "grad_norm": 0.27650877833366394, "learning_rate": 1e-05, "loss": 0.996, "step": 30375 }, { "epoch": 26.90876882196634, "grad_norm": 0.2854640483856201, "learning_rate": 1e-05, "loss": 1.0073, "step": 30380 }, { "epoch": 26.913197519929142, "grad_norm": 0.3169996440410614, "learning_rate": 1e-05, "loss": 0.9784, "step": 30385 }, { "epoch": 26.91762621789194, "grad_norm": 0.20262137055397034, "learning_rate": 1e-05, "loss": 1.0222, "step": 30390 }, { "epoch": 26.922054915854737, "grad_norm": 0.2926405370235443, "learning_rate": 1e-05, "loss": 0.9892, "step": 30395 }, { "epoch": 26.92648361381754, "grad_norm": 0.24764838814735413, "learning_rate": 1e-05, "loss": 0.9518, "step": 30400 }, { "epoch": 26.930912311780336, "grad_norm": 0.2266751080751419, "learning_rate": 1e-05, "loss": 0.9924, "step": 30405 }, { "epoch": 26.935341009743137, "grad_norm": 0.2836935818195343, "learning_rate": 1e-05, "loss": 0.9865, "step": 30410 }, { "epoch": 26.939769707705935, "grad_norm": 0.26348793506622314, "learning_rate": 1e-05, "loss": 0.9933, "step": 30415 }, { "epoch": 26.944198405668732, "grad_norm": 0.25096118450164795, "learning_rate": 1e-05, "loss": 0.9629, "step": 30420 }, { "epoch": 26.948627103631534, "grad_norm": 0.24840302765369415, "learning_rate": 1e-05, "loss": 0.9526, "step": 30425 }, { "epoch": 26.95305580159433, "grad_norm": 0.25247102975845337, "learning_rate": 1e-05, "loss": 1.0003, "step": 30430 }, { "epoch": 26.95748449955713, "grad_norm": 0.23725663125514984, "learning_rate": 1e-05, "loss": 0.9526, "step": 30435 }, { "epoch": 26.96191319751993, "grad_norm": 0.2660263478755951, "learning_rate": 1e-05, "loss": 0.9673, "step": 30440 }, { "epoch": 26.966341895482728, "grad_norm": 0.24798409640789032, "learning_rate": 1e-05, "loss": 1.0049, "step": 30445 }, { "epoch": 26.970770593445526, "grad_norm": 0.2632289528846741, "learning_rate": 1e-05, "loss": 0.9383, "step": 30450 }, { "epoch": 26.975199291408327, "grad_norm": 0.2203081101179123, "learning_rate": 1e-05, "loss": 1.0022, "step": 30455 }, { "epoch": 26.979627989371124, "grad_norm": 0.25215771794319153, "learning_rate": 1e-05, "loss": 0.9661, "step": 30460 }, { "epoch": 26.984056687333926, "grad_norm": 0.25819385051727295, "learning_rate": 1e-05, "loss": 1.0196, "step": 30465 }, { "epoch": 26.988485385296723, "grad_norm": 0.24728737771511078, "learning_rate": 1e-05, "loss": 1.0101, "step": 30470 }, { "epoch": 26.99291408325952, "grad_norm": 0.21859033405780792, "learning_rate": 1e-05, "loss": 0.9418, "step": 30475 }, { "epoch": 26.997342781222322, "grad_norm": 0.2844151556491852, "learning_rate": 1e-05, "loss": 0.9628, "step": 30480 }, { "epoch": 27.00177147918512, "grad_norm": 0.3071330487728119, "learning_rate": 1e-05, "loss": 0.9652, "step": 30485 }, { "epoch": 27.006200177147917, "grad_norm": 0.23135030269622803, "learning_rate": 1e-05, "loss": 1.0283, "step": 30490 }, { "epoch": 27.01062887511072, "grad_norm": 0.22536525130271912, "learning_rate": 1e-05, "loss": 1.0062, "step": 30495 }, { "epoch": 27.015057573073516, "grad_norm": 0.24716205894947052, "learning_rate": 1e-05, "loss": 0.9857, "step": 30500 }, { "epoch": 27.019486271036314, "grad_norm": 0.2711803615093231, "learning_rate": 1e-05, "loss": 0.9824, "step": 30505 }, { "epoch": 27.023914968999115, "grad_norm": 0.21956685185432434, "learning_rate": 1e-05, "loss": 1.0012, "step": 30510 }, { "epoch": 27.028343666961913, "grad_norm": 0.24644488096237183, "learning_rate": 1e-05, "loss": 1.0037, "step": 30515 }, { "epoch": 27.03277236492471, "grad_norm": 0.21150024235248566, "learning_rate": 1e-05, "loss": 0.9574, "step": 30520 }, { "epoch": 27.03720106288751, "grad_norm": 0.288254052400589, "learning_rate": 1e-05, "loss": 0.981, "step": 30525 }, { "epoch": 27.04162976085031, "grad_norm": 0.22987912595272064, "learning_rate": 1e-05, "loss": 1.0402, "step": 30530 }, { "epoch": 27.04605845881311, "grad_norm": 0.22410458326339722, "learning_rate": 1e-05, "loss": 1.0001, "step": 30535 }, { "epoch": 27.050487156775908, "grad_norm": 0.2752321660518646, "learning_rate": 1e-05, "loss": 1.0426, "step": 30540 }, { "epoch": 27.054915854738706, "grad_norm": 0.25184276700019836, "learning_rate": 1e-05, "loss": 0.9757, "step": 30545 }, { "epoch": 27.059344552701507, "grad_norm": 0.26193737983703613, "learning_rate": 1e-05, "loss": 0.9712, "step": 30550 }, { "epoch": 27.063773250664305, "grad_norm": 0.2575017809867859, "learning_rate": 1e-05, "loss": 1.0073, "step": 30555 }, { "epoch": 27.068201948627102, "grad_norm": 0.24047674238681793, "learning_rate": 1e-05, "loss": 1.007, "step": 30560 }, { "epoch": 27.072630646589904, "grad_norm": 0.3317027688026428, "learning_rate": 1e-05, "loss": 1.0166, "step": 30565 }, { "epoch": 27.0770593445527, "grad_norm": 0.2667787969112396, "learning_rate": 1e-05, "loss": 1.0454, "step": 30570 }, { "epoch": 27.0814880425155, "grad_norm": 0.24323886632919312, "learning_rate": 1e-05, "loss": 0.9685, "step": 30575 }, { "epoch": 27.0859167404783, "grad_norm": 0.20749755203723907, "learning_rate": 1e-05, "loss": 0.9552, "step": 30580 }, { "epoch": 27.090345438441098, "grad_norm": 0.2156824767589569, "learning_rate": 1e-05, "loss": 0.9403, "step": 30585 }, { "epoch": 27.0947741364039, "grad_norm": 0.27501180768013, "learning_rate": 1e-05, "loss": 0.9879, "step": 30590 }, { "epoch": 27.099202834366697, "grad_norm": 0.23036827147006989, "learning_rate": 1e-05, "loss": 0.9508, "step": 30595 }, { "epoch": 27.103631532329494, "grad_norm": 0.23696354031562805, "learning_rate": 1e-05, "loss": 1.0656, "step": 30600 }, { "epoch": 27.108060230292296, "grad_norm": 0.23960939049720764, "learning_rate": 1e-05, "loss": 0.9659, "step": 30605 }, { "epoch": 27.112488928255093, "grad_norm": 0.24240179359912872, "learning_rate": 1e-05, "loss": 1.0534, "step": 30610 }, { "epoch": 27.11691762621789, "grad_norm": 0.21522845327854156, "learning_rate": 1e-05, "loss": 1.048, "step": 30615 }, { "epoch": 27.121346324180692, "grad_norm": 0.24755887687206268, "learning_rate": 1e-05, "loss": 1.0035, "step": 30620 }, { "epoch": 27.12577502214349, "grad_norm": 0.2615501880645752, "learning_rate": 1e-05, "loss": 0.9958, "step": 30625 }, { "epoch": 27.130203720106287, "grad_norm": 0.2601321339607239, "learning_rate": 1e-05, "loss": 1.0272, "step": 30630 }, { "epoch": 27.13463241806909, "grad_norm": 0.27466320991516113, "learning_rate": 1e-05, "loss": 0.9606, "step": 30635 }, { "epoch": 27.139061116031886, "grad_norm": 0.23908357322216034, "learning_rate": 1e-05, "loss": 0.978, "step": 30640 }, { "epoch": 27.143489813994684, "grad_norm": 0.28776228427886963, "learning_rate": 1e-05, "loss": 1.0012, "step": 30645 }, { "epoch": 27.147918511957485, "grad_norm": 0.20808722078800201, "learning_rate": 1e-05, "loss": 0.931, "step": 30650 }, { "epoch": 27.152347209920283, "grad_norm": 0.24358826875686646, "learning_rate": 1e-05, "loss": 1.0012, "step": 30655 }, { "epoch": 27.156775907883084, "grad_norm": 0.18493781983852386, "learning_rate": 1e-05, "loss": 1.0306, "step": 30660 }, { "epoch": 27.16120460584588, "grad_norm": 0.23946912586688995, "learning_rate": 1e-05, "loss": 1.0244, "step": 30665 }, { "epoch": 27.16563330380868, "grad_norm": 0.25911280512809753, "learning_rate": 1e-05, "loss": 0.9833, "step": 30670 }, { "epoch": 27.17006200177148, "grad_norm": 0.21719790995121002, "learning_rate": 1e-05, "loss": 0.9434, "step": 30675 }, { "epoch": 27.174490699734278, "grad_norm": 0.22320930659770966, "learning_rate": 1e-05, "loss": 1.0539, "step": 30680 }, { "epoch": 27.178919397697076, "grad_norm": 0.22360873222351074, "learning_rate": 1e-05, "loss": 0.9704, "step": 30685 }, { "epoch": 27.183348095659877, "grad_norm": 0.24945010244846344, "learning_rate": 1e-05, "loss": 0.9957, "step": 30690 }, { "epoch": 27.187776793622675, "grad_norm": 0.2389744222164154, "learning_rate": 1e-05, "loss": 0.9808, "step": 30695 }, { "epoch": 27.192205491585472, "grad_norm": 0.22741013765335083, "learning_rate": 1e-05, "loss": 1.0146, "step": 30700 }, { "epoch": 27.196634189548273, "grad_norm": 0.23818716406822205, "learning_rate": 1e-05, "loss": 0.9922, "step": 30705 }, { "epoch": 27.20106288751107, "grad_norm": 0.23088152706623077, "learning_rate": 1e-05, "loss": 1.0209, "step": 30710 }, { "epoch": 27.205491585473872, "grad_norm": 0.20602001249790192, "learning_rate": 1e-05, "loss": 1.0543, "step": 30715 }, { "epoch": 27.20992028343667, "grad_norm": 0.2460908591747284, "learning_rate": 1e-05, "loss": 0.9589, "step": 30720 }, { "epoch": 27.214348981399468, "grad_norm": 0.22483669221401215, "learning_rate": 1e-05, "loss": 0.9957, "step": 30725 }, { "epoch": 27.21877767936227, "grad_norm": 0.2405012995004654, "learning_rate": 1e-05, "loss": 0.9898, "step": 30730 }, { "epoch": 27.223206377325067, "grad_norm": 0.2231002002954483, "learning_rate": 1e-05, "loss": 0.9479, "step": 30735 }, { "epoch": 27.227635075287864, "grad_norm": 0.2702237367630005, "learning_rate": 1e-05, "loss": 0.983, "step": 30740 }, { "epoch": 27.232063773250665, "grad_norm": 0.27780887484550476, "learning_rate": 1e-05, "loss": 0.9434, "step": 30745 }, { "epoch": 27.236492471213463, "grad_norm": 0.2398044317960739, "learning_rate": 1e-05, "loss": 1.0023, "step": 30750 }, { "epoch": 27.24092116917626, "grad_norm": 0.21106013655662537, "learning_rate": 1e-05, "loss": 0.9586, "step": 30755 }, { "epoch": 27.245349867139062, "grad_norm": 0.2806750237941742, "learning_rate": 1e-05, "loss": 1.0571, "step": 30760 }, { "epoch": 27.24977856510186, "grad_norm": 0.3151966333389282, "learning_rate": 1e-05, "loss": 0.9939, "step": 30765 }, { "epoch": 27.254207263064657, "grad_norm": 0.23356717824935913, "learning_rate": 1e-05, "loss": 0.9398, "step": 30770 }, { "epoch": 27.25863596102746, "grad_norm": 0.26284605264663696, "learning_rate": 1e-05, "loss": 1.0577, "step": 30775 }, { "epoch": 27.263064658990256, "grad_norm": 0.2558484673500061, "learning_rate": 1e-05, "loss": 0.9423, "step": 30780 }, { "epoch": 27.267493356953057, "grad_norm": 0.239601731300354, "learning_rate": 1e-05, "loss": 1.0183, "step": 30785 }, { "epoch": 27.271922054915855, "grad_norm": 0.2502806484699249, "learning_rate": 1e-05, "loss": 1.0057, "step": 30790 }, { "epoch": 27.276350752878653, "grad_norm": 0.22724245488643646, "learning_rate": 1e-05, "loss": 0.989, "step": 30795 }, { "epoch": 27.280779450841454, "grad_norm": 0.22719928622245789, "learning_rate": 1e-05, "loss": 1.0205, "step": 30800 }, { "epoch": 27.28520814880425, "grad_norm": 0.2547604739665985, "learning_rate": 1e-05, "loss": 1.0107, "step": 30805 }, { "epoch": 27.28963684676705, "grad_norm": 0.23706574738025665, "learning_rate": 1e-05, "loss": 0.9947, "step": 30810 }, { "epoch": 27.29406554472985, "grad_norm": 0.2269642949104309, "learning_rate": 1e-05, "loss": 0.966, "step": 30815 }, { "epoch": 27.298494242692648, "grad_norm": 0.21294556558132172, "learning_rate": 1e-05, "loss": 1.0224, "step": 30820 }, { "epoch": 27.302922940655446, "grad_norm": 0.23332804441452026, "learning_rate": 1e-05, "loss": 0.9863, "step": 30825 }, { "epoch": 27.307351638618247, "grad_norm": 0.2975403964519501, "learning_rate": 1e-05, "loss": 0.9824, "step": 30830 }, { "epoch": 27.311780336581045, "grad_norm": 0.22429126501083374, "learning_rate": 1e-05, "loss": 0.9436, "step": 30835 }, { "epoch": 27.316209034543846, "grad_norm": 0.2342357337474823, "learning_rate": 1e-05, "loss": 0.9299, "step": 30840 }, { "epoch": 27.320637732506643, "grad_norm": 0.2459591180086136, "learning_rate": 1e-05, "loss": 0.9968, "step": 30845 }, { "epoch": 27.32506643046944, "grad_norm": 0.21968969702720642, "learning_rate": 1e-05, "loss": 0.987, "step": 30850 }, { "epoch": 27.329495128432242, "grad_norm": 0.25242751836776733, "learning_rate": 1e-05, "loss": 0.9622, "step": 30855 }, { "epoch": 27.33392382639504, "grad_norm": 0.29921382665634155, "learning_rate": 1e-05, "loss": 0.9933, "step": 30860 }, { "epoch": 27.338352524357838, "grad_norm": 0.26967331767082214, "learning_rate": 1e-05, "loss": 0.9944, "step": 30865 }, { "epoch": 27.34278122232064, "grad_norm": 0.29027464985847473, "learning_rate": 1e-05, "loss": 1.0089, "step": 30870 }, { "epoch": 27.347209920283436, "grad_norm": 0.2600877285003662, "learning_rate": 1e-05, "loss": 1.0114, "step": 30875 }, { "epoch": 27.351638618246234, "grad_norm": 0.24870745837688446, "learning_rate": 1e-05, "loss": 0.991, "step": 30880 }, { "epoch": 27.356067316209035, "grad_norm": 0.2901571989059448, "learning_rate": 1e-05, "loss": 0.9839, "step": 30885 }, { "epoch": 27.360496014171833, "grad_norm": 0.2893698513507843, "learning_rate": 1e-05, "loss": 0.9971, "step": 30890 }, { "epoch": 27.36492471213463, "grad_norm": 0.2636055052280426, "learning_rate": 1e-05, "loss": 1.02, "step": 30895 }, { "epoch": 27.369353410097432, "grad_norm": 0.2832021117210388, "learning_rate": 1e-05, "loss": 1.0002, "step": 30900 }, { "epoch": 27.37378210806023, "grad_norm": 0.23784591257572174, "learning_rate": 1e-05, "loss": 0.9951, "step": 30905 }, { "epoch": 27.37821080602303, "grad_norm": 0.23933926224708557, "learning_rate": 1e-05, "loss": 0.9793, "step": 30910 }, { "epoch": 27.38263950398583, "grad_norm": 0.20947566628456116, "learning_rate": 1e-05, "loss": 1.0974, "step": 30915 }, { "epoch": 27.387068201948626, "grad_norm": 0.23381993174552917, "learning_rate": 1e-05, "loss": 0.9388, "step": 30920 }, { "epoch": 27.391496899911427, "grad_norm": 0.24552865326404572, "learning_rate": 1e-05, "loss": 0.9922, "step": 30925 }, { "epoch": 27.395925597874225, "grad_norm": 0.24369806051254272, "learning_rate": 1e-05, "loss": 1.0151, "step": 30930 }, { "epoch": 27.400354295837023, "grad_norm": 0.2227746844291687, "learning_rate": 1e-05, "loss": 0.9952, "step": 30935 }, { "epoch": 27.404782993799824, "grad_norm": 0.2737066149711609, "learning_rate": 1e-05, "loss": 1.0201, "step": 30940 }, { "epoch": 27.40921169176262, "grad_norm": 0.2747707962989807, "learning_rate": 1e-05, "loss": 0.9882, "step": 30945 }, { "epoch": 27.41364038972542, "grad_norm": 0.1957477331161499, "learning_rate": 1e-05, "loss": 0.9843, "step": 30950 }, { "epoch": 27.41806908768822, "grad_norm": 0.2482810616493225, "learning_rate": 1e-05, "loss": 0.956, "step": 30955 }, { "epoch": 27.422497785651018, "grad_norm": 0.24321091175079346, "learning_rate": 1e-05, "loss": 0.9602, "step": 30960 }, { "epoch": 27.42692648361382, "grad_norm": 0.2579958438873291, "learning_rate": 1e-05, "loss": 0.9747, "step": 30965 }, { "epoch": 27.431355181576617, "grad_norm": 0.20409387350082397, "learning_rate": 1e-05, "loss": 0.9365, "step": 30970 }, { "epoch": 27.435783879539414, "grad_norm": 0.23095521330833435, "learning_rate": 1e-05, "loss": 1.0027, "step": 30975 }, { "epoch": 27.440212577502216, "grad_norm": 0.22307327389717102, "learning_rate": 1e-05, "loss": 0.9791, "step": 30980 }, { "epoch": 27.444641275465013, "grad_norm": 0.23792791366577148, "learning_rate": 1e-05, "loss": 0.9097, "step": 30985 }, { "epoch": 27.44906997342781, "grad_norm": 0.23176367580890656, "learning_rate": 1e-05, "loss": 1.0227, "step": 30990 }, { "epoch": 27.453498671390612, "grad_norm": 0.2149556428194046, "learning_rate": 1e-05, "loss": 0.9833, "step": 30995 }, { "epoch": 27.45792736935341, "grad_norm": 0.21995078027248383, "learning_rate": 1e-05, "loss": 0.9803, "step": 31000 }, { "epoch": 27.462356067316207, "grad_norm": 0.2358456552028656, "learning_rate": 1e-05, "loss": 0.9896, "step": 31005 }, { "epoch": 27.46678476527901, "grad_norm": 0.25158458948135376, "learning_rate": 1e-05, "loss": 0.9541, "step": 31010 }, { "epoch": 27.471213463241806, "grad_norm": 0.23530571162700653, "learning_rate": 1e-05, "loss": 0.9959, "step": 31015 }, { "epoch": 27.475642161204608, "grad_norm": 0.24893873929977417, "learning_rate": 1e-05, "loss": 1.0249, "step": 31020 }, { "epoch": 27.480070859167405, "grad_norm": 0.23119038343429565, "learning_rate": 1e-05, "loss": 0.9312, "step": 31025 }, { "epoch": 27.484499557130203, "grad_norm": 0.21887554228305817, "learning_rate": 1e-05, "loss": 1.0072, "step": 31030 }, { "epoch": 27.488928255093004, "grad_norm": 0.258976012468338, "learning_rate": 1e-05, "loss": 0.9994, "step": 31035 }, { "epoch": 27.4933569530558, "grad_norm": 0.2321832776069641, "learning_rate": 1e-05, "loss": 0.974, "step": 31040 }, { "epoch": 27.4977856510186, "grad_norm": 0.22107261419296265, "learning_rate": 1e-05, "loss": 0.9535, "step": 31045 }, { "epoch": 27.5022143489814, "grad_norm": 0.2509700357913971, "learning_rate": 1e-05, "loss": 0.9812, "step": 31050 }, { "epoch": 27.5066430469442, "grad_norm": 0.31972596049308777, "learning_rate": 1e-05, "loss": 0.934, "step": 31055 }, { "epoch": 27.511071744906996, "grad_norm": 0.2478359490633011, "learning_rate": 1e-05, "loss": 0.9699, "step": 31060 }, { "epoch": 27.515500442869797, "grad_norm": 0.24427036941051483, "learning_rate": 1e-05, "loss": 0.9687, "step": 31065 }, { "epoch": 27.519929140832595, "grad_norm": 0.2811807096004486, "learning_rate": 1e-05, "loss": 0.9992, "step": 31070 }, { "epoch": 27.524357838795392, "grad_norm": 0.23756143450737, "learning_rate": 1e-05, "loss": 0.9657, "step": 31075 }, { "epoch": 27.528786536758194, "grad_norm": 0.2270578145980835, "learning_rate": 1e-05, "loss": 0.9779, "step": 31080 }, { "epoch": 27.53321523472099, "grad_norm": 0.24863387644290924, "learning_rate": 1e-05, "loss": 0.9739, "step": 31085 }, { "epoch": 27.537643932683793, "grad_norm": 0.23438431322574615, "learning_rate": 1e-05, "loss": 0.9684, "step": 31090 }, { "epoch": 27.54207263064659, "grad_norm": 0.29311737418174744, "learning_rate": 1e-05, "loss": 0.9774, "step": 31095 }, { "epoch": 27.546501328609388, "grad_norm": 0.23446708917617798, "learning_rate": 1e-05, "loss": 0.9889, "step": 31100 }, { "epoch": 27.55093002657219, "grad_norm": 0.23869706690311432, "learning_rate": 1e-05, "loss": 0.953, "step": 31105 }, { "epoch": 27.555358724534987, "grad_norm": 0.2336687445640564, "learning_rate": 1e-05, "loss": 0.9852, "step": 31110 }, { "epoch": 27.559787422497784, "grad_norm": 0.28089210391044617, "learning_rate": 1e-05, "loss": 0.9916, "step": 31115 }, { "epoch": 27.564216120460586, "grad_norm": 0.23779523372650146, "learning_rate": 1e-05, "loss": 1.026, "step": 31120 }, { "epoch": 27.568644818423383, "grad_norm": 0.22791455686092377, "learning_rate": 1e-05, "loss": 0.9954, "step": 31125 }, { "epoch": 27.57307351638618, "grad_norm": 0.26404720544815063, "learning_rate": 1e-05, "loss": 0.9759, "step": 31130 }, { "epoch": 27.577502214348982, "grad_norm": 0.30810046195983887, "learning_rate": 1e-05, "loss": 0.9893, "step": 31135 }, { "epoch": 27.58193091231178, "grad_norm": 0.2704094648361206, "learning_rate": 1e-05, "loss": 0.9306, "step": 31140 }, { "epoch": 27.58635961027458, "grad_norm": 0.2914482355117798, "learning_rate": 1e-05, "loss": 0.9781, "step": 31145 }, { "epoch": 27.59078830823738, "grad_norm": 0.23683074116706848, "learning_rate": 1e-05, "loss": 0.9717, "step": 31150 }, { "epoch": 27.595217006200176, "grad_norm": 0.22593069076538086, "learning_rate": 1e-05, "loss": 1.0067, "step": 31155 }, { "epoch": 27.599645704162977, "grad_norm": 0.23497292399406433, "learning_rate": 1e-05, "loss": 1.0043, "step": 31160 }, { "epoch": 27.604074402125775, "grad_norm": 0.22512322664260864, "learning_rate": 1e-05, "loss": 1.0225, "step": 31165 }, { "epoch": 27.608503100088573, "grad_norm": 0.22328370809555054, "learning_rate": 1e-05, "loss": 1.0325, "step": 31170 }, { "epoch": 27.612931798051374, "grad_norm": 0.2554346024990082, "learning_rate": 1e-05, "loss": 1.0485, "step": 31175 }, { "epoch": 27.61736049601417, "grad_norm": 0.23263664543628693, "learning_rate": 1e-05, "loss": 1.0211, "step": 31180 }, { "epoch": 27.62178919397697, "grad_norm": 0.19805167615413666, "learning_rate": 1e-05, "loss": 1.019, "step": 31185 }, { "epoch": 27.62621789193977, "grad_norm": 0.2554255723953247, "learning_rate": 1e-05, "loss": 0.9837, "step": 31190 }, { "epoch": 27.630646589902568, "grad_norm": 0.24750465154647827, "learning_rate": 1e-05, "loss": 1.0274, "step": 31195 }, { "epoch": 27.63507528786537, "grad_norm": 0.23358836770057678, "learning_rate": 1e-05, "loss": 0.9829, "step": 31200 }, { "epoch": 27.639503985828167, "grad_norm": 0.23879177868366241, "learning_rate": 1e-05, "loss": 0.9303, "step": 31205 }, { "epoch": 27.643932683790965, "grad_norm": 0.26125022768974304, "learning_rate": 1e-05, "loss": 0.9998, "step": 31210 }, { "epoch": 27.648361381753766, "grad_norm": 0.2203727662563324, "learning_rate": 1e-05, "loss": 0.9712, "step": 31215 }, { "epoch": 27.652790079716564, "grad_norm": 0.2464284747838974, "learning_rate": 1e-05, "loss": 0.9358, "step": 31220 }, { "epoch": 27.65721877767936, "grad_norm": 0.22900137305259705, "learning_rate": 1e-05, "loss": 1.0085, "step": 31225 }, { "epoch": 27.661647475642162, "grad_norm": 0.2325325459241867, "learning_rate": 1e-05, "loss": 0.9702, "step": 31230 }, { "epoch": 27.66607617360496, "grad_norm": 0.2320452481508255, "learning_rate": 1e-05, "loss": 0.9946, "step": 31235 }, { "epoch": 27.670504871567758, "grad_norm": 0.23795297741889954, "learning_rate": 1e-05, "loss": 0.9962, "step": 31240 }, { "epoch": 27.67493356953056, "grad_norm": 0.2289896011352539, "learning_rate": 1e-05, "loss": 0.9625, "step": 31245 }, { "epoch": 27.679362267493357, "grad_norm": 0.20696084201335907, "learning_rate": 1e-05, "loss": 0.9576, "step": 31250 }, { "epoch": 27.683790965456154, "grad_norm": 0.2510433495044708, "learning_rate": 1e-05, "loss": 0.9899, "step": 31255 }, { "epoch": 27.688219663418955, "grad_norm": 0.2481033354997635, "learning_rate": 1e-05, "loss": 0.9811, "step": 31260 }, { "epoch": 27.692648361381753, "grad_norm": 0.2543386220932007, "learning_rate": 1e-05, "loss": 1.0135, "step": 31265 }, { "epoch": 27.697077059344554, "grad_norm": 0.25980255007743835, "learning_rate": 1e-05, "loss": 0.9984, "step": 31270 }, { "epoch": 27.701505757307352, "grad_norm": 0.2612070143222809, "learning_rate": 1e-05, "loss": 0.9985, "step": 31275 }, { "epoch": 27.70593445527015, "grad_norm": 0.23598235845565796, "learning_rate": 1e-05, "loss": 0.9524, "step": 31280 }, { "epoch": 27.71036315323295, "grad_norm": 0.2432212233543396, "learning_rate": 1e-05, "loss": 0.958, "step": 31285 }, { "epoch": 27.71479185119575, "grad_norm": 0.37720274925231934, "learning_rate": 1e-05, "loss": 0.9666, "step": 31290 }, { "epoch": 27.719220549158546, "grad_norm": 0.276174396276474, "learning_rate": 1e-05, "loss": 0.9763, "step": 31295 }, { "epoch": 27.723649247121347, "grad_norm": 0.2447313517332077, "learning_rate": 1e-05, "loss": 0.9361, "step": 31300 }, { "epoch": 27.728077945084145, "grad_norm": 0.2556127607822418, "learning_rate": 1e-05, "loss": 0.9705, "step": 31305 }, { "epoch": 27.732506643046943, "grad_norm": 0.28501713275909424, "learning_rate": 1e-05, "loss": 0.971, "step": 31310 }, { "epoch": 27.736935341009744, "grad_norm": 0.25870856642723083, "learning_rate": 1e-05, "loss": 1.0121, "step": 31315 }, { "epoch": 27.74136403897254, "grad_norm": 0.21241873502731323, "learning_rate": 1e-05, "loss": 0.9219, "step": 31320 }, { "epoch": 27.745792736935343, "grad_norm": 0.25511202216148376, "learning_rate": 1e-05, "loss": 0.9939, "step": 31325 }, { "epoch": 27.75022143489814, "grad_norm": 0.29273536801338196, "learning_rate": 1e-05, "loss": 0.9524, "step": 31330 }, { "epoch": 27.754650132860938, "grad_norm": 0.23387330770492554, "learning_rate": 1e-05, "loss": 0.9927, "step": 31335 }, { "epoch": 27.75907883082374, "grad_norm": 0.21892881393432617, "learning_rate": 1e-05, "loss": 0.9986, "step": 31340 }, { "epoch": 27.763507528786537, "grad_norm": 0.2555164694786072, "learning_rate": 1e-05, "loss": 0.977, "step": 31345 }, { "epoch": 27.767936226749335, "grad_norm": 0.23252488672733307, "learning_rate": 1e-05, "loss": 0.9935, "step": 31350 }, { "epoch": 27.772364924712136, "grad_norm": 0.24156518280506134, "learning_rate": 1e-05, "loss": 1.0252, "step": 31355 }, { "epoch": 27.776793622674933, "grad_norm": 0.20946422219276428, "learning_rate": 1e-05, "loss": 1.0392, "step": 31360 }, { "epoch": 27.78122232063773, "grad_norm": 0.24066127836704254, "learning_rate": 1e-05, "loss": 1.0006, "step": 31365 }, { "epoch": 27.785651018600532, "grad_norm": 0.2086721807718277, "learning_rate": 1e-05, "loss": 0.9425, "step": 31370 }, { "epoch": 27.79007971656333, "grad_norm": 0.2736165225505829, "learning_rate": 1e-05, "loss": 0.9764, "step": 31375 }, { "epoch": 27.794508414526128, "grad_norm": 0.24205589294433594, "learning_rate": 1e-05, "loss": 1.0053, "step": 31380 }, { "epoch": 27.79893711248893, "grad_norm": 0.23163186013698578, "learning_rate": 1e-05, "loss": 1.017, "step": 31385 }, { "epoch": 27.803365810451727, "grad_norm": 0.24665170907974243, "learning_rate": 1e-05, "loss": 0.9942, "step": 31390 }, { "epoch": 27.807794508414528, "grad_norm": 0.22288623452186584, "learning_rate": 1e-05, "loss": 0.9963, "step": 31395 }, { "epoch": 27.812223206377325, "grad_norm": 0.2558608949184418, "learning_rate": 1e-05, "loss": 1.0295, "step": 31400 }, { "epoch": 27.816651904340123, "grad_norm": 0.22187495231628418, "learning_rate": 1e-05, "loss": 1.0049, "step": 31405 }, { "epoch": 27.821080602302924, "grad_norm": 0.2203754037618637, "learning_rate": 1e-05, "loss": 0.9971, "step": 31410 }, { "epoch": 27.825509300265722, "grad_norm": 0.22384601831436157, "learning_rate": 1e-05, "loss": 1.0548, "step": 31415 }, { "epoch": 27.82993799822852, "grad_norm": 0.26528772711753845, "learning_rate": 1e-05, "loss": 1.0441, "step": 31420 }, { "epoch": 27.83436669619132, "grad_norm": 0.2644975185394287, "learning_rate": 1e-05, "loss": 0.9702, "step": 31425 }, { "epoch": 27.83879539415412, "grad_norm": 0.3055729866027832, "learning_rate": 1e-05, "loss": 1.0072, "step": 31430 }, { "epoch": 27.843224092116916, "grad_norm": 0.2652888894081116, "learning_rate": 1e-05, "loss": 1.0467, "step": 31435 }, { "epoch": 27.847652790079717, "grad_norm": 0.21909134089946747, "learning_rate": 1e-05, "loss": 1.009, "step": 31440 }, { "epoch": 27.852081488042515, "grad_norm": 0.29096031188964844, "learning_rate": 1e-05, "loss": 0.9739, "step": 31445 }, { "epoch": 27.856510186005316, "grad_norm": 0.25493115186691284, "learning_rate": 1e-05, "loss": 0.9568, "step": 31450 }, { "epoch": 27.860938883968114, "grad_norm": 0.2630530893802643, "learning_rate": 1e-05, "loss": 0.9836, "step": 31455 }, { "epoch": 27.86536758193091, "grad_norm": 0.250218003988266, "learning_rate": 1e-05, "loss": 0.9567, "step": 31460 }, { "epoch": 27.869796279893713, "grad_norm": 0.2662123441696167, "learning_rate": 1e-05, "loss": 0.9694, "step": 31465 }, { "epoch": 27.87422497785651, "grad_norm": 0.21547740697860718, "learning_rate": 1e-05, "loss": 0.9918, "step": 31470 }, { "epoch": 27.878653675819308, "grad_norm": 0.2514686584472656, "learning_rate": 1e-05, "loss": 1.0058, "step": 31475 }, { "epoch": 27.88308237378211, "grad_norm": 0.2314177006483078, "learning_rate": 1e-05, "loss": 0.9838, "step": 31480 }, { "epoch": 27.887511071744907, "grad_norm": 0.23453478515148163, "learning_rate": 1e-05, "loss": 1.0227, "step": 31485 }, { "epoch": 27.891939769707704, "grad_norm": 0.2426118701696396, "learning_rate": 1e-05, "loss": 0.9921, "step": 31490 }, { "epoch": 27.896368467670506, "grad_norm": 0.3054236173629761, "learning_rate": 1e-05, "loss": 0.9953, "step": 31495 }, { "epoch": 27.900797165633303, "grad_norm": 0.25926703214645386, "learning_rate": 1e-05, "loss": 0.9242, "step": 31500 }, { "epoch": 27.9052258635961, "grad_norm": 0.26455721259117126, "learning_rate": 1e-05, "loss": 0.9992, "step": 31505 }, { "epoch": 27.909654561558902, "grad_norm": 0.27374276518821716, "learning_rate": 1e-05, "loss": 0.9564, "step": 31510 }, { "epoch": 27.9140832595217, "grad_norm": 0.26286399364471436, "learning_rate": 1e-05, "loss": 0.9878, "step": 31515 }, { "epoch": 27.9185119574845, "grad_norm": 0.25271520018577576, "learning_rate": 1e-05, "loss": 1.0142, "step": 31520 }, { "epoch": 27.9229406554473, "grad_norm": 0.2456914186477661, "learning_rate": 1e-05, "loss": 1.0598, "step": 31525 }, { "epoch": 27.927369353410096, "grad_norm": 0.2313370555639267, "learning_rate": 1e-05, "loss": 0.9754, "step": 31530 }, { "epoch": 27.931798051372898, "grad_norm": 0.23602987825870514, "learning_rate": 1e-05, "loss": 0.9772, "step": 31535 }, { "epoch": 27.936226749335695, "grad_norm": 0.2285691648721695, "learning_rate": 1e-05, "loss": 1.0227, "step": 31540 }, { "epoch": 27.940655447298493, "grad_norm": 0.2799905240535736, "learning_rate": 1e-05, "loss": 1.0051, "step": 31545 }, { "epoch": 27.945084145261294, "grad_norm": 0.20500244200229645, "learning_rate": 1e-05, "loss": 0.9687, "step": 31550 }, { "epoch": 27.949512843224092, "grad_norm": 0.24910742044448853, "learning_rate": 1e-05, "loss": 1.0392, "step": 31555 }, { "epoch": 27.95394154118689, "grad_norm": 0.2464452087879181, "learning_rate": 1e-05, "loss": 0.9838, "step": 31560 }, { "epoch": 27.95837023914969, "grad_norm": 0.25134047865867615, "learning_rate": 1e-05, "loss": 0.9684, "step": 31565 }, { "epoch": 27.96279893711249, "grad_norm": 0.2531101107597351, "learning_rate": 1e-05, "loss": 0.9813, "step": 31570 }, { "epoch": 27.96722763507529, "grad_norm": 0.23719653487205505, "learning_rate": 1e-05, "loss": 1.0059, "step": 31575 }, { "epoch": 27.971656333038087, "grad_norm": 0.20806440711021423, "learning_rate": 1e-05, "loss": 0.9851, "step": 31580 }, { "epoch": 27.976085031000885, "grad_norm": 0.28420737385749817, "learning_rate": 1e-05, "loss": 0.9529, "step": 31585 }, { "epoch": 27.980513728963686, "grad_norm": 0.22390660643577576, "learning_rate": 1e-05, "loss": 0.9375, "step": 31590 }, { "epoch": 27.984942426926484, "grad_norm": 0.25261765718460083, "learning_rate": 1e-05, "loss": 0.9028, "step": 31595 }, { "epoch": 27.98937112488928, "grad_norm": 0.23809413611888885, "learning_rate": 1e-05, "loss": 0.9761, "step": 31600 }, { "epoch": 27.993799822852083, "grad_norm": 0.22561772167682648, "learning_rate": 1e-05, "loss": 0.9451, "step": 31605 }, { "epoch": 27.99822852081488, "grad_norm": 0.25934693217277527, "learning_rate": 1e-05, "loss": 0.9907, "step": 31610 }, { "epoch": 28.002657218777678, "grad_norm": 0.23673613369464874, "learning_rate": 1e-05, "loss": 0.9504, "step": 31615 }, { "epoch": 28.00708591674048, "grad_norm": 0.18960891664028168, "learning_rate": 1e-05, "loss": 0.9545, "step": 31620 }, { "epoch": 28.011514614703277, "grad_norm": 0.23437142372131348, "learning_rate": 1e-05, "loss": 1.0102, "step": 31625 }, { "epoch": 28.015943312666074, "grad_norm": 0.22854219377040863, "learning_rate": 1e-05, "loss": 1.0332, "step": 31630 }, { "epoch": 28.020372010628876, "grad_norm": 0.2848300337791443, "learning_rate": 1e-05, "loss": 0.9629, "step": 31635 }, { "epoch": 28.024800708591673, "grad_norm": 0.280914843082428, "learning_rate": 1e-05, "loss": 1.0204, "step": 31640 }, { "epoch": 28.029229406554474, "grad_norm": 0.2586367130279541, "learning_rate": 1e-05, "loss": 0.9353, "step": 31645 }, { "epoch": 28.033658104517272, "grad_norm": 0.26630550622940063, "learning_rate": 1e-05, "loss": 1.0089, "step": 31650 }, { "epoch": 28.03808680248007, "grad_norm": 0.2387808859348297, "learning_rate": 1e-05, "loss": 1.0523, "step": 31655 }, { "epoch": 28.04251550044287, "grad_norm": 0.2943151295185089, "learning_rate": 1e-05, "loss": 1.0109, "step": 31660 }, { "epoch": 28.04694419840567, "grad_norm": 0.28493833541870117, "learning_rate": 1e-05, "loss": 0.9665, "step": 31665 }, { "epoch": 28.051372896368466, "grad_norm": 0.2756597399711609, "learning_rate": 1e-05, "loss": 0.9404, "step": 31670 }, { "epoch": 28.055801594331268, "grad_norm": 0.2798602879047394, "learning_rate": 1e-05, "loss": 1.0266, "step": 31675 }, { "epoch": 28.060230292294065, "grad_norm": 0.2521456182003021, "learning_rate": 1e-05, "loss": 1.003, "step": 31680 }, { "epoch": 28.064658990256863, "grad_norm": 0.20207074284553528, "learning_rate": 1e-05, "loss": 1.0216, "step": 31685 }, { "epoch": 28.069087688219664, "grad_norm": 0.2529992461204529, "learning_rate": 1e-05, "loss": 0.9823, "step": 31690 }, { "epoch": 28.07351638618246, "grad_norm": 0.3253951370716095, "learning_rate": 1e-05, "loss": 0.9698, "step": 31695 }, { "epoch": 28.077945084145263, "grad_norm": 0.300656795501709, "learning_rate": 1e-05, "loss": 1.0453, "step": 31700 }, { "epoch": 28.08237378210806, "grad_norm": 0.25386694073677063, "learning_rate": 1e-05, "loss": 0.9951, "step": 31705 }, { "epoch": 28.086802480070858, "grad_norm": 0.25687623023986816, "learning_rate": 1e-05, "loss": 1.0177, "step": 31710 }, { "epoch": 28.09123117803366, "grad_norm": 0.2788044214248657, "learning_rate": 1e-05, "loss": 0.9776, "step": 31715 }, { "epoch": 28.095659875996457, "grad_norm": 0.2612397372722626, "learning_rate": 1e-05, "loss": 0.9799, "step": 31720 }, { "epoch": 28.100088573959255, "grad_norm": 0.22717978060245514, "learning_rate": 1e-05, "loss": 0.9466, "step": 31725 }, { "epoch": 28.104517271922056, "grad_norm": 0.27017271518707275, "learning_rate": 1e-05, "loss": 1.0038, "step": 31730 }, { "epoch": 28.108945969884854, "grad_norm": 0.24155868589878082, "learning_rate": 1e-05, "loss": 0.9712, "step": 31735 }, { "epoch": 28.11337466784765, "grad_norm": 0.2399553507566452, "learning_rate": 1e-05, "loss": 1.0352, "step": 31740 }, { "epoch": 28.117803365810452, "grad_norm": 0.24093075096607208, "learning_rate": 1e-05, "loss": 1.0196, "step": 31745 }, { "epoch": 28.12223206377325, "grad_norm": 0.21842342615127563, "learning_rate": 1e-05, "loss": 0.9952, "step": 31750 }, { "epoch": 28.12666076173605, "grad_norm": 0.26092880964279175, "learning_rate": 1e-05, "loss": 0.9652, "step": 31755 }, { "epoch": 28.13108945969885, "grad_norm": 0.22773610055446625, "learning_rate": 1e-05, "loss": 0.9722, "step": 31760 }, { "epoch": 28.135518157661647, "grad_norm": 0.305690735578537, "learning_rate": 1e-05, "loss": 0.9399, "step": 31765 }, { "epoch": 28.139946855624448, "grad_norm": 0.2465287297964096, "learning_rate": 1e-05, "loss": 1.0374, "step": 31770 }, { "epoch": 28.144375553587246, "grad_norm": 0.2904472053050995, "learning_rate": 1e-05, "loss": 0.9975, "step": 31775 }, { "epoch": 28.148804251550043, "grad_norm": 0.28554725646972656, "learning_rate": 1e-05, "loss": 1.0166, "step": 31780 }, { "epoch": 28.153232949512844, "grad_norm": 0.2604232728481293, "learning_rate": 1e-05, "loss": 0.9895, "step": 31785 }, { "epoch": 28.157661647475642, "grad_norm": 0.2199588119983673, "learning_rate": 1e-05, "loss": 0.9954, "step": 31790 }, { "epoch": 28.16209034543844, "grad_norm": 0.21487219631671906, "learning_rate": 1e-05, "loss": 1.0226, "step": 31795 }, { "epoch": 28.16651904340124, "grad_norm": 0.23685072362422943, "learning_rate": 1e-05, "loss": 0.952, "step": 31800 }, { "epoch": 28.17094774136404, "grad_norm": 0.21897755563259125, "learning_rate": 1e-05, "loss": 1.0044, "step": 31805 }, { "epoch": 28.175376439326836, "grad_norm": 0.20956380665302277, "learning_rate": 1e-05, "loss": 0.984, "step": 31810 }, { "epoch": 28.179805137289637, "grad_norm": 0.25374090671539307, "learning_rate": 1e-05, "loss": 0.9613, "step": 31815 }, { "epoch": 28.184233835252435, "grad_norm": 0.23682576417922974, "learning_rate": 1e-05, "loss": 1.0491, "step": 31820 }, { "epoch": 28.188662533215236, "grad_norm": 0.2301643043756485, "learning_rate": 1e-05, "loss": 1.0099, "step": 31825 }, { "epoch": 28.193091231178034, "grad_norm": 0.2204231172800064, "learning_rate": 1e-05, "loss": 0.9533, "step": 31830 }, { "epoch": 28.19751992914083, "grad_norm": 0.23439417779445648, "learning_rate": 1e-05, "loss": 1.0114, "step": 31835 }, { "epoch": 28.201948627103633, "grad_norm": 0.24330754578113556, "learning_rate": 1e-05, "loss": 0.9903, "step": 31840 }, { "epoch": 28.20637732506643, "grad_norm": 0.23869043588638306, "learning_rate": 1e-05, "loss": 0.9775, "step": 31845 }, { "epoch": 28.210806023029228, "grad_norm": 0.2518260180950165, "learning_rate": 1e-05, "loss": 0.9457, "step": 31850 }, { "epoch": 28.21523472099203, "grad_norm": 0.2115086168050766, "learning_rate": 1e-05, "loss": 0.9931, "step": 31855 }, { "epoch": 28.219663418954827, "grad_norm": 0.23373179137706757, "learning_rate": 1e-05, "loss": 1.0077, "step": 31860 }, { "epoch": 28.224092116917625, "grad_norm": 0.23405955731868744, "learning_rate": 1e-05, "loss": 0.9456, "step": 31865 }, { "epoch": 28.228520814880426, "grad_norm": 0.21524518728256226, "learning_rate": 1e-05, "loss": 0.9951, "step": 31870 }, { "epoch": 28.232949512843224, "grad_norm": 0.23748517036437988, "learning_rate": 1e-05, "loss": 1.0461, "step": 31875 }, { "epoch": 28.237378210806025, "grad_norm": 0.2518925070762634, "learning_rate": 1e-05, "loss": 1.0291, "step": 31880 }, { "epoch": 28.241806908768822, "grad_norm": 0.21329110860824585, "learning_rate": 1e-05, "loss": 0.9786, "step": 31885 }, { "epoch": 28.24623560673162, "grad_norm": 0.2637166380882263, "learning_rate": 1e-05, "loss": 0.95, "step": 31890 }, { "epoch": 28.25066430469442, "grad_norm": 0.252742737531662, "learning_rate": 1e-05, "loss": 0.9921, "step": 31895 }, { "epoch": 28.25509300265722, "grad_norm": 0.2738778293132782, "learning_rate": 1e-05, "loss": 0.9504, "step": 31900 }, { "epoch": 28.259521700620017, "grad_norm": 0.21903753280639648, "learning_rate": 1e-05, "loss": 1.0132, "step": 31905 }, { "epoch": 28.263950398582818, "grad_norm": 0.2650688588619232, "learning_rate": 1e-05, "loss": 1.0396, "step": 31910 }, { "epoch": 28.268379096545615, "grad_norm": 0.25602075457572937, "learning_rate": 1e-05, "loss": 1.0443, "step": 31915 }, { "epoch": 28.272807794508413, "grad_norm": 0.24006669223308563, "learning_rate": 1e-05, "loss": 0.9894, "step": 31920 }, { "epoch": 28.277236492471214, "grad_norm": 0.21383509039878845, "learning_rate": 1e-05, "loss": 1.0054, "step": 31925 }, { "epoch": 28.281665190434012, "grad_norm": 0.20268988609313965, "learning_rate": 1e-05, "loss": 1.003, "step": 31930 }, { "epoch": 28.28609388839681, "grad_norm": 0.23327352106571198, "learning_rate": 1e-05, "loss": 1.013, "step": 31935 }, { "epoch": 28.29052258635961, "grad_norm": 0.23685763776302338, "learning_rate": 1e-05, "loss": 0.9477, "step": 31940 }, { "epoch": 28.29495128432241, "grad_norm": 0.2382057011127472, "learning_rate": 1e-05, "loss": 1.0354, "step": 31945 }, { "epoch": 28.29937998228521, "grad_norm": 0.21725286543369293, "learning_rate": 1e-05, "loss": 1.0138, "step": 31950 }, { "epoch": 28.303808680248007, "grad_norm": 0.2097073495388031, "learning_rate": 1e-05, "loss": 1.0125, "step": 31955 }, { "epoch": 28.308237378210805, "grad_norm": 0.21233265101909637, "learning_rate": 1e-05, "loss": 1.012, "step": 31960 }, { "epoch": 28.312666076173606, "grad_norm": 0.2257704734802246, "learning_rate": 1e-05, "loss": 0.9746, "step": 31965 }, { "epoch": 28.317094774136404, "grad_norm": 0.23280641436576843, "learning_rate": 1e-05, "loss": 0.9993, "step": 31970 }, { "epoch": 28.3215234720992, "grad_norm": 0.22488297522068024, "learning_rate": 1e-05, "loss": 0.9755, "step": 31975 }, { "epoch": 28.325952170062003, "grad_norm": 0.27357837557792664, "learning_rate": 1e-05, "loss": 1.03, "step": 31980 }, { "epoch": 28.3303808680248, "grad_norm": 0.2501538097858429, "learning_rate": 1e-05, "loss": 0.9273, "step": 31985 }, { "epoch": 28.334809565987598, "grad_norm": 0.2617991864681244, "learning_rate": 1e-05, "loss": 0.9336, "step": 31990 }, { "epoch": 28.3392382639504, "grad_norm": 0.23400233685970306, "learning_rate": 1e-05, "loss": 1.0437, "step": 31995 }, { "epoch": 28.343666961913197, "grad_norm": 0.2104942947626114, "learning_rate": 1e-05, "loss": 0.9417, "step": 32000 }, { "epoch": 28.348095659875998, "grad_norm": 0.25284701585769653, "learning_rate": 1e-05, "loss": 0.969, "step": 32005 }, { "epoch": 28.352524357838796, "grad_norm": 0.20697462558746338, "learning_rate": 1e-05, "loss": 1.0052, "step": 32010 }, { "epoch": 28.356953055801593, "grad_norm": 0.23990274965763092, "learning_rate": 1e-05, "loss": 0.9932, "step": 32015 }, { "epoch": 28.361381753764395, "grad_norm": 0.26316896080970764, "learning_rate": 1e-05, "loss": 0.9635, "step": 32020 }, { "epoch": 28.365810451727192, "grad_norm": 0.26972419023513794, "learning_rate": 1e-05, "loss": 1.0189, "step": 32025 }, { "epoch": 28.37023914968999, "grad_norm": 0.22244566679000854, "learning_rate": 1e-05, "loss": 0.9861, "step": 32030 }, { "epoch": 28.37466784765279, "grad_norm": 0.2899254262447357, "learning_rate": 1e-05, "loss": 0.9842, "step": 32035 }, { "epoch": 28.37909654561559, "grad_norm": 0.2493455857038498, "learning_rate": 1e-05, "loss": 0.9869, "step": 32040 }, { "epoch": 28.383525243578386, "grad_norm": 0.2560606896877289, "learning_rate": 1e-05, "loss": 0.9752, "step": 32045 }, { "epoch": 28.387953941541188, "grad_norm": 0.2786065340042114, "learning_rate": 1e-05, "loss": 0.971, "step": 32050 }, { "epoch": 28.392382639503985, "grad_norm": 0.2601239085197449, "learning_rate": 1e-05, "loss": 0.983, "step": 32055 }, { "epoch": 28.396811337466787, "grad_norm": 0.2901366353034973, "learning_rate": 1e-05, "loss": 0.9429, "step": 32060 }, { "epoch": 28.401240035429584, "grad_norm": 0.23854628205299377, "learning_rate": 1e-05, "loss": 0.8997, "step": 32065 }, { "epoch": 28.405668733392382, "grad_norm": 0.24251247942447662, "learning_rate": 1e-05, "loss": 0.9542, "step": 32070 }, { "epoch": 28.410097431355183, "grad_norm": 0.25075048208236694, "learning_rate": 1e-05, "loss": 0.9403, "step": 32075 }, { "epoch": 28.41452612931798, "grad_norm": 0.22861126065254211, "learning_rate": 1e-05, "loss": 0.9974, "step": 32080 }, { "epoch": 28.41895482728078, "grad_norm": 0.2756919264793396, "learning_rate": 1e-05, "loss": 1.0096, "step": 32085 }, { "epoch": 28.42338352524358, "grad_norm": 0.21984876692295074, "learning_rate": 1e-05, "loss": 0.9562, "step": 32090 }, { "epoch": 28.427812223206377, "grad_norm": 0.2257862538099289, "learning_rate": 1e-05, "loss": 1.0318, "step": 32095 }, { "epoch": 28.432240921169175, "grad_norm": 0.24166934192180634, "learning_rate": 1e-05, "loss": 0.9352, "step": 32100 }, { "epoch": 28.436669619131976, "grad_norm": 0.22282323241233826, "learning_rate": 1e-05, "loss": 0.9786, "step": 32105 }, { "epoch": 28.441098317094774, "grad_norm": 0.21808739006519318, "learning_rate": 1e-05, "loss": 0.9908, "step": 32110 }, { "epoch": 28.44552701505757, "grad_norm": 0.2878672480583191, "learning_rate": 1e-05, "loss": 0.9549, "step": 32115 }, { "epoch": 28.449955713020373, "grad_norm": 0.22229643166065216, "learning_rate": 1e-05, "loss": 0.9713, "step": 32120 }, { "epoch": 28.45438441098317, "grad_norm": 0.23301953077316284, "learning_rate": 1e-05, "loss": 0.9878, "step": 32125 }, { "epoch": 28.45881310894597, "grad_norm": 0.23343119025230408, "learning_rate": 1e-05, "loss": 0.9413, "step": 32130 }, { "epoch": 28.46324180690877, "grad_norm": 0.2900716960430145, "learning_rate": 1e-05, "loss": 1.037, "step": 32135 }, { "epoch": 28.467670504871567, "grad_norm": 0.23313802480697632, "learning_rate": 1e-05, "loss": 0.994, "step": 32140 }, { "epoch": 28.472099202834368, "grad_norm": 0.23826976120471954, "learning_rate": 1e-05, "loss": 1.0099, "step": 32145 }, { "epoch": 28.476527900797166, "grad_norm": 0.19099058210849762, "learning_rate": 1e-05, "loss": 0.9783, "step": 32150 }, { "epoch": 28.480956598759963, "grad_norm": 0.25383460521698, "learning_rate": 1e-05, "loss": 1.0155, "step": 32155 }, { "epoch": 28.485385296722765, "grad_norm": 0.23315663635730743, "learning_rate": 1e-05, "loss": 0.9964, "step": 32160 }, { "epoch": 28.489813994685562, "grad_norm": 0.36980384588241577, "learning_rate": 1e-05, "loss": 0.9875, "step": 32165 }, { "epoch": 28.49424269264836, "grad_norm": 0.2621196210384369, "learning_rate": 1e-05, "loss": 0.9349, "step": 32170 }, { "epoch": 28.49867139061116, "grad_norm": 0.23126965761184692, "learning_rate": 1e-05, "loss": 1.0167, "step": 32175 }, { "epoch": 28.50310008857396, "grad_norm": 0.2714141309261322, "learning_rate": 1e-05, "loss": 0.982, "step": 32180 }, { "epoch": 28.50752878653676, "grad_norm": 0.21692310273647308, "learning_rate": 1e-05, "loss": 0.9698, "step": 32185 }, { "epoch": 28.511957484499558, "grad_norm": 0.2549686133861542, "learning_rate": 1e-05, "loss": 0.9807, "step": 32190 }, { "epoch": 28.516386182462355, "grad_norm": 0.27794280648231506, "learning_rate": 1e-05, "loss": 0.984, "step": 32195 }, { "epoch": 28.520814880425156, "grad_norm": 0.26158109307289124, "learning_rate": 1e-05, "loss": 1.0156, "step": 32200 }, { "epoch": 28.525243578387954, "grad_norm": 0.25001096725463867, "learning_rate": 1e-05, "loss": 1.0063, "step": 32205 }, { "epoch": 28.52967227635075, "grad_norm": 0.2353513091802597, "learning_rate": 1e-05, "loss": 1.0318, "step": 32210 }, { "epoch": 28.534100974313553, "grad_norm": 0.23650024831295013, "learning_rate": 1e-05, "loss": 1.0183, "step": 32215 }, { "epoch": 28.53852967227635, "grad_norm": 0.23523296415805817, "learning_rate": 1e-05, "loss": 0.9862, "step": 32220 }, { "epoch": 28.54295837023915, "grad_norm": 0.30064430832862854, "learning_rate": 1e-05, "loss": 0.9831, "step": 32225 }, { "epoch": 28.54738706820195, "grad_norm": 0.2481122612953186, "learning_rate": 1e-05, "loss": 0.9952, "step": 32230 }, { "epoch": 28.551815766164747, "grad_norm": 0.19871559739112854, "learning_rate": 1e-05, "loss": 1.0008, "step": 32235 }, { "epoch": 28.556244464127545, "grad_norm": 0.2222663015127182, "learning_rate": 1e-05, "loss": 1.0247, "step": 32240 }, { "epoch": 28.560673162090346, "grad_norm": 0.24340662360191345, "learning_rate": 1e-05, "loss": 1.0039, "step": 32245 }, { "epoch": 28.565101860053144, "grad_norm": 0.2281303107738495, "learning_rate": 1e-05, "loss": 0.9747, "step": 32250 }, { "epoch": 28.569530558015945, "grad_norm": 0.272045761346817, "learning_rate": 1e-05, "loss": 0.9808, "step": 32255 }, { "epoch": 28.573959255978743, "grad_norm": 0.24185380339622498, "learning_rate": 1e-05, "loss": 1.0574, "step": 32260 }, { "epoch": 28.57838795394154, "grad_norm": 0.2618290185928345, "learning_rate": 1e-05, "loss": 0.9632, "step": 32265 }, { "epoch": 28.58281665190434, "grad_norm": 0.24950416386127472, "learning_rate": 1e-05, "loss": 1.0075, "step": 32270 }, { "epoch": 28.58724534986714, "grad_norm": 0.2625531256198883, "learning_rate": 1e-05, "loss": 1.0152, "step": 32275 }, { "epoch": 28.591674047829937, "grad_norm": 0.2395915538072586, "learning_rate": 1e-05, "loss": 1.0183, "step": 32280 }, { "epoch": 28.596102745792738, "grad_norm": 0.2766381502151489, "learning_rate": 1e-05, "loss": 0.9825, "step": 32285 }, { "epoch": 28.600531443755536, "grad_norm": 0.2338201403617859, "learning_rate": 1e-05, "loss": 0.998, "step": 32290 }, { "epoch": 28.604960141718333, "grad_norm": 0.35288164019584656, "learning_rate": 1e-05, "loss": 0.9252, "step": 32295 }, { "epoch": 28.609388839681134, "grad_norm": 0.23449599742889404, "learning_rate": 1e-05, "loss": 0.9971, "step": 32300 }, { "epoch": 28.613817537643932, "grad_norm": 0.22771088778972626, "learning_rate": 1e-05, "loss": 0.9498, "step": 32305 }, { "epoch": 28.618246235606733, "grad_norm": 0.25646376609802246, "learning_rate": 1e-05, "loss": 1.0169, "step": 32310 }, { "epoch": 28.62267493356953, "grad_norm": 0.21815960109233856, "learning_rate": 1e-05, "loss": 1.0406, "step": 32315 }, { "epoch": 28.62710363153233, "grad_norm": 0.24311071634292603, "learning_rate": 1e-05, "loss": 0.9804, "step": 32320 }, { "epoch": 28.63153232949513, "grad_norm": 0.23015135526657104, "learning_rate": 1e-05, "loss": 0.956, "step": 32325 }, { "epoch": 28.635961027457927, "grad_norm": 0.22268445789813995, "learning_rate": 1e-05, "loss": 0.9683, "step": 32330 }, { "epoch": 28.640389725420725, "grad_norm": 0.2108345627784729, "learning_rate": 1e-05, "loss": 0.9672, "step": 32335 }, { "epoch": 28.644818423383526, "grad_norm": 0.22398991882801056, "learning_rate": 1e-05, "loss": 0.9776, "step": 32340 }, { "epoch": 28.649247121346324, "grad_norm": 0.22614456713199615, "learning_rate": 1e-05, "loss": 0.9577, "step": 32345 }, { "epoch": 28.65367581930912, "grad_norm": 0.22226692736148834, "learning_rate": 1e-05, "loss": 0.9669, "step": 32350 }, { "epoch": 28.658104517271923, "grad_norm": 0.21315021812915802, "learning_rate": 1e-05, "loss": 0.9729, "step": 32355 }, { "epoch": 28.66253321523472, "grad_norm": 0.23941238224506378, "learning_rate": 1e-05, "loss": 0.943, "step": 32360 }, { "epoch": 28.666961913197518, "grad_norm": 0.2649783790111542, "learning_rate": 1e-05, "loss": 0.9398, "step": 32365 }, { "epoch": 28.67139061116032, "grad_norm": 0.22079895436763763, "learning_rate": 1e-05, "loss": 1.0095, "step": 32370 }, { "epoch": 28.675819309123117, "grad_norm": 0.29518985748291016, "learning_rate": 1e-05, "loss": 0.976, "step": 32375 }, { "epoch": 28.68024800708592, "grad_norm": 0.23155109584331512, "learning_rate": 1e-05, "loss": 1.0223, "step": 32380 }, { "epoch": 28.684676705048716, "grad_norm": 0.2357022762298584, "learning_rate": 1e-05, "loss": 0.9909, "step": 32385 }, { "epoch": 28.689105403011514, "grad_norm": 0.2670823335647583, "learning_rate": 1e-05, "loss": 1.0128, "step": 32390 }, { "epoch": 28.693534100974315, "grad_norm": 0.2107042372226715, "learning_rate": 1e-05, "loss": 0.9443, "step": 32395 }, { "epoch": 28.697962798937112, "grad_norm": 0.24749968945980072, "learning_rate": 1e-05, "loss": 0.9642, "step": 32400 }, { "epoch": 28.70239149689991, "grad_norm": 0.23842880129814148, "learning_rate": 1e-05, "loss": 0.9676, "step": 32405 }, { "epoch": 28.70682019486271, "grad_norm": 0.2762989401817322, "learning_rate": 1e-05, "loss": 0.9815, "step": 32410 }, { "epoch": 28.71124889282551, "grad_norm": 0.23752766847610474, "learning_rate": 1e-05, "loss": 0.9508, "step": 32415 }, { "epoch": 28.715677590788307, "grad_norm": 0.26040777564048767, "learning_rate": 1e-05, "loss": 0.9803, "step": 32420 }, { "epoch": 28.720106288751108, "grad_norm": 0.2159717082977295, "learning_rate": 1e-05, "loss": 0.9936, "step": 32425 }, { "epoch": 28.724534986713905, "grad_norm": 0.222582146525383, "learning_rate": 1e-05, "loss": 1.0459, "step": 32430 }, { "epoch": 28.728963684676707, "grad_norm": 0.28095534443855286, "learning_rate": 1e-05, "loss": 0.9802, "step": 32435 }, { "epoch": 28.733392382639504, "grad_norm": 0.2394607663154602, "learning_rate": 1e-05, "loss": 0.9659, "step": 32440 }, { "epoch": 28.737821080602302, "grad_norm": 0.237933948636055, "learning_rate": 1e-05, "loss": 1.0105, "step": 32445 }, { "epoch": 28.742249778565103, "grad_norm": 0.3280743956565857, "learning_rate": 1e-05, "loss": 0.9273, "step": 32450 }, { "epoch": 28.7466784765279, "grad_norm": 0.2793219983577728, "learning_rate": 1e-05, "loss": 0.9539, "step": 32455 }, { "epoch": 28.7511071744907, "grad_norm": 0.27851080894470215, "learning_rate": 1e-05, "loss": 0.9678, "step": 32460 }, { "epoch": 28.7555358724535, "grad_norm": 0.24829672276973724, "learning_rate": 1e-05, "loss": 0.9679, "step": 32465 }, { "epoch": 28.759964570416297, "grad_norm": 0.2521245777606964, "learning_rate": 1e-05, "loss": 0.9601, "step": 32470 }, { "epoch": 28.764393268379095, "grad_norm": 0.22625643014907837, "learning_rate": 1e-05, "loss": 0.988, "step": 32475 }, { "epoch": 28.768821966341896, "grad_norm": 0.2462923675775528, "learning_rate": 1e-05, "loss": 0.9867, "step": 32480 }, { "epoch": 28.773250664304694, "grad_norm": 0.23189212381839752, "learning_rate": 1e-05, "loss": 0.9972, "step": 32485 }, { "epoch": 28.77767936226749, "grad_norm": 0.2547517716884613, "learning_rate": 1e-05, "loss": 1.0383, "step": 32490 }, { "epoch": 28.782108060230293, "grad_norm": 0.24846181273460388, "learning_rate": 1e-05, "loss": 0.9464, "step": 32495 }, { "epoch": 28.78653675819309, "grad_norm": 0.2744988799095154, "learning_rate": 1e-05, "loss": 0.9843, "step": 32500 }, { "epoch": 28.79096545615589, "grad_norm": 0.2284986972808838, "learning_rate": 1e-05, "loss": 0.9693, "step": 32505 }, { "epoch": 28.79539415411869, "grad_norm": 0.25157272815704346, "learning_rate": 1e-05, "loss": 0.9943, "step": 32510 }, { "epoch": 28.799822852081487, "grad_norm": 0.2612571716308594, "learning_rate": 1e-05, "loss": 1.0314, "step": 32515 }, { "epoch": 28.804251550044288, "grad_norm": 0.25162070989608765, "learning_rate": 1e-05, "loss": 0.9624, "step": 32520 }, { "epoch": 28.808680248007086, "grad_norm": 0.23993653059005737, "learning_rate": 1e-05, "loss": 1.0198, "step": 32525 }, { "epoch": 28.813108945969883, "grad_norm": 0.22653470933437347, "learning_rate": 1e-05, "loss": 0.9756, "step": 32530 }, { "epoch": 28.817537643932685, "grad_norm": 0.2827704846858978, "learning_rate": 1e-05, "loss": 1.0178, "step": 32535 }, { "epoch": 28.821966341895482, "grad_norm": 0.2577563226222992, "learning_rate": 1e-05, "loss": 0.976, "step": 32540 }, { "epoch": 28.82639503985828, "grad_norm": 0.28066474199295044, "learning_rate": 1e-05, "loss": 0.943, "step": 32545 }, { "epoch": 28.83082373782108, "grad_norm": 0.2725309133529663, "learning_rate": 1e-05, "loss": 0.9792, "step": 32550 }, { "epoch": 28.83525243578388, "grad_norm": 0.23435166478157043, "learning_rate": 1e-05, "loss": 0.9775, "step": 32555 }, { "epoch": 28.83968113374668, "grad_norm": 1.3431416749954224, "learning_rate": 1e-05, "loss": 0.9913, "step": 32560 }, { "epoch": 28.844109831709478, "grad_norm": 0.21664059162139893, "learning_rate": 1e-05, "loss": 1.0073, "step": 32565 }, { "epoch": 28.848538529672275, "grad_norm": 0.256254106760025, "learning_rate": 1e-05, "loss": 0.9898, "step": 32570 }, { "epoch": 28.852967227635077, "grad_norm": 0.25111788511276245, "learning_rate": 1e-05, "loss": 0.9767, "step": 32575 }, { "epoch": 28.857395925597874, "grad_norm": 0.24342425167560577, "learning_rate": 1e-05, "loss": 0.9638, "step": 32580 }, { "epoch": 28.861824623560672, "grad_norm": 0.23971812427043915, "learning_rate": 1e-05, "loss": 1.0253, "step": 32585 }, { "epoch": 28.866253321523473, "grad_norm": 0.23585836589336395, "learning_rate": 1e-05, "loss": 0.9927, "step": 32590 }, { "epoch": 28.87068201948627, "grad_norm": 0.2613334357738495, "learning_rate": 1e-05, "loss": 0.9585, "step": 32595 }, { "epoch": 28.87511071744907, "grad_norm": 0.25037524104118347, "learning_rate": 1e-05, "loss": 1.0331, "step": 32600 }, { "epoch": 28.87953941541187, "grad_norm": 0.25890594720840454, "learning_rate": 1e-05, "loss": 1.0745, "step": 32605 }, { "epoch": 28.883968113374667, "grad_norm": 0.25616317987442017, "learning_rate": 1e-05, "loss": 0.9957, "step": 32610 }, { "epoch": 28.888396811337465, "grad_norm": 0.2339998185634613, "learning_rate": 1e-05, "loss": 0.9964, "step": 32615 }, { "epoch": 28.892825509300266, "grad_norm": 0.2537119686603546, "learning_rate": 1e-05, "loss": 0.9939, "step": 32620 }, { "epoch": 28.897254207263064, "grad_norm": 0.21938014030456543, "learning_rate": 1e-05, "loss": 1.0399, "step": 32625 }, { "epoch": 28.901682905225865, "grad_norm": 0.2520517408847809, "learning_rate": 1e-05, "loss": 1.0335, "step": 32630 }, { "epoch": 28.906111603188663, "grad_norm": 0.24985922873020172, "learning_rate": 1e-05, "loss": 0.9763, "step": 32635 }, { "epoch": 28.91054030115146, "grad_norm": 0.26078352332115173, "learning_rate": 1e-05, "loss": 1.0374, "step": 32640 }, { "epoch": 28.91496899911426, "grad_norm": 0.22888430953025818, "learning_rate": 1e-05, "loss": 0.9826, "step": 32645 }, { "epoch": 28.91939769707706, "grad_norm": 0.23833692073822021, "learning_rate": 1e-05, "loss": 0.9505, "step": 32650 }, { "epoch": 28.923826395039857, "grad_norm": 0.27575263381004333, "learning_rate": 1e-05, "loss": 1.0029, "step": 32655 }, { "epoch": 28.928255093002658, "grad_norm": 0.19048675894737244, "learning_rate": 1e-05, "loss": 1.0154, "step": 32660 }, { "epoch": 28.932683790965456, "grad_norm": 0.24705664813518524, "learning_rate": 1e-05, "loss": 0.9239, "step": 32665 }, { "epoch": 28.937112488928253, "grad_norm": 0.23557865619659424, "learning_rate": 1e-05, "loss": 0.9522, "step": 32670 }, { "epoch": 28.941541186891055, "grad_norm": 0.23979802429676056, "learning_rate": 1e-05, "loss": 0.9536, "step": 32675 }, { "epoch": 28.945969884853852, "grad_norm": 0.2484261840581894, "learning_rate": 1e-05, "loss": 0.955, "step": 32680 }, { "epoch": 28.950398582816653, "grad_norm": 0.2429129183292389, "learning_rate": 1e-05, "loss": 0.9398, "step": 32685 }, { "epoch": 28.95482728077945, "grad_norm": 0.23566299676895142, "learning_rate": 1e-05, "loss": 0.9899, "step": 32690 }, { "epoch": 28.95925597874225, "grad_norm": 0.2131844460964203, "learning_rate": 1e-05, "loss": 1.0243, "step": 32695 }, { "epoch": 28.96368467670505, "grad_norm": 0.22451364994049072, "learning_rate": 1e-05, "loss": 1.018, "step": 32700 }, { "epoch": 28.968113374667848, "grad_norm": 0.3154303729534149, "learning_rate": 1e-05, "loss": 1.0068, "step": 32705 }, { "epoch": 28.972542072630645, "grad_norm": 0.2687758505344391, "learning_rate": 1e-05, "loss": 1.0099, "step": 32710 }, { "epoch": 28.976970770593447, "grad_norm": 0.2667892277240753, "learning_rate": 1e-05, "loss": 1.0138, "step": 32715 }, { "epoch": 28.981399468556244, "grad_norm": 0.2537265717983246, "learning_rate": 1e-05, "loss": 1.0156, "step": 32720 }, { "epoch": 28.985828166519042, "grad_norm": 0.25515344738960266, "learning_rate": 1e-05, "loss": 1.0006, "step": 32725 }, { "epoch": 28.990256864481843, "grad_norm": 0.24838703870773315, "learning_rate": 1e-05, "loss": 0.9903, "step": 32730 }, { "epoch": 28.99468556244464, "grad_norm": 0.23681841790676117, "learning_rate": 1e-05, "loss": 0.9579, "step": 32735 }, { "epoch": 28.999114260407442, "grad_norm": 0.21567222476005554, "learning_rate": 1e-05, "loss": 1.0124, "step": 32740 }, { "epoch": 29.00354295837024, "grad_norm": 0.3280039429664612, "learning_rate": 1e-05, "loss": 1.0211, "step": 32745 }, { "epoch": 29.007971656333037, "grad_norm": 0.21983349323272705, "learning_rate": 1e-05, "loss": 0.9714, "step": 32750 }, { "epoch": 29.01240035429584, "grad_norm": 0.22912150621414185, "learning_rate": 1e-05, "loss": 1.0109, "step": 32755 }, { "epoch": 29.016829052258636, "grad_norm": 0.2517050802707672, "learning_rate": 1e-05, "loss": 0.9747, "step": 32760 }, { "epoch": 29.021257750221434, "grad_norm": 0.22547712922096252, "learning_rate": 1e-05, "loss": 0.9941, "step": 32765 }, { "epoch": 29.025686448184235, "grad_norm": 0.2503494620323181, "learning_rate": 1e-05, "loss": 0.9776, "step": 32770 }, { "epoch": 29.030115146147033, "grad_norm": 0.2700424790382385, "learning_rate": 1e-05, "loss": 0.9811, "step": 32775 }, { "epoch": 29.03454384410983, "grad_norm": 0.2730877101421356, "learning_rate": 1e-05, "loss": 1.0097, "step": 32780 }, { "epoch": 29.03897254207263, "grad_norm": 0.23071296513080597, "learning_rate": 1e-05, "loss": 1.0158, "step": 32785 }, { "epoch": 29.04340124003543, "grad_norm": 0.2713901996612549, "learning_rate": 1e-05, "loss": 0.9739, "step": 32790 }, { "epoch": 29.047829937998227, "grad_norm": 0.291415810585022, "learning_rate": 1e-05, "loss": 0.9933, "step": 32795 }, { "epoch": 29.052258635961028, "grad_norm": 0.25327184796333313, "learning_rate": 1e-05, "loss": 1.0081, "step": 32800 }, { "epoch": 29.056687333923826, "grad_norm": 0.25974413752555847, "learning_rate": 1e-05, "loss": 0.963, "step": 32805 }, { "epoch": 29.061116031886627, "grad_norm": 0.2495075911283493, "learning_rate": 1e-05, "loss": 1.0313, "step": 32810 }, { "epoch": 29.065544729849424, "grad_norm": 0.27440500259399414, "learning_rate": 1e-05, "loss": 1.0301, "step": 32815 }, { "epoch": 29.069973427812222, "grad_norm": 0.2577050030231476, "learning_rate": 1e-05, "loss": 0.9579, "step": 32820 }, { "epoch": 29.074402125775023, "grad_norm": 0.24887725710868835, "learning_rate": 1e-05, "loss": 0.9734, "step": 32825 }, { "epoch": 29.07883082373782, "grad_norm": 0.22878865897655487, "learning_rate": 1e-05, "loss": 1.0146, "step": 32830 }, { "epoch": 29.08325952170062, "grad_norm": 0.22497308254241943, "learning_rate": 1e-05, "loss": 0.9807, "step": 32835 }, { "epoch": 29.08768821966342, "grad_norm": 0.22363094985485077, "learning_rate": 1e-05, "loss": 0.9579, "step": 32840 }, { "epoch": 29.092116917626218, "grad_norm": 0.26189014315605164, "learning_rate": 1e-05, "loss": 0.9718, "step": 32845 }, { "epoch": 29.096545615589015, "grad_norm": 0.22668828070163727, "learning_rate": 1e-05, "loss": 0.9919, "step": 32850 }, { "epoch": 29.100974313551816, "grad_norm": 0.23449285328388214, "learning_rate": 1e-05, "loss": 0.9369, "step": 32855 }, { "epoch": 29.105403011514614, "grad_norm": 0.24742555618286133, "learning_rate": 1e-05, "loss": 0.9637, "step": 32860 }, { "epoch": 29.109831709477415, "grad_norm": 0.22500079870224, "learning_rate": 1e-05, "loss": 1.012, "step": 32865 }, { "epoch": 29.114260407440213, "grad_norm": 0.22786012291908264, "learning_rate": 1e-05, "loss": 0.9643, "step": 32870 }, { "epoch": 29.11868910540301, "grad_norm": 0.2354092299938202, "learning_rate": 1e-05, "loss": 1.0527, "step": 32875 }, { "epoch": 29.123117803365812, "grad_norm": 0.22803618013858795, "learning_rate": 1e-05, "loss": 0.9644, "step": 32880 }, { "epoch": 29.12754650132861, "grad_norm": 0.2696690857410431, "learning_rate": 1e-05, "loss": 1.0146, "step": 32885 }, { "epoch": 29.131975199291407, "grad_norm": 0.2248559445142746, "learning_rate": 1e-05, "loss": 1.0164, "step": 32890 }, { "epoch": 29.13640389725421, "grad_norm": 0.23205527663230896, "learning_rate": 1e-05, "loss": 1.0078, "step": 32895 }, { "epoch": 29.140832595217006, "grad_norm": 0.27893131971359253, "learning_rate": 1e-05, "loss": 0.9764, "step": 32900 }, { "epoch": 29.145261293179804, "grad_norm": 0.24877573549747467, "learning_rate": 1e-05, "loss": 0.9726, "step": 32905 }, { "epoch": 29.149689991142605, "grad_norm": 0.2536066472530365, "learning_rate": 1e-05, "loss": 0.9771, "step": 32910 }, { "epoch": 29.154118689105402, "grad_norm": 0.2403651773929596, "learning_rate": 1e-05, "loss": 1.063, "step": 32915 }, { "epoch": 29.158547387068204, "grad_norm": 0.3604414463043213, "learning_rate": 1e-05, "loss": 0.9534, "step": 32920 }, { "epoch": 29.162976085031, "grad_norm": 0.2457941621541977, "learning_rate": 1e-05, "loss": 1.0217, "step": 32925 }, { "epoch": 29.1674047829938, "grad_norm": 0.22010116279125214, "learning_rate": 1e-05, "loss": 0.9654, "step": 32930 }, { "epoch": 29.1718334809566, "grad_norm": 0.24960453808307648, "learning_rate": 1e-05, "loss": 0.9723, "step": 32935 }, { "epoch": 29.176262178919398, "grad_norm": 0.23997916281223297, "learning_rate": 1e-05, "loss": 0.9808, "step": 32940 }, { "epoch": 29.180690876882196, "grad_norm": 0.27191200852394104, "learning_rate": 1e-05, "loss": 0.9954, "step": 32945 }, { "epoch": 29.185119574844997, "grad_norm": 0.22481368482112885, "learning_rate": 1e-05, "loss": 1.0153, "step": 32950 }, { "epoch": 29.189548272807794, "grad_norm": 0.2542947232723236, "learning_rate": 1e-05, "loss": 0.9652, "step": 32955 }, { "epoch": 29.193976970770592, "grad_norm": 0.2834567129611969, "learning_rate": 1e-05, "loss": 1.0083, "step": 32960 }, { "epoch": 29.198405668733393, "grad_norm": 0.2695099711418152, "learning_rate": 1e-05, "loss": 1.0139, "step": 32965 }, { "epoch": 29.20283436669619, "grad_norm": 0.2747369408607483, "learning_rate": 1e-05, "loss": 0.9831, "step": 32970 }, { "epoch": 29.20726306465899, "grad_norm": 0.2420107126235962, "learning_rate": 1e-05, "loss": 0.9723, "step": 32975 }, { "epoch": 29.21169176262179, "grad_norm": 0.2598002552986145, "learning_rate": 1e-05, "loss": 0.9577, "step": 32980 }, { "epoch": 29.216120460584587, "grad_norm": 0.2828283905982971, "learning_rate": 1e-05, "loss": 0.9841, "step": 32985 }, { "epoch": 29.22054915854739, "grad_norm": 0.2351001352071762, "learning_rate": 1e-05, "loss": 0.9895, "step": 32990 }, { "epoch": 29.224977856510186, "grad_norm": 0.2394755631685257, "learning_rate": 1e-05, "loss": 0.9709, "step": 32995 }, { "epoch": 29.229406554472984, "grad_norm": 0.31276336312294006, "learning_rate": 1e-05, "loss": 1.017, "step": 33000 }, { "epoch": 29.233835252435785, "grad_norm": 0.2706705629825592, "learning_rate": 1e-05, "loss": 0.9284, "step": 33005 }, { "epoch": 29.238263950398583, "grad_norm": 0.24142980575561523, "learning_rate": 1e-05, "loss": 1.0073, "step": 33010 }, { "epoch": 29.24269264836138, "grad_norm": 0.23878198862075806, "learning_rate": 1e-05, "loss": 1.0021, "step": 33015 }, { "epoch": 29.24712134632418, "grad_norm": 0.22935231029987335, "learning_rate": 1e-05, "loss": 0.9936, "step": 33020 }, { "epoch": 29.25155004428698, "grad_norm": 0.2256104201078415, "learning_rate": 1e-05, "loss": 1.0169, "step": 33025 }, { "epoch": 29.255978742249777, "grad_norm": 0.21620772778987885, "learning_rate": 1e-05, "loss": 1.0227, "step": 33030 }, { "epoch": 29.260407440212578, "grad_norm": 0.2574934661388397, "learning_rate": 1e-05, "loss": 0.9767, "step": 33035 }, { "epoch": 29.264836138175376, "grad_norm": 0.2542046904563904, "learning_rate": 1e-05, "loss": 1.002, "step": 33040 }, { "epoch": 29.269264836138177, "grad_norm": 0.26493820548057556, "learning_rate": 1e-05, "loss": 0.9396, "step": 33045 }, { "epoch": 29.273693534100975, "grad_norm": 0.25389036536216736, "learning_rate": 1e-05, "loss": 0.9904, "step": 33050 }, { "epoch": 29.278122232063772, "grad_norm": 0.2502876818180084, "learning_rate": 1e-05, "loss": 1.0333, "step": 33055 }, { "epoch": 29.282550930026574, "grad_norm": 0.2571655809879303, "learning_rate": 1e-05, "loss": 0.9602, "step": 33060 }, { "epoch": 29.28697962798937, "grad_norm": 0.24559326469898224, "learning_rate": 1e-05, "loss": 0.9874, "step": 33065 }, { "epoch": 29.29140832595217, "grad_norm": 0.23028038442134857, "learning_rate": 1e-05, "loss": 0.9626, "step": 33070 }, { "epoch": 29.29583702391497, "grad_norm": 0.22738775610923767, "learning_rate": 1e-05, "loss": 1.0204, "step": 33075 }, { "epoch": 29.300265721877768, "grad_norm": 0.2694743275642395, "learning_rate": 1e-05, "loss": 1.0262, "step": 33080 }, { "epoch": 29.304694419840565, "grad_norm": 0.2586454749107361, "learning_rate": 1e-05, "loss": 0.9803, "step": 33085 }, { "epoch": 29.309123117803367, "grad_norm": 0.23461739718914032, "learning_rate": 1e-05, "loss": 0.9687, "step": 33090 }, { "epoch": 29.313551815766164, "grad_norm": 0.22517363727092743, "learning_rate": 1e-05, "loss": 0.9956, "step": 33095 }, { "epoch": 29.317980513728962, "grad_norm": 0.2183738797903061, "learning_rate": 1e-05, "loss": 0.9791, "step": 33100 }, { "epoch": 29.322409211691763, "grad_norm": 0.2568979263305664, "learning_rate": 1e-05, "loss": 1.0218, "step": 33105 }, { "epoch": 29.32683790965456, "grad_norm": 0.2764614522457123, "learning_rate": 1e-05, "loss": 1.0095, "step": 33110 }, { "epoch": 29.331266607617362, "grad_norm": 0.2150292694568634, "learning_rate": 1e-05, "loss": 1.0127, "step": 33115 }, { "epoch": 29.33569530558016, "grad_norm": 0.29166650772094727, "learning_rate": 1e-05, "loss": 0.9275, "step": 33120 }, { "epoch": 29.340124003542957, "grad_norm": 0.22223339974880219, "learning_rate": 1e-05, "loss": 1.0201, "step": 33125 }, { "epoch": 29.34455270150576, "grad_norm": 0.32859939336776733, "learning_rate": 1e-05, "loss": 1.0042, "step": 33130 }, { "epoch": 29.348981399468556, "grad_norm": 0.2441742718219757, "learning_rate": 1e-05, "loss": 1.0515, "step": 33135 }, { "epoch": 29.353410097431354, "grad_norm": 0.2407771348953247, "learning_rate": 1e-05, "loss": 1.0065, "step": 33140 }, { "epoch": 29.357838795394155, "grad_norm": 0.24372783303260803, "learning_rate": 1e-05, "loss": 0.9734, "step": 33145 }, { "epoch": 29.362267493356953, "grad_norm": 0.20713920891284943, "learning_rate": 1e-05, "loss": 1.0254, "step": 33150 }, { "epoch": 29.36669619131975, "grad_norm": 0.24818897247314453, "learning_rate": 1e-05, "loss": 0.9606, "step": 33155 }, { "epoch": 29.37112488928255, "grad_norm": 0.25761672854423523, "learning_rate": 1e-05, "loss": 1.013, "step": 33160 }, { "epoch": 29.37555358724535, "grad_norm": 0.23868688941001892, "learning_rate": 1e-05, "loss": 0.9532, "step": 33165 }, { "epoch": 29.37998228520815, "grad_norm": 0.1938958764076233, "learning_rate": 1e-05, "loss": 0.9649, "step": 33170 }, { "epoch": 29.384410983170948, "grad_norm": 0.2537584900856018, "learning_rate": 1e-05, "loss": 0.9428, "step": 33175 }, { "epoch": 29.388839681133746, "grad_norm": 0.2777320444583893, "learning_rate": 1e-05, "loss": 0.9424, "step": 33180 }, { "epoch": 29.393268379096547, "grad_norm": 0.23738200962543488, "learning_rate": 1e-05, "loss": 1.0114, "step": 33185 }, { "epoch": 29.397697077059345, "grad_norm": 0.27272143959999084, "learning_rate": 1e-05, "loss": 0.9781, "step": 33190 }, { "epoch": 29.402125775022142, "grad_norm": 0.25351670384407043, "learning_rate": 1e-05, "loss": 0.9747, "step": 33195 }, { "epoch": 29.406554472984944, "grad_norm": 0.2402755618095398, "learning_rate": 1e-05, "loss": 0.9937, "step": 33200 }, { "epoch": 29.41098317094774, "grad_norm": 0.22165927290916443, "learning_rate": 1e-05, "loss": 1.0153, "step": 33205 }, { "epoch": 29.41541186891054, "grad_norm": 0.22902365028858185, "learning_rate": 1e-05, "loss": 1.0204, "step": 33210 }, { "epoch": 29.41984056687334, "grad_norm": 0.25669386982917786, "learning_rate": 1e-05, "loss": 0.9989, "step": 33215 }, { "epoch": 29.424269264836138, "grad_norm": 0.25663214921951294, "learning_rate": 1e-05, "loss": 1.0038, "step": 33220 }, { "epoch": 29.428697962798935, "grad_norm": 0.22321319580078125, "learning_rate": 1e-05, "loss": 1.0196, "step": 33225 }, { "epoch": 29.433126660761737, "grad_norm": 0.26744386553764343, "learning_rate": 1e-05, "loss": 0.9042, "step": 33230 }, { "epoch": 29.437555358724534, "grad_norm": 0.21041367948055267, "learning_rate": 1e-05, "loss": 1.0313, "step": 33235 }, { "epoch": 29.441984056687335, "grad_norm": 0.2211267203092575, "learning_rate": 1e-05, "loss": 0.9643, "step": 33240 }, { "epoch": 29.446412754650133, "grad_norm": 0.22458960115909576, "learning_rate": 1e-05, "loss": 0.9454, "step": 33245 }, { "epoch": 29.45084145261293, "grad_norm": 0.25537192821502686, "learning_rate": 1e-05, "loss": 0.9841, "step": 33250 }, { "epoch": 29.455270150575732, "grad_norm": 0.25810956954956055, "learning_rate": 1e-05, "loss": 0.9969, "step": 33255 }, { "epoch": 29.45969884853853, "grad_norm": 0.20794254541397095, "learning_rate": 1e-05, "loss": 0.9908, "step": 33260 }, { "epoch": 29.464127546501327, "grad_norm": 0.24391856789588928, "learning_rate": 1e-05, "loss": 0.9577, "step": 33265 }, { "epoch": 29.46855624446413, "grad_norm": 0.2551543712615967, "learning_rate": 1e-05, "loss": 1.0019, "step": 33270 }, { "epoch": 29.472984942426926, "grad_norm": 0.24688716232776642, "learning_rate": 1e-05, "loss": 0.9886, "step": 33275 }, { "epoch": 29.477413640389724, "grad_norm": 0.2589344084262848, "learning_rate": 1e-05, "loss": 0.9492, "step": 33280 }, { "epoch": 29.481842338352525, "grad_norm": 0.20920489728450775, "learning_rate": 1e-05, "loss": 0.9692, "step": 33285 }, { "epoch": 29.486271036315323, "grad_norm": 0.26096221804618835, "learning_rate": 1e-05, "loss": 0.956, "step": 33290 }, { "epoch": 29.490699734278124, "grad_norm": 0.23136945068836212, "learning_rate": 1e-05, "loss": 0.9724, "step": 33295 }, { "epoch": 29.49512843224092, "grad_norm": 0.24354800581932068, "learning_rate": 1e-05, "loss": 0.9812, "step": 33300 }, { "epoch": 29.49955713020372, "grad_norm": 0.26048028469085693, "learning_rate": 1e-05, "loss": 0.9987, "step": 33305 }, { "epoch": 29.50398582816652, "grad_norm": 0.26856541633605957, "learning_rate": 1e-05, "loss": 1.0121, "step": 33310 }, { "epoch": 29.508414526129318, "grad_norm": 0.2925977110862732, "learning_rate": 1e-05, "loss": 0.9752, "step": 33315 }, { "epoch": 29.512843224092116, "grad_norm": 0.2360851913690567, "learning_rate": 1e-05, "loss": 0.9936, "step": 33320 }, { "epoch": 29.517271922054917, "grad_norm": 0.25888973474502563, "learning_rate": 1e-05, "loss": 1.012, "step": 33325 }, { "epoch": 29.521700620017715, "grad_norm": 0.27774688601493835, "learning_rate": 1e-05, "loss": 1.0298, "step": 33330 }, { "epoch": 29.526129317980512, "grad_norm": 0.29836344718933105, "learning_rate": 1e-05, "loss": 1.002, "step": 33335 }, { "epoch": 29.530558015943313, "grad_norm": 0.23448826372623444, "learning_rate": 1e-05, "loss": 1.0108, "step": 33340 }, { "epoch": 29.53498671390611, "grad_norm": 0.24063892662525177, "learning_rate": 1e-05, "loss": 0.9959, "step": 33345 }, { "epoch": 29.53941541186891, "grad_norm": 0.28010237216949463, "learning_rate": 1e-05, "loss": 0.9665, "step": 33350 }, { "epoch": 29.54384410983171, "grad_norm": 0.23950083553791046, "learning_rate": 1e-05, "loss": 0.997, "step": 33355 }, { "epoch": 29.548272807794508, "grad_norm": 0.2847791016101837, "learning_rate": 1e-05, "loss": 0.9573, "step": 33360 }, { "epoch": 29.55270150575731, "grad_norm": 0.21922768652439117, "learning_rate": 1e-05, "loss": 0.9258, "step": 33365 }, { "epoch": 29.557130203720106, "grad_norm": 0.23477822542190552, "learning_rate": 1e-05, "loss": 0.9802, "step": 33370 }, { "epoch": 29.561558901682904, "grad_norm": 0.22649945318698883, "learning_rate": 1e-05, "loss": 0.949, "step": 33375 }, { "epoch": 29.565987599645705, "grad_norm": 0.25750523805618286, "learning_rate": 1e-05, "loss": 1.0392, "step": 33380 }, { "epoch": 29.570416297608503, "grad_norm": 0.29351806640625, "learning_rate": 1e-05, "loss": 0.9604, "step": 33385 }, { "epoch": 29.5748449955713, "grad_norm": 0.2347741574048996, "learning_rate": 1e-05, "loss": 0.9567, "step": 33390 }, { "epoch": 29.579273693534102, "grad_norm": 0.27449727058410645, "learning_rate": 1e-05, "loss": 0.9717, "step": 33395 }, { "epoch": 29.5837023914969, "grad_norm": 0.24438513815402985, "learning_rate": 1e-05, "loss": 0.9765, "step": 33400 }, { "epoch": 29.588131089459697, "grad_norm": 0.2961277961730957, "learning_rate": 1e-05, "loss": 1.007, "step": 33405 }, { "epoch": 29.5925597874225, "grad_norm": 0.25197264552116394, "learning_rate": 1e-05, "loss": 1.0138, "step": 33410 }, { "epoch": 29.596988485385296, "grad_norm": 0.26049402356147766, "learning_rate": 1e-05, "loss": 0.9933, "step": 33415 }, { "epoch": 29.601417183348097, "grad_norm": 0.3087747097015381, "learning_rate": 1e-05, "loss": 0.9813, "step": 33420 }, { "epoch": 29.605845881310895, "grad_norm": 0.24062579870224, "learning_rate": 1e-05, "loss": 0.9514, "step": 33425 }, { "epoch": 29.610274579273693, "grad_norm": 0.2511786222457886, "learning_rate": 1e-05, "loss": 0.9779, "step": 33430 }, { "epoch": 29.614703277236494, "grad_norm": 0.22619308531284332, "learning_rate": 1e-05, "loss": 1.0062, "step": 33435 }, { "epoch": 29.61913197519929, "grad_norm": 0.2527395486831665, "learning_rate": 1e-05, "loss": 0.9888, "step": 33440 }, { "epoch": 29.62356067316209, "grad_norm": 0.19290590286254883, "learning_rate": 1e-05, "loss": 0.9509, "step": 33445 }, { "epoch": 29.62798937112489, "grad_norm": 0.3469868302345276, "learning_rate": 1e-05, "loss": 0.9961, "step": 33450 }, { "epoch": 29.632418069087688, "grad_norm": 0.24371160566806793, "learning_rate": 1e-05, "loss": 0.8963, "step": 33455 }, { "epoch": 29.636846767050486, "grad_norm": 0.24923472106456757, "learning_rate": 1e-05, "loss": 0.9883, "step": 33460 }, { "epoch": 29.641275465013287, "grad_norm": 0.23934711515903473, "learning_rate": 1e-05, "loss": 0.944, "step": 33465 }, { "epoch": 29.645704162976084, "grad_norm": 0.2713635563850403, "learning_rate": 1e-05, "loss": 0.9748, "step": 33470 }, { "epoch": 29.650132860938886, "grad_norm": 0.24598468840122223, "learning_rate": 1e-05, "loss": 1.0265, "step": 33475 }, { "epoch": 29.654561558901683, "grad_norm": 0.24211840331554413, "learning_rate": 1e-05, "loss": 0.9949, "step": 33480 }, { "epoch": 29.65899025686448, "grad_norm": 0.22946064174175262, "learning_rate": 1e-05, "loss": 1.0025, "step": 33485 }, { "epoch": 29.663418954827282, "grad_norm": 0.26737797260284424, "learning_rate": 1e-05, "loss": 0.9559, "step": 33490 }, { "epoch": 29.66784765279008, "grad_norm": 0.2769491374492645, "learning_rate": 1e-05, "loss": 0.9921, "step": 33495 }, { "epoch": 29.672276350752878, "grad_norm": 0.2214026153087616, "learning_rate": 1e-05, "loss": 1.0116, "step": 33500 }, { "epoch": 29.67670504871568, "grad_norm": 0.231183260679245, "learning_rate": 1e-05, "loss": 1.0019, "step": 33505 }, { "epoch": 29.681133746678476, "grad_norm": 0.25053277611732483, "learning_rate": 1e-05, "loss": 0.9705, "step": 33510 }, { "epoch": 29.685562444641274, "grad_norm": 0.22274485230445862, "learning_rate": 1e-05, "loss": 1.0318, "step": 33515 }, { "epoch": 29.689991142604075, "grad_norm": 0.2466314435005188, "learning_rate": 1e-05, "loss": 1.0024, "step": 33520 }, { "epoch": 29.694419840566873, "grad_norm": 0.2397489696741104, "learning_rate": 1e-05, "loss": 0.9846, "step": 33525 }, { "epoch": 29.698848538529674, "grad_norm": 0.22980856895446777, "learning_rate": 1e-05, "loss": 1.004, "step": 33530 }, { "epoch": 29.70327723649247, "grad_norm": 0.2572290897369385, "learning_rate": 1e-05, "loss": 0.9145, "step": 33535 }, { "epoch": 29.70770593445527, "grad_norm": 0.24892254173755646, "learning_rate": 1e-05, "loss": 0.9431, "step": 33540 }, { "epoch": 29.71213463241807, "grad_norm": 0.2390914112329483, "learning_rate": 1e-05, "loss": 0.9229, "step": 33545 }, { "epoch": 29.71656333038087, "grad_norm": 0.21791300177574158, "learning_rate": 1e-05, "loss": 0.9753, "step": 33550 }, { "epoch": 29.720992028343666, "grad_norm": 0.24028660356998444, "learning_rate": 1e-05, "loss": 0.9723, "step": 33555 }, { "epoch": 29.725420726306467, "grad_norm": 0.2410203218460083, "learning_rate": 1e-05, "loss": 0.9862, "step": 33560 }, { "epoch": 29.729849424269265, "grad_norm": 0.24294373393058777, "learning_rate": 1e-05, "loss": 0.9321, "step": 33565 }, { "epoch": 29.734278122232062, "grad_norm": 0.20225472748279572, "learning_rate": 1e-05, "loss": 0.9726, "step": 33570 }, { "epoch": 29.738706820194864, "grad_norm": 0.19303831458091736, "learning_rate": 1e-05, "loss": 0.9925, "step": 33575 }, { "epoch": 29.74313551815766, "grad_norm": 0.20625481009483337, "learning_rate": 1e-05, "loss": 0.9956, "step": 33580 }, { "epoch": 29.74756421612046, "grad_norm": 0.24322716891765594, "learning_rate": 1e-05, "loss": 0.9579, "step": 33585 }, { "epoch": 29.75199291408326, "grad_norm": 0.26344043016433716, "learning_rate": 1e-05, "loss": 0.9739, "step": 33590 }, { "epoch": 29.756421612046058, "grad_norm": 0.28257396817207336, "learning_rate": 1e-05, "loss": 0.9819, "step": 33595 }, { "epoch": 29.76085031000886, "grad_norm": 0.27443960309028625, "learning_rate": 1e-05, "loss": 0.9912, "step": 33600 }, { "epoch": 29.765279007971657, "grad_norm": 0.24726985394954681, "learning_rate": 1e-05, "loss": 1.0026, "step": 33605 }, { "epoch": 29.769707705934454, "grad_norm": 0.22249121963977814, "learning_rate": 1e-05, "loss": 0.9797, "step": 33610 }, { "epoch": 29.774136403897256, "grad_norm": 0.25300875306129456, "learning_rate": 1e-05, "loss": 1.0037, "step": 33615 }, { "epoch": 29.778565101860053, "grad_norm": 0.2905903458595276, "learning_rate": 1e-05, "loss": 0.9934, "step": 33620 }, { "epoch": 29.78299379982285, "grad_norm": 0.2594776153564453, "learning_rate": 1e-05, "loss": 1.0061, "step": 33625 }, { "epoch": 29.787422497785652, "grad_norm": 0.4132271707057953, "learning_rate": 1e-05, "loss": 1.002, "step": 33630 }, { "epoch": 29.79185119574845, "grad_norm": 0.23372139036655426, "learning_rate": 1e-05, "loss": 0.9521, "step": 33635 }, { "epoch": 29.796279893711247, "grad_norm": 0.25129473209381104, "learning_rate": 1e-05, "loss": 0.985, "step": 33640 }, { "epoch": 29.80070859167405, "grad_norm": 0.2372855246067047, "learning_rate": 1e-05, "loss": 0.9931, "step": 33645 }, { "epoch": 29.805137289636846, "grad_norm": 0.25713202357292175, "learning_rate": 1e-05, "loss": 0.9732, "step": 33650 }, { "epoch": 29.809565987599647, "grad_norm": 0.25309890508651733, "learning_rate": 1e-05, "loss": 0.992, "step": 33655 }, { "epoch": 29.813994685562445, "grad_norm": 0.23268768191337585, "learning_rate": 1e-05, "loss": 1.0311, "step": 33660 }, { "epoch": 29.818423383525243, "grad_norm": 0.22995048761367798, "learning_rate": 1e-05, "loss": 0.9398, "step": 33665 }, { "epoch": 29.822852081488044, "grad_norm": 0.3111424744129181, "learning_rate": 1e-05, "loss": 0.9875, "step": 33670 }, { "epoch": 29.82728077945084, "grad_norm": 0.2710413336753845, "learning_rate": 1e-05, "loss": 1.015, "step": 33675 }, { "epoch": 29.83170947741364, "grad_norm": 0.3212091624736786, "learning_rate": 1e-05, "loss": 0.9927, "step": 33680 }, { "epoch": 29.83613817537644, "grad_norm": 0.3030320107936859, "learning_rate": 1e-05, "loss": 1.0098, "step": 33685 }, { "epoch": 29.840566873339238, "grad_norm": 0.24937406182289124, "learning_rate": 1e-05, "loss": 0.9639, "step": 33690 }, { "epoch": 29.844995571302036, "grad_norm": 0.24058672785758972, "learning_rate": 1e-05, "loss": 1.0373, "step": 33695 }, { "epoch": 29.849424269264837, "grad_norm": 0.23151683807373047, "learning_rate": 1e-05, "loss": 0.9865, "step": 33700 }, { "epoch": 29.853852967227635, "grad_norm": 0.29559141397476196, "learning_rate": 1e-05, "loss": 1.0257, "step": 33705 }, { "epoch": 29.858281665190432, "grad_norm": 0.2709830403327942, "learning_rate": 1e-05, "loss": 1.0242, "step": 33710 }, { "epoch": 29.862710363153234, "grad_norm": 0.30582910776138306, "learning_rate": 1e-05, "loss": 0.9837, "step": 33715 }, { "epoch": 29.86713906111603, "grad_norm": 0.23479345440864563, "learning_rate": 1e-05, "loss": 0.9965, "step": 33720 }, { "epoch": 29.871567759078832, "grad_norm": 0.1971449851989746, "learning_rate": 1e-05, "loss": 0.9446, "step": 33725 }, { "epoch": 29.87599645704163, "grad_norm": 0.276248037815094, "learning_rate": 1e-05, "loss": 0.9845, "step": 33730 }, { "epoch": 29.880425155004428, "grad_norm": 0.2343430519104004, "learning_rate": 1e-05, "loss": 0.9684, "step": 33735 }, { "epoch": 29.88485385296723, "grad_norm": 0.2501111328601837, "learning_rate": 1e-05, "loss": 0.9463, "step": 33740 }, { "epoch": 29.889282550930027, "grad_norm": 0.2660665810108185, "learning_rate": 1e-05, "loss": 0.9334, "step": 33745 }, { "epoch": 29.893711248892824, "grad_norm": 0.2440873384475708, "learning_rate": 1e-05, "loss": 0.9955, "step": 33750 }, { "epoch": 29.898139946855625, "grad_norm": 0.2634141445159912, "learning_rate": 1e-05, "loss": 0.9901, "step": 33755 }, { "epoch": 29.902568644818423, "grad_norm": 0.2766169309616089, "learning_rate": 1e-05, "loss": 0.9862, "step": 33760 }, { "epoch": 29.90699734278122, "grad_norm": 0.25497835874557495, "learning_rate": 1e-05, "loss": 0.9406, "step": 33765 }, { "epoch": 29.911426040744022, "grad_norm": 0.23452550172805786, "learning_rate": 1e-05, "loss": 0.9781, "step": 33770 }, { "epoch": 29.91585473870682, "grad_norm": 0.27484825253486633, "learning_rate": 1e-05, "loss": 1.0107, "step": 33775 }, { "epoch": 29.92028343666962, "grad_norm": 0.24014128744602203, "learning_rate": 1e-05, "loss": 0.9506, "step": 33780 }, { "epoch": 29.92471213463242, "grad_norm": 0.24675621092319489, "learning_rate": 1e-05, "loss": 0.9525, "step": 33785 }, { "epoch": 29.929140832595216, "grad_norm": 0.2273753583431244, "learning_rate": 1e-05, "loss": 1.0349, "step": 33790 }, { "epoch": 29.933569530558017, "grad_norm": 0.23943696916103363, "learning_rate": 1e-05, "loss": 1.0029, "step": 33795 }, { "epoch": 29.937998228520815, "grad_norm": 0.21891751885414124, "learning_rate": 1e-05, "loss": 0.9429, "step": 33800 }, { "epoch": 29.942426926483613, "grad_norm": 0.23428450524806976, "learning_rate": 1e-05, "loss": 0.9699, "step": 33805 }, { "epoch": 29.946855624446414, "grad_norm": 0.21084064245224, "learning_rate": 1e-05, "loss": 0.9993, "step": 33810 }, { "epoch": 29.95128432240921, "grad_norm": 0.22127531468868256, "learning_rate": 1e-05, "loss": 0.9755, "step": 33815 }, { "epoch": 29.95571302037201, "grad_norm": 0.21720542013645172, "learning_rate": 1e-05, "loss": 1.0163, "step": 33820 }, { "epoch": 29.96014171833481, "grad_norm": 0.21275512874126434, "learning_rate": 1e-05, "loss": 0.922, "step": 33825 }, { "epoch": 29.964570416297608, "grad_norm": 0.21496634185314178, "learning_rate": 1e-05, "loss": 0.9549, "step": 33830 }, { "epoch": 29.968999114260406, "grad_norm": 0.29388946294784546, "learning_rate": 1e-05, "loss": 1.0267, "step": 33835 }, { "epoch": 29.973427812223207, "grad_norm": 0.2560620903968811, "learning_rate": 1e-05, "loss": 1.0102, "step": 33840 }, { "epoch": 29.977856510186005, "grad_norm": 0.2432190179824829, "learning_rate": 1e-05, "loss": 0.9465, "step": 33845 }, { "epoch": 29.982285208148806, "grad_norm": 0.2676409184932709, "learning_rate": 1e-05, "loss": 0.96, "step": 33850 }, { "epoch": 29.986713906111603, "grad_norm": 0.20951853692531586, "learning_rate": 1e-05, "loss": 0.981, "step": 33855 }, { "epoch": 29.9911426040744, "grad_norm": 0.2243216186761856, "learning_rate": 1e-05, "loss": 0.9482, "step": 33860 }, { "epoch": 29.995571302037202, "grad_norm": 0.23263275623321533, "learning_rate": 1e-05, "loss": 0.9685, "step": 33865 }, { "epoch": 30.0, "grad_norm": 0.22279272973537445, "learning_rate": 1e-05, "loss": 1.0098, "step": 33870 }, { "epoch": 30.004428697962798, "grad_norm": 0.2128625214099884, "learning_rate": 1e-05, "loss": 1.0005, "step": 33875 }, { "epoch": 30.0088573959256, "grad_norm": 0.24543286859989166, "learning_rate": 1e-05, "loss": 0.9487, "step": 33880 }, { "epoch": 30.013286093888397, "grad_norm": 0.21484240889549255, "learning_rate": 1e-05, "loss": 0.9776, "step": 33885 }, { "epoch": 30.017714791851194, "grad_norm": 0.2522362172603607, "learning_rate": 1e-05, "loss": 0.9827, "step": 33890 }, { "epoch": 30.022143489813995, "grad_norm": 0.20335379242897034, "learning_rate": 1e-05, "loss": 0.913, "step": 33895 }, { "epoch": 30.026572187776793, "grad_norm": 0.27319905161857605, "learning_rate": 1e-05, "loss": 0.9864, "step": 33900 }, { "epoch": 30.031000885739594, "grad_norm": 0.24649815261363983, "learning_rate": 1e-05, "loss": 0.9693, "step": 33905 }, { "epoch": 30.035429583702392, "grad_norm": 0.2384646236896515, "learning_rate": 1e-05, "loss": 0.9643, "step": 33910 }, { "epoch": 30.03985828166519, "grad_norm": 0.24109268188476562, "learning_rate": 1e-05, "loss": 0.9584, "step": 33915 }, { "epoch": 30.04428697962799, "grad_norm": 0.22765232622623444, "learning_rate": 1e-05, "loss": 0.9702, "step": 33920 }, { "epoch": 30.04871567759079, "grad_norm": 0.23042981326580048, "learning_rate": 1e-05, "loss": 0.992, "step": 33925 }, { "epoch": 30.053144375553586, "grad_norm": 0.22180402278900146, "learning_rate": 1e-05, "loss": 1.0232, "step": 33930 }, { "epoch": 30.057573073516387, "grad_norm": 0.2460329681634903, "learning_rate": 1e-05, "loss": 0.9983, "step": 33935 }, { "epoch": 30.062001771479185, "grad_norm": 0.2586100399494171, "learning_rate": 1e-05, "loss": 0.9544, "step": 33940 }, { "epoch": 30.066430469441983, "grad_norm": 0.2281045913696289, "learning_rate": 1e-05, "loss": 0.9406, "step": 33945 }, { "epoch": 30.070859167404784, "grad_norm": 0.31259095668792725, "learning_rate": 1e-05, "loss": 0.9483, "step": 33950 }, { "epoch": 30.07528786536758, "grad_norm": 0.29469239711761475, "learning_rate": 1e-05, "loss": 0.9715, "step": 33955 }, { "epoch": 30.07971656333038, "grad_norm": 0.2334946095943451, "learning_rate": 1e-05, "loss": 0.9525, "step": 33960 }, { "epoch": 30.08414526129318, "grad_norm": 0.2896636426448822, "learning_rate": 1e-05, "loss": 0.978, "step": 33965 }, { "epoch": 30.088573959255978, "grad_norm": 0.24355122447013855, "learning_rate": 1e-05, "loss": 0.9804, "step": 33970 }, { "epoch": 30.09300265721878, "grad_norm": 0.26359230279922485, "learning_rate": 1e-05, "loss": 1.0115, "step": 33975 }, { "epoch": 30.097431355181577, "grad_norm": 0.27373090386390686, "learning_rate": 1e-05, "loss": 0.9421, "step": 33980 }, { "epoch": 30.101860053144375, "grad_norm": 0.21663948893547058, "learning_rate": 1e-05, "loss": 0.978, "step": 33985 }, { "epoch": 30.106288751107176, "grad_norm": 0.22245050966739655, "learning_rate": 1e-05, "loss": 0.9826, "step": 33990 }, { "epoch": 30.110717449069973, "grad_norm": 0.22601357102394104, "learning_rate": 1e-05, "loss": 1.0131, "step": 33995 }, { "epoch": 30.11514614703277, "grad_norm": 0.20984213054180145, "learning_rate": 1e-05, "loss": 0.9912, "step": 34000 }, { "epoch": 30.119574844995572, "grad_norm": 0.21778535842895508, "learning_rate": 1e-05, "loss": 0.9497, "step": 34005 }, { "epoch": 30.12400354295837, "grad_norm": 0.2527742385864258, "learning_rate": 1e-05, "loss": 0.9558, "step": 34010 }, { "epoch": 30.128432240921168, "grad_norm": 0.24183496832847595, "learning_rate": 1e-05, "loss": 1.0084, "step": 34015 }, { "epoch": 30.13286093888397, "grad_norm": 0.2694372236728668, "learning_rate": 1e-05, "loss": 1.0414, "step": 34020 }, { "epoch": 30.137289636846766, "grad_norm": 0.2790459990501404, "learning_rate": 1e-05, "loss": 0.9535, "step": 34025 }, { "epoch": 30.141718334809568, "grad_norm": 0.3224250376224518, "learning_rate": 1e-05, "loss": 0.9917, "step": 34030 }, { "epoch": 30.146147032772365, "grad_norm": 0.28082728385925293, "learning_rate": 1e-05, "loss": 1.0035, "step": 34035 }, { "epoch": 30.150575730735163, "grad_norm": 0.25160282850265503, "learning_rate": 1e-05, "loss": 1.0074, "step": 34040 }, { "epoch": 30.155004428697964, "grad_norm": 0.2314085215330124, "learning_rate": 1e-05, "loss": 1.0014, "step": 34045 }, { "epoch": 30.159433126660762, "grad_norm": 0.21914249658584595, "learning_rate": 1e-05, "loss": 0.9451, "step": 34050 }, { "epoch": 30.16386182462356, "grad_norm": 0.25499778985977173, "learning_rate": 1e-05, "loss": 0.938, "step": 34055 }, { "epoch": 30.16829052258636, "grad_norm": 0.2513638138771057, "learning_rate": 1e-05, "loss": 1.0035, "step": 34060 }, { "epoch": 30.17271922054916, "grad_norm": 0.23948869109153748, "learning_rate": 1e-05, "loss": 0.9562, "step": 34065 }, { "epoch": 30.177147918511956, "grad_norm": 0.24730654060840607, "learning_rate": 1e-05, "loss": 0.9733, "step": 34070 }, { "epoch": 30.181576616474757, "grad_norm": 0.22750549018383026, "learning_rate": 1e-05, "loss": 0.9951, "step": 34075 }, { "epoch": 30.186005314437555, "grad_norm": 0.24042083323001862, "learning_rate": 1e-05, "loss": 1.0371, "step": 34080 }, { "epoch": 30.190434012400353, "grad_norm": 0.26706311106681824, "learning_rate": 1e-05, "loss": 0.9885, "step": 34085 }, { "epoch": 30.194862710363154, "grad_norm": 0.2427464723587036, "learning_rate": 1e-05, "loss": 0.9579, "step": 34090 }, { "epoch": 30.19929140832595, "grad_norm": 0.21277616918087006, "learning_rate": 1e-05, "loss": 0.9674, "step": 34095 }, { "epoch": 30.203720106288753, "grad_norm": 0.26772087812423706, "learning_rate": 1e-05, "loss": 0.948, "step": 34100 }, { "epoch": 30.20814880425155, "grad_norm": 0.3057091534137726, "learning_rate": 1e-05, "loss": 0.9905, "step": 34105 }, { "epoch": 30.212577502214348, "grad_norm": 0.21423472464084625, "learning_rate": 1e-05, "loss": 0.9564, "step": 34110 }, { "epoch": 30.21700620017715, "grad_norm": 0.23690512776374817, "learning_rate": 1e-05, "loss": 0.9934, "step": 34115 }, { "epoch": 30.221434898139947, "grad_norm": 0.22269457578659058, "learning_rate": 1e-05, "loss": 0.9611, "step": 34120 }, { "epoch": 30.225863596102744, "grad_norm": 0.2575618326663971, "learning_rate": 1e-05, "loss": 0.9838, "step": 34125 }, { "epoch": 30.230292294065546, "grad_norm": 0.29142439365386963, "learning_rate": 1e-05, "loss": 1.029, "step": 34130 }, { "epoch": 30.234720992028343, "grad_norm": 0.24798934161663055, "learning_rate": 1e-05, "loss": 1.0161, "step": 34135 }, { "epoch": 30.23914968999114, "grad_norm": 0.2384113222360611, "learning_rate": 1e-05, "loss": 0.9892, "step": 34140 }, { "epoch": 30.243578387953942, "grad_norm": 0.24455608427524567, "learning_rate": 1e-05, "loss": 1.0191, "step": 34145 }, { "epoch": 30.24800708591674, "grad_norm": 0.23404699563980103, "learning_rate": 1e-05, "loss": 0.9833, "step": 34150 }, { "epoch": 30.25243578387954, "grad_norm": 0.23720529675483704, "learning_rate": 1e-05, "loss": 0.9301, "step": 34155 }, { "epoch": 30.25686448184234, "grad_norm": 0.2367081642150879, "learning_rate": 1e-05, "loss": 0.9787, "step": 34160 }, { "epoch": 30.261293179805136, "grad_norm": 0.2246650755405426, "learning_rate": 1e-05, "loss": 0.9605, "step": 34165 }, { "epoch": 30.265721877767938, "grad_norm": 0.24377083778381348, "learning_rate": 1e-05, "loss": 0.9783, "step": 34170 }, { "epoch": 30.270150575730735, "grad_norm": 0.2659006714820862, "learning_rate": 1e-05, "loss": 0.9754, "step": 34175 }, { "epoch": 30.274579273693533, "grad_norm": 0.23865897953510284, "learning_rate": 1e-05, "loss": 0.9654, "step": 34180 }, { "epoch": 30.279007971656334, "grad_norm": 0.32606515288352966, "learning_rate": 1e-05, "loss": 0.9419, "step": 34185 }, { "epoch": 30.28343666961913, "grad_norm": 0.2819402813911438, "learning_rate": 1e-05, "loss": 0.9895, "step": 34190 }, { "epoch": 30.28786536758193, "grad_norm": 0.23415009677410126, "learning_rate": 1e-05, "loss": 0.9981, "step": 34195 }, { "epoch": 30.29229406554473, "grad_norm": 0.22377543151378632, "learning_rate": 1e-05, "loss": 0.985, "step": 34200 }, { "epoch": 30.29672276350753, "grad_norm": 0.2716352045536041, "learning_rate": 1e-05, "loss": 1.0293, "step": 34205 }, { "epoch": 30.30115146147033, "grad_norm": 0.2506796717643738, "learning_rate": 1e-05, "loss": 0.9604, "step": 34210 }, { "epoch": 30.305580159433127, "grad_norm": 0.245045006275177, "learning_rate": 1e-05, "loss": 0.9709, "step": 34215 }, { "epoch": 30.310008857395925, "grad_norm": 0.22532270848751068, "learning_rate": 1e-05, "loss": 1.013, "step": 34220 }, { "epoch": 30.314437555358726, "grad_norm": 0.2955477833747864, "learning_rate": 1e-05, "loss": 0.9851, "step": 34225 }, { "epoch": 30.318866253321524, "grad_norm": 0.3092818856239319, "learning_rate": 1e-05, "loss": 0.997, "step": 34230 }, { "epoch": 30.32329495128432, "grad_norm": 0.22338555753231049, "learning_rate": 1e-05, "loss": 0.9736, "step": 34235 }, { "epoch": 30.327723649247122, "grad_norm": 0.22364799678325653, "learning_rate": 1e-05, "loss": 0.9232, "step": 34240 }, { "epoch": 30.33215234720992, "grad_norm": 0.23154060542583466, "learning_rate": 1e-05, "loss": 0.9857, "step": 34245 }, { "epoch": 30.336581045172718, "grad_norm": 0.240864560008049, "learning_rate": 1e-05, "loss": 0.9418, "step": 34250 }, { "epoch": 30.34100974313552, "grad_norm": 0.24423836171627045, "learning_rate": 1e-05, "loss": 1.0136, "step": 34255 }, { "epoch": 30.345438441098317, "grad_norm": 0.24207143485546112, "learning_rate": 1e-05, "loss": 0.9293, "step": 34260 }, { "epoch": 30.349867139061114, "grad_norm": 0.29829666018486023, "learning_rate": 1e-05, "loss": 0.9603, "step": 34265 }, { "epoch": 30.354295837023916, "grad_norm": 0.2518916726112366, "learning_rate": 1e-05, "loss": 0.9491, "step": 34270 }, { "epoch": 30.358724534986713, "grad_norm": 0.2573820650577545, "learning_rate": 1e-05, "loss": 0.9808, "step": 34275 }, { "epoch": 30.363153232949514, "grad_norm": 0.26762500405311584, "learning_rate": 1e-05, "loss": 0.9806, "step": 34280 }, { "epoch": 30.367581930912312, "grad_norm": 0.2401001900434494, "learning_rate": 1e-05, "loss": 1.0604, "step": 34285 }, { "epoch": 30.37201062887511, "grad_norm": 0.2129121869802475, "learning_rate": 1e-05, "loss": 0.9803, "step": 34290 }, { "epoch": 30.37643932683791, "grad_norm": 0.2617007791996002, "learning_rate": 1e-05, "loss": 0.9419, "step": 34295 }, { "epoch": 30.38086802480071, "grad_norm": 0.282577782869339, "learning_rate": 1e-05, "loss": 0.9468, "step": 34300 }, { "epoch": 30.385296722763506, "grad_norm": 0.28779372572898865, "learning_rate": 1e-05, "loss": 0.9751, "step": 34305 }, { "epoch": 30.389725420726307, "grad_norm": 0.2623580992221832, "learning_rate": 1e-05, "loss": 0.9404, "step": 34310 }, { "epoch": 30.394154118689105, "grad_norm": 0.32232269644737244, "learning_rate": 1e-05, "loss": 0.9717, "step": 34315 }, { "epoch": 30.398582816651903, "grad_norm": 0.2766963839530945, "learning_rate": 1e-05, "loss": 0.9787, "step": 34320 }, { "epoch": 30.403011514614704, "grad_norm": 0.20745114982128143, "learning_rate": 1e-05, "loss": 0.9975, "step": 34325 }, { "epoch": 30.4074402125775, "grad_norm": 0.20107071101665497, "learning_rate": 1e-05, "loss": 0.9834, "step": 34330 }, { "epoch": 30.411868910540303, "grad_norm": 0.2113850861787796, "learning_rate": 1e-05, "loss": 0.9894, "step": 34335 }, { "epoch": 30.4162976085031, "grad_norm": 0.2446461021900177, "learning_rate": 1e-05, "loss": 0.9868, "step": 34340 }, { "epoch": 30.420726306465898, "grad_norm": 0.24066166579723358, "learning_rate": 1e-05, "loss": 0.9505, "step": 34345 }, { "epoch": 30.4251550044287, "grad_norm": 0.23494704067707062, "learning_rate": 1e-05, "loss": 0.9756, "step": 34350 }, { "epoch": 30.429583702391497, "grad_norm": 0.23799870908260345, "learning_rate": 1e-05, "loss": 0.9923, "step": 34355 }, { "epoch": 30.434012400354295, "grad_norm": 0.28301307559013367, "learning_rate": 1e-05, "loss": 1.0245, "step": 34360 }, { "epoch": 30.438441098317096, "grad_norm": 0.2635035514831543, "learning_rate": 1e-05, "loss": 0.9291, "step": 34365 }, { "epoch": 30.442869796279894, "grad_norm": 0.24691958725452423, "learning_rate": 1e-05, "loss": 0.9964, "step": 34370 }, { "epoch": 30.44729849424269, "grad_norm": 0.22498434782028198, "learning_rate": 1e-05, "loss": 1.0095, "step": 34375 }, { "epoch": 30.451727192205492, "grad_norm": 0.2796193063259125, "learning_rate": 1e-05, "loss": 1.0131, "step": 34380 }, { "epoch": 30.45615589016829, "grad_norm": 0.2543242275714874, "learning_rate": 1e-05, "loss": 1.0287, "step": 34385 }, { "epoch": 30.460584588131088, "grad_norm": 0.26206764578819275, "learning_rate": 1e-05, "loss": 1.014, "step": 34390 }, { "epoch": 30.46501328609389, "grad_norm": 0.24417613446712494, "learning_rate": 1e-05, "loss": 0.9712, "step": 34395 }, { "epoch": 30.469441984056687, "grad_norm": 0.22031310200691223, "learning_rate": 1e-05, "loss": 0.9705, "step": 34400 }, { "epoch": 30.473870682019488, "grad_norm": 0.22819258272647858, "learning_rate": 1e-05, "loss": 0.9323, "step": 34405 }, { "epoch": 30.478299379982285, "grad_norm": 0.28085991740226746, "learning_rate": 1e-05, "loss": 0.9287, "step": 34410 }, { "epoch": 30.482728077945083, "grad_norm": 0.2402373105287552, "learning_rate": 1e-05, "loss": 0.9682, "step": 34415 }, { "epoch": 30.487156775907884, "grad_norm": 0.2271738499403, "learning_rate": 1e-05, "loss": 0.9928, "step": 34420 }, { "epoch": 30.491585473870682, "grad_norm": 0.2838076651096344, "learning_rate": 1e-05, "loss": 0.9757, "step": 34425 }, { "epoch": 30.49601417183348, "grad_norm": 0.21655474603176117, "learning_rate": 1e-05, "loss": 1.0033, "step": 34430 }, { "epoch": 30.50044286979628, "grad_norm": 0.22896727919578552, "learning_rate": 1e-05, "loss": 0.977, "step": 34435 }, { "epoch": 30.50487156775908, "grad_norm": 0.28526267409324646, "learning_rate": 1e-05, "loss": 0.988, "step": 34440 }, { "epoch": 30.509300265721876, "grad_norm": 0.26764383912086487, "learning_rate": 1e-05, "loss": 0.9585, "step": 34445 }, { "epoch": 30.513728963684677, "grad_norm": 0.24766159057617188, "learning_rate": 1e-05, "loss": 0.9654, "step": 34450 }, { "epoch": 30.518157661647475, "grad_norm": 0.2382461130619049, "learning_rate": 1e-05, "loss": 0.9616, "step": 34455 }, { "epoch": 30.522586359610276, "grad_norm": 0.2269027978181839, "learning_rate": 1e-05, "loss": 1.0046, "step": 34460 }, { "epoch": 30.527015057573074, "grad_norm": 0.2292771190404892, "learning_rate": 1e-05, "loss": 0.9939, "step": 34465 }, { "epoch": 30.53144375553587, "grad_norm": 0.2514874041080475, "learning_rate": 1e-05, "loss": 1.0244, "step": 34470 }, { "epoch": 30.535872453498673, "grad_norm": 0.2683906555175781, "learning_rate": 1e-05, "loss": 1.0112, "step": 34475 }, { "epoch": 30.54030115146147, "grad_norm": 0.2551718056201935, "learning_rate": 1e-05, "loss": 0.9302, "step": 34480 }, { "epoch": 30.544729849424268, "grad_norm": 0.2350943386554718, "learning_rate": 1e-05, "loss": 0.9682, "step": 34485 }, { "epoch": 30.54915854738707, "grad_norm": 0.2871323823928833, "learning_rate": 1e-05, "loss": 0.9995, "step": 34490 }, { "epoch": 30.553587245349867, "grad_norm": 0.23525409400463104, "learning_rate": 1e-05, "loss": 0.9466, "step": 34495 }, { "epoch": 30.558015943312665, "grad_norm": 0.21111206710338593, "learning_rate": 1e-05, "loss": 0.8939, "step": 34500 }, { "epoch": 30.562444641275466, "grad_norm": 0.2725718319416046, "learning_rate": 1e-05, "loss": 0.9782, "step": 34505 }, { "epoch": 30.566873339238263, "grad_norm": 0.21595382690429688, "learning_rate": 1e-05, "loss": 1.0037, "step": 34510 }, { "epoch": 30.571302037201065, "grad_norm": 0.3127965033054352, "learning_rate": 1e-05, "loss": 0.8932, "step": 34515 }, { "epoch": 30.575730735163862, "grad_norm": 0.2232382446527481, "learning_rate": 1e-05, "loss": 1.0382, "step": 34520 }, { "epoch": 30.58015943312666, "grad_norm": 0.25699564814567566, "learning_rate": 1e-05, "loss": 1.0486, "step": 34525 }, { "epoch": 30.58458813108946, "grad_norm": 0.32374072074890137, "learning_rate": 1e-05, "loss": 0.9624, "step": 34530 }, { "epoch": 30.58901682905226, "grad_norm": 0.31563764810562134, "learning_rate": 1e-05, "loss": 0.9816, "step": 34535 }, { "epoch": 30.593445527015056, "grad_norm": 0.2529101073741913, "learning_rate": 1e-05, "loss": 1.0062, "step": 34540 }, { "epoch": 30.597874224977858, "grad_norm": 0.3023061752319336, "learning_rate": 1e-05, "loss": 0.9827, "step": 34545 }, { "epoch": 30.602302922940655, "grad_norm": 0.25037938356399536, "learning_rate": 1e-05, "loss": 1.0151, "step": 34550 }, { "epoch": 30.606731620903453, "grad_norm": 0.2639210522174835, "learning_rate": 1e-05, "loss": 0.9775, "step": 34555 }, { "epoch": 30.611160318866254, "grad_norm": 0.22708819806575775, "learning_rate": 1e-05, "loss": 0.9914, "step": 34560 }, { "epoch": 30.615589016829052, "grad_norm": 0.26096218824386597, "learning_rate": 1e-05, "loss": 0.9653, "step": 34565 }, { "epoch": 30.62001771479185, "grad_norm": 0.2173214554786682, "learning_rate": 1e-05, "loss": 0.982, "step": 34570 }, { "epoch": 30.62444641275465, "grad_norm": 0.2637869715690613, "learning_rate": 1e-05, "loss": 0.9953, "step": 34575 }, { "epoch": 30.62887511071745, "grad_norm": 0.2391861230134964, "learning_rate": 1e-05, "loss": 1.0021, "step": 34580 }, { "epoch": 30.63330380868025, "grad_norm": 0.22723832726478577, "learning_rate": 1e-05, "loss": 0.9489, "step": 34585 }, { "epoch": 30.637732506643047, "grad_norm": 0.2361384779214859, "learning_rate": 1e-05, "loss": 1.0161, "step": 34590 }, { "epoch": 30.642161204605845, "grad_norm": 0.2518586218357086, "learning_rate": 1e-05, "loss": 1.0011, "step": 34595 }, { "epoch": 30.646589902568646, "grad_norm": 0.2275327891111374, "learning_rate": 1e-05, "loss": 0.9885, "step": 34600 }, { "epoch": 30.651018600531444, "grad_norm": 0.2624276876449585, "learning_rate": 1e-05, "loss": 0.9689, "step": 34605 }, { "epoch": 30.65544729849424, "grad_norm": 0.2679789662361145, "learning_rate": 1e-05, "loss": 0.9616, "step": 34610 }, { "epoch": 30.659875996457043, "grad_norm": 0.2824432849884033, "learning_rate": 1e-05, "loss": 0.9989, "step": 34615 }, { "epoch": 30.66430469441984, "grad_norm": 0.2547994554042816, "learning_rate": 1e-05, "loss": 0.972, "step": 34620 }, { "epoch": 30.668733392382638, "grad_norm": 0.20962588489055634, "learning_rate": 1e-05, "loss": 1.0169, "step": 34625 }, { "epoch": 30.67316209034544, "grad_norm": 0.22719013690948486, "learning_rate": 1e-05, "loss": 1.0357, "step": 34630 }, { "epoch": 30.677590788308237, "grad_norm": 0.2688770592212677, "learning_rate": 1e-05, "loss": 0.9581, "step": 34635 }, { "epoch": 30.682019486271038, "grad_norm": 0.22420749068260193, "learning_rate": 1e-05, "loss": 1.0203, "step": 34640 }, { "epoch": 30.686448184233836, "grad_norm": 0.2644655704498291, "learning_rate": 1e-05, "loss": 0.9657, "step": 34645 }, { "epoch": 30.690876882196633, "grad_norm": 0.23710519075393677, "learning_rate": 1e-05, "loss": 0.9744, "step": 34650 }, { "epoch": 30.695305580159435, "grad_norm": 0.2559422552585602, "learning_rate": 1e-05, "loss": 1.0098, "step": 34655 }, { "epoch": 30.699734278122232, "grad_norm": 0.3059367835521698, "learning_rate": 1e-05, "loss": 0.9884, "step": 34660 }, { "epoch": 30.70416297608503, "grad_norm": 0.2646207809448242, "learning_rate": 1e-05, "loss": 0.9262, "step": 34665 }, { "epoch": 30.70859167404783, "grad_norm": 0.23796968162059784, "learning_rate": 1e-05, "loss": 1.0165, "step": 34670 }, { "epoch": 30.71302037201063, "grad_norm": 0.29492005705833435, "learning_rate": 1e-05, "loss": 0.9322, "step": 34675 }, { "epoch": 30.717449069973426, "grad_norm": 0.24444738030433655, "learning_rate": 1e-05, "loss": 1.0007, "step": 34680 }, { "epoch": 30.721877767936228, "grad_norm": 0.2072303593158722, "learning_rate": 1e-05, "loss": 1.0205, "step": 34685 }, { "epoch": 30.726306465899025, "grad_norm": 0.25287508964538574, "learning_rate": 1e-05, "loss": 0.9912, "step": 34690 }, { "epoch": 30.730735163861823, "grad_norm": 0.28292641043663025, "learning_rate": 1e-05, "loss": 0.9903, "step": 34695 }, { "epoch": 30.735163861824624, "grad_norm": 0.26195114850997925, "learning_rate": 1e-05, "loss": 1.0116, "step": 34700 }, { "epoch": 30.739592559787422, "grad_norm": 0.2798844575881958, "learning_rate": 1e-05, "loss": 0.9976, "step": 34705 }, { "epoch": 30.744021257750223, "grad_norm": 0.25873300433158875, "learning_rate": 1e-05, "loss": 0.9261, "step": 34710 }, { "epoch": 30.74844995571302, "grad_norm": 0.21659459173679352, "learning_rate": 1e-05, "loss": 0.9474, "step": 34715 }, { "epoch": 30.75287865367582, "grad_norm": 0.25124579668045044, "learning_rate": 1e-05, "loss": 0.9659, "step": 34720 }, { "epoch": 30.75730735163862, "grad_norm": 0.2484748363494873, "learning_rate": 1e-05, "loss": 1.0391, "step": 34725 }, { "epoch": 30.761736049601417, "grad_norm": 0.2469712346792221, "learning_rate": 1e-05, "loss": 0.9688, "step": 34730 }, { "epoch": 30.766164747564215, "grad_norm": 0.23426826298236847, "learning_rate": 1e-05, "loss": 1.0116, "step": 34735 }, { "epoch": 30.770593445527016, "grad_norm": 0.25094476342201233, "learning_rate": 1e-05, "loss": 0.9416, "step": 34740 }, { "epoch": 30.775022143489814, "grad_norm": 0.23613260686397552, "learning_rate": 1e-05, "loss": 0.9625, "step": 34745 }, { "epoch": 30.77945084145261, "grad_norm": 0.2904433608055115, "learning_rate": 1e-05, "loss": 1.0095, "step": 34750 }, { "epoch": 30.783879539415413, "grad_norm": 0.2220756858587265, "learning_rate": 1e-05, "loss": 1.0321, "step": 34755 }, { "epoch": 30.78830823737821, "grad_norm": 0.2451700121164322, "learning_rate": 1e-05, "loss": 0.9675, "step": 34760 }, { "epoch": 30.79273693534101, "grad_norm": 0.25129827857017517, "learning_rate": 1e-05, "loss": 1.0066, "step": 34765 }, { "epoch": 30.79716563330381, "grad_norm": 0.2640751004219055, "learning_rate": 1e-05, "loss": 0.9949, "step": 34770 }, { "epoch": 30.801594331266607, "grad_norm": 0.22449372708797455, "learning_rate": 1e-05, "loss": 0.9682, "step": 34775 }, { "epoch": 30.806023029229408, "grad_norm": 0.19808551669120789, "learning_rate": 1e-05, "loss": 0.9534, "step": 34780 }, { "epoch": 30.810451727192206, "grad_norm": 0.24920284748077393, "learning_rate": 1e-05, "loss": 0.984, "step": 34785 }, { "epoch": 30.814880425155003, "grad_norm": 0.26047414541244507, "learning_rate": 1e-05, "loss": 1.0399, "step": 34790 }, { "epoch": 30.819309123117804, "grad_norm": 0.23394909501075745, "learning_rate": 1e-05, "loss": 0.9424, "step": 34795 }, { "epoch": 30.823737821080602, "grad_norm": 0.2258889526128769, "learning_rate": 1e-05, "loss": 0.9515, "step": 34800 }, { "epoch": 30.8281665190434, "grad_norm": 0.2383006066083908, "learning_rate": 1e-05, "loss": 1.0085, "step": 34805 }, { "epoch": 30.8325952170062, "grad_norm": 0.2643567621707916, "learning_rate": 1e-05, "loss": 0.9203, "step": 34810 }, { "epoch": 30.837023914969, "grad_norm": 0.22734931111335754, "learning_rate": 1e-05, "loss": 0.9908, "step": 34815 }, { "epoch": 30.841452612931796, "grad_norm": 0.25397050380706787, "learning_rate": 1e-05, "loss": 0.94, "step": 34820 }, { "epoch": 30.845881310894598, "grad_norm": 0.27157706022262573, "learning_rate": 1e-05, "loss": 1.0197, "step": 34825 }, { "epoch": 30.850310008857395, "grad_norm": 0.2450697124004364, "learning_rate": 1e-05, "loss": 0.9995, "step": 34830 }, { "epoch": 30.854738706820196, "grad_norm": 0.21117836236953735, "learning_rate": 1e-05, "loss": 0.9675, "step": 34835 }, { "epoch": 30.859167404782994, "grad_norm": 0.2756665349006653, "learning_rate": 1e-05, "loss": 1.0056, "step": 34840 }, { "epoch": 30.86359610274579, "grad_norm": 0.24095040559768677, "learning_rate": 1e-05, "loss": 1.0084, "step": 34845 }, { "epoch": 30.868024800708593, "grad_norm": 0.2671302258968353, "learning_rate": 1e-05, "loss": 0.9723, "step": 34850 }, { "epoch": 30.87245349867139, "grad_norm": 0.22376534342765808, "learning_rate": 1e-05, "loss": 0.9886, "step": 34855 }, { "epoch": 30.876882196634188, "grad_norm": 0.27429595589637756, "learning_rate": 1e-05, "loss": 0.985, "step": 34860 }, { "epoch": 30.88131089459699, "grad_norm": 0.24483215808868408, "learning_rate": 1e-05, "loss": 0.924, "step": 34865 }, { "epoch": 30.885739592559787, "grad_norm": 0.2594766318798065, "learning_rate": 1e-05, "loss": 1.0141, "step": 34870 }, { "epoch": 30.890168290522585, "grad_norm": 0.22799572348594666, "learning_rate": 1e-05, "loss": 0.9711, "step": 34875 }, { "epoch": 30.894596988485386, "grad_norm": 0.22435466945171356, "learning_rate": 1e-05, "loss": 0.9643, "step": 34880 }, { "epoch": 30.899025686448184, "grad_norm": 0.2150404304265976, "learning_rate": 1e-05, "loss": 0.9505, "step": 34885 }, { "epoch": 30.903454384410985, "grad_norm": 0.27419155836105347, "learning_rate": 1e-05, "loss": 0.989, "step": 34890 }, { "epoch": 30.907883082373782, "grad_norm": 0.2596563398838043, "learning_rate": 1e-05, "loss": 1.0063, "step": 34895 }, { "epoch": 30.91231178033658, "grad_norm": 0.28533440828323364, "learning_rate": 1e-05, "loss": 1.0153, "step": 34900 }, { "epoch": 30.91674047829938, "grad_norm": 0.2412666231393814, "learning_rate": 1e-05, "loss": 0.9844, "step": 34905 }, { "epoch": 30.92116917626218, "grad_norm": 0.22542257606983185, "learning_rate": 1e-05, "loss": 1.0391, "step": 34910 }, { "epoch": 30.925597874224977, "grad_norm": 0.22069807350635529, "learning_rate": 1e-05, "loss": 0.9993, "step": 34915 }, { "epoch": 30.930026572187778, "grad_norm": 0.24135540425777435, "learning_rate": 1e-05, "loss": 0.945, "step": 34920 }, { "epoch": 30.934455270150576, "grad_norm": 0.2341495156288147, "learning_rate": 1e-05, "loss": 1.0268, "step": 34925 }, { "epoch": 30.938883968113373, "grad_norm": 0.22325782477855682, "learning_rate": 1e-05, "loss": 0.9238, "step": 34930 }, { "epoch": 30.943312666076174, "grad_norm": 0.25892776250839233, "learning_rate": 1e-05, "loss": 0.9367, "step": 34935 }, { "epoch": 30.947741364038972, "grad_norm": 0.2588663101196289, "learning_rate": 1e-05, "loss": 1.0342, "step": 34940 }, { "epoch": 30.95217006200177, "grad_norm": 0.2352781891822815, "learning_rate": 1e-05, "loss": 1.0213, "step": 34945 }, { "epoch": 30.95659875996457, "grad_norm": 0.23250240087509155, "learning_rate": 1e-05, "loss": 0.9737, "step": 34950 }, { "epoch": 30.96102745792737, "grad_norm": 0.2730807662010193, "learning_rate": 1e-05, "loss": 0.9501, "step": 34955 }, { "epoch": 30.96545615589017, "grad_norm": 0.23375511169433594, "learning_rate": 1e-05, "loss": 1.0445, "step": 34960 }, { "epoch": 30.969884853852967, "grad_norm": 0.2920178174972534, "learning_rate": 1e-05, "loss": 0.9499, "step": 34965 }, { "epoch": 30.974313551815765, "grad_norm": 0.2189139574766159, "learning_rate": 1e-05, "loss": 1.0159, "step": 34970 }, { "epoch": 30.978742249778566, "grad_norm": 0.27846527099609375, "learning_rate": 1e-05, "loss": 1.0064, "step": 34975 }, { "epoch": 30.983170947741364, "grad_norm": 0.20329032838344574, "learning_rate": 1e-05, "loss": 0.9942, "step": 34980 }, { "epoch": 30.98759964570416, "grad_norm": 0.2962951958179474, "learning_rate": 1e-05, "loss": 0.9802, "step": 34985 }, { "epoch": 30.992028343666963, "grad_norm": 0.27440345287323, "learning_rate": 1e-05, "loss": 1.0102, "step": 34990 }, { "epoch": 30.99645704162976, "grad_norm": 0.28769195079803467, "learning_rate": 1e-05, "loss": 0.9635, "step": 34995 }, { "epoch": 31.000885739592558, "grad_norm": 0.2570668160915375, "learning_rate": 1e-05, "loss": 0.9861, "step": 35000 }, { "epoch": 31.00531443755536, "grad_norm": 0.24807776510715485, "learning_rate": 1e-05, "loss": 0.9723, "step": 35005 }, { "epoch": 31.009743135518157, "grad_norm": 0.2410086691379547, "learning_rate": 1e-05, "loss": 0.9906, "step": 35010 }, { "epoch": 31.014171833480958, "grad_norm": 0.23655954003334045, "learning_rate": 1e-05, "loss": 0.9211, "step": 35015 }, { "epoch": 31.018600531443756, "grad_norm": 0.27551335096359253, "learning_rate": 1e-05, "loss": 1.013, "step": 35020 }, { "epoch": 31.023029229406553, "grad_norm": 0.2707156538963318, "learning_rate": 1e-05, "loss": 0.9703, "step": 35025 }, { "epoch": 31.027457927369355, "grad_norm": 0.22851836681365967, "learning_rate": 1e-05, "loss": 0.9685, "step": 35030 }, { "epoch": 31.031886625332152, "grad_norm": 0.31851282715797424, "learning_rate": 1e-05, "loss": 0.9636, "step": 35035 }, { "epoch": 31.03631532329495, "grad_norm": 0.2293817549943924, "learning_rate": 1e-05, "loss": 0.9883, "step": 35040 }, { "epoch": 31.04074402125775, "grad_norm": 0.24714027345180511, "learning_rate": 1e-05, "loss": 1.0144, "step": 35045 }, { "epoch": 31.04517271922055, "grad_norm": 0.2505913972854614, "learning_rate": 1e-05, "loss": 0.9867, "step": 35050 }, { "epoch": 31.049601417183347, "grad_norm": 0.275619238615036, "learning_rate": 1e-05, "loss": 0.9675, "step": 35055 }, { "epoch": 31.054030115146148, "grad_norm": 0.24901467561721802, "learning_rate": 1e-05, "loss": 0.9939, "step": 35060 }, { "epoch": 31.058458813108945, "grad_norm": 0.2166486233472824, "learning_rate": 1e-05, "loss": 1.0088, "step": 35065 }, { "epoch": 31.062887511071747, "grad_norm": 0.20286552608013153, "learning_rate": 1e-05, "loss": 1.0002, "step": 35070 }, { "epoch": 31.067316209034544, "grad_norm": 0.2698226571083069, "learning_rate": 1e-05, "loss": 0.9802, "step": 35075 }, { "epoch": 31.071744906997342, "grad_norm": 0.2966356873512268, "learning_rate": 1e-05, "loss": 0.9705, "step": 35080 }, { "epoch": 31.076173604960143, "grad_norm": 0.21147166192531586, "learning_rate": 1e-05, "loss": 0.9823, "step": 35085 }, { "epoch": 31.08060230292294, "grad_norm": 0.24268680810928345, "learning_rate": 1e-05, "loss": 0.9852, "step": 35090 }, { "epoch": 31.08503100088574, "grad_norm": 0.2531626522541046, "learning_rate": 1e-05, "loss": 1.0475, "step": 35095 }, { "epoch": 31.08945969884854, "grad_norm": 0.25439777970314026, "learning_rate": 1e-05, "loss": 1.0354, "step": 35100 }, { "epoch": 31.093888396811337, "grad_norm": 0.4166276752948761, "learning_rate": 1e-05, "loss": 0.9822, "step": 35105 }, { "epoch": 31.098317094774135, "grad_norm": 0.2791493237018585, "learning_rate": 1e-05, "loss": 1.0185, "step": 35110 }, { "epoch": 31.102745792736936, "grad_norm": 0.2618753910064697, "learning_rate": 1e-05, "loss": 0.984, "step": 35115 }, { "epoch": 31.107174490699734, "grad_norm": 0.236869677901268, "learning_rate": 1e-05, "loss": 0.992, "step": 35120 }, { "epoch": 31.11160318866253, "grad_norm": 0.24910107254981995, "learning_rate": 1e-05, "loss": 0.9489, "step": 35125 }, { "epoch": 31.116031886625333, "grad_norm": 0.2720644474029541, "learning_rate": 1e-05, "loss": 0.9764, "step": 35130 }, { "epoch": 31.12046058458813, "grad_norm": 0.2633616626262665, "learning_rate": 1e-05, "loss": 1.01, "step": 35135 }, { "epoch": 31.12488928255093, "grad_norm": 0.288844496011734, "learning_rate": 1e-05, "loss": 1.0105, "step": 35140 }, { "epoch": 31.12931798051373, "grad_norm": 0.2554263770580292, "learning_rate": 1e-05, "loss": 0.9311, "step": 35145 }, { "epoch": 31.133746678476527, "grad_norm": 0.28100958466529846, "learning_rate": 1e-05, "loss": 0.9428, "step": 35150 }, { "epoch": 31.138175376439328, "grad_norm": 0.22852961719036102, "learning_rate": 1e-05, "loss": 0.9818, "step": 35155 }, { "epoch": 31.142604074402126, "grad_norm": 0.21116362512111664, "learning_rate": 1e-05, "loss": 0.9068, "step": 35160 }, { "epoch": 31.147032772364923, "grad_norm": 0.2336074411869049, "learning_rate": 1e-05, "loss": 0.9965, "step": 35165 }, { "epoch": 31.151461470327725, "grad_norm": 0.2704808712005615, "learning_rate": 1e-05, "loss": 0.9356, "step": 35170 }, { "epoch": 31.155890168290522, "grad_norm": 0.26952216029167175, "learning_rate": 1e-05, "loss": 0.9855, "step": 35175 }, { "epoch": 31.16031886625332, "grad_norm": 0.26170507073402405, "learning_rate": 1e-05, "loss": 0.965, "step": 35180 }, { "epoch": 31.16474756421612, "grad_norm": 0.282736212015152, "learning_rate": 1e-05, "loss": 1.0215, "step": 35185 }, { "epoch": 31.16917626217892, "grad_norm": 0.23463398218154907, "learning_rate": 1e-05, "loss": 0.9827, "step": 35190 }, { "epoch": 31.17360496014172, "grad_norm": 0.24890385568141937, "learning_rate": 1e-05, "loss": 0.9753, "step": 35195 }, { "epoch": 31.178033658104518, "grad_norm": 0.23202618956565857, "learning_rate": 1e-05, "loss": 1.021, "step": 35200 }, { "epoch": 31.182462356067315, "grad_norm": 0.236739844083786, "learning_rate": 1e-05, "loss": 1.0408, "step": 35205 }, { "epoch": 31.186891054030117, "grad_norm": 0.29719746112823486, "learning_rate": 1e-05, "loss": 1.0371, "step": 35210 }, { "epoch": 31.191319751992914, "grad_norm": 0.26827743649482727, "learning_rate": 1e-05, "loss": 0.9913, "step": 35215 }, { "epoch": 31.195748449955712, "grad_norm": 0.2626824975013733, "learning_rate": 1e-05, "loss": 0.9523, "step": 35220 }, { "epoch": 31.200177147918513, "grad_norm": 0.2704109251499176, "learning_rate": 1e-05, "loss": 0.9424, "step": 35225 }, { "epoch": 31.20460584588131, "grad_norm": 0.27876201272010803, "learning_rate": 1e-05, "loss": 0.9864, "step": 35230 }, { "epoch": 31.20903454384411, "grad_norm": 0.2840498387813568, "learning_rate": 1e-05, "loss": 0.9282, "step": 35235 }, { "epoch": 31.21346324180691, "grad_norm": 0.214911550283432, "learning_rate": 1e-05, "loss": 0.9174, "step": 35240 }, { "epoch": 31.217891939769707, "grad_norm": 0.25282469391822815, "learning_rate": 1e-05, "loss": 0.9693, "step": 35245 }, { "epoch": 31.22232063773251, "grad_norm": 0.2781233787536621, "learning_rate": 1e-05, "loss": 0.9953, "step": 35250 }, { "epoch": 31.226749335695306, "grad_norm": 0.21406224370002747, "learning_rate": 1e-05, "loss": 1.0258, "step": 35255 }, { "epoch": 31.231178033658104, "grad_norm": 0.22580429911613464, "learning_rate": 1e-05, "loss": 0.9236, "step": 35260 }, { "epoch": 31.235606731620905, "grad_norm": 0.21106700599193573, "learning_rate": 1e-05, "loss": 1.0099, "step": 35265 }, { "epoch": 31.240035429583703, "grad_norm": 0.23357057571411133, "learning_rate": 1e-05, "loss": 1.0046, "step": 35270 }, { "epoch": 31.2444641275465, "grad_norm": 0.24178718030452728, "learning_rate": 1e-05, "loss": 0.9855, "step": 35275 }, { "epoch": 31.2488928255093, "grad_norm": 0.2801498770713806, "learning_rate": 1e-05, "loss": 0.9498, "step": 35280 }, { "epoch": 31.2533215234721, "grad_norm": 0.25350314378738403, "learning_rate": 1e-05, "loss": 0.9737, "step": 35285 }, { "epoch": 31.257750221434897, "grad_norm": 0.225112184882164, "learning_rate": 1e-05, "loss": 0.95, "step": 35290 }, { "epoch": 31.262178919397698, "grad_norm": 0.2511879801750183, "learning_rate": 1e-05, "loss": 1.0109, "step": 35295 }, { "epoch": 31.266607617360496, "grad_norm": 0.28343436121940613, "learning_rate": 1e-05, "loss": 1.0264, "step": 35300 }, { "epoch": 31.271036315323293, "grad_norm": 0.2766794264316559, "learning_rate": 1e-05, "loss": 1.0161, "step": 35305 }, { "epoch": 31.275465013286095, "grad_norm": 0.2446972131729126, "learning_rate": 1e-05, "loss": 0.9976, "step": 35310 }, { "epoch": 31.279893711248892, "grad_norm": 0.25362056493759155, "learning_rate": 1e-05, "loss": 1.03, "step": 35315 }, { "epoch": 31.284322409211693, "grad_norm": 0.21366441249847412, "learning_rate": 1e-05, "loss": 0.9639, "step": 35320 }, { "epoch": 31.28875110717449, "grad_norm": 0.22904399037361145, "learning_rate": 1e-05, "loss": 1.0376, "step": 35325 }, { "epoch": 31.29317980513729, "grad_norm": 0.2796422243118286, "learning_rate": 1e-05, "loss": 0.9879, "step": 35330 }, { "epoch": 31.29760850310009, "grad_norm": 0.2913896143436432, "learning_rate": 1e-05, "loss": 0.9698, "step": 35335 }, { "epoch": 31.302037201062888, "grad_norm": 0.23101086914539337, "learning_rate": 1e-05, "loss": 0.9402, "step": 35340 }, { "epoch": 31.306465899025685, "grad_norm": 0.2066575437784195, "learning_rate": 1e-05, "loss": 0.9313, "step": 35345 }, { "epoch": 31.310894596988486, "grad_norm": 0.21769800782203674, "learning_rate": 1e-05, "loss": 0.9939, "step": 35350 }, { "epoch": 31.315323294951284, "grad_norm": 0.2609650790691376, "learning_rate": 1e-05, "loss": 0.9981, "step": 35355 }, { "epoch": 31.31975199291408, "grad_norm": 0.22938460111618042, "learning_rate": 1e-05, "loss": 1.0124, "step": 35360 }, { "epoch": 31.324180690876883, "grad_norm": 0.24664834141731262, "learning_rate": 1e-05, "loss": 0.9831, "step": 35365 }, { "epoch": 31.32860938883968, "grad_norm": 0.18712028861045837, "learning_rate": 1e-05, "loss": 0.9639, "step": 35370 }, { "epoch": 31.333038086802482, "grad_norm": 0.2431710660457611, "learning_rate": 1e-05, "loss": 0.9907, "step": 35375 }, { "epoch": 31.33746678476528, "grad_norm": 0.23196014761924744, "learning_rate": 1e-05, "loss": 1.02, "step": 35380 }, { "epoch": 31.341895482728077, "grad_norm": 0.2867108881473541, "learning_rate": 1e-05, "loss": 1.0122, "step": 35385 }, { "epoch": 31.34632418069088, "grad_norm": 0.24104748666286469, "learning_rate": 1e-05, "loss": 1.0371, "step": 35390 }, { "epoch": 31.350752878653676, "grad_norm": 0.23968102037906647, "learning_rate": 1e-05, "loss": 0.9475, "step": 35395 }, { "epoch": 31.355181576616474, "grad_norm": 0.23314544558525085, "learning_rate": 1e-05, "loss": 0.9458, "step": 35400 }, { "epoch": 31.359610274579275, "grad_norm": 0.2784644067287445, "learning_rate": 1e-05, "loss": 0.9975, "step": 35405 }, { "epoch": 31.364038972542073, "grad_norm": 0.21619026362895966, "learning_rate": 1e-05, "loss": 0.9192, "step": 35410 }, { "epoch": 31.36846767050487, "grad_norm": 0.23371988534927368, "learning_rate": 1e-05, "loss": 1.0272, "step": 35415 }, { "epoch": 31.37289636846767, "grad_norm": 0.23752902448177338, "learning_rate": 1e-05, "loss": 0.9672, "step": 35420 }, { "epoch": 31.37732506643047, "grad_norm": 0.24812424182891846, "learning_rate": 1e-05, "loss": 0.9847, "step": 35425 }, { "epoch": 31.381753764393267, "grad_norm": 0.22310978174209595, "learning_rate": 1e-05, "loss": 1.0111, "step": 35430 }, { "epoch": 31.386182462356068, "grad_norm": 0.24477465450763702, "learning_rate": 1e-05, "loss": 0.9543, "step": 35435 }, { "epoch": 31.390611160318866, "grad_norm": 0.20999886095523834, "learning_rate": 1e-05, "loss": 0.9895, "step": 35440 }, { "epoch": 31.395039858281667, "grad_norm": 0.22738344967365265, "learning_rate": 1e-05, "loss": 0.9851, "step": 35445 }, { "epoch": 31.399468556244464, "grad_norm": 0.2496766597032547, "learning_rate": 1e-05, "loss": 0.9857, "step": 35450 }, { "epoch": 31.403897254207262, "grad_norm": 0.2513379752635956, "learning_rate": 1e-05, "loss": 0.9245, "step": 35455 }, { "epoch": 31.408325952170063, "grad_norm": 0.22780577838420868, "learning_rate": 1e-05, "loss": 1.0135, "step": 35460 }, { "epoch": 31.41275465013286, "grad_norm": 0.23735111951828003, "learning_rate": 1e-05, "loss": 0.971, "step": 35465 }, { "epoch": 31.41718334809566, "grad_norm": 0.2360881119966507, "learning_rate": 1e-05, "loss": 0.9563, "step": 35470 }, { "epoch": 31.42161204605846, "grad_norm": 0.23891125619411469, "learning_rate": 1e-05, "loss": 0.9771, "step": 35475 }, { "epoch": 31.426040744021257, "grad_norm": 0.24041160941123962, "learning_rate": 1e-05, "loss": 1.0146, "step": 35480 }, { "epoch": 31.430469441984055, "grad_norm": 0.24010957777500153, "learning_rate": 1e-05, "loss": 1.0094, "step": 35485 }, { "epoch": 31.434898139946856, "grad_norm": 0.22992154955863953, "learning_rate": 1e-05, "loss": 1.0264, "step": 35490 }, { "epoch": 31.439326837909654, "grad_norm": 0.3005712330341339, "learning_rate": 1e-05, "loss": 0.9983, "step": 35495 }, { "epoch": 31.443755535872455, "grad_norm": 0.2488047182559967, "learning_rate": 1e-05, "loss": 1.0496, "step": 35500 }, { "epoch": 31.448184233835253, "grad_norm": 0.2128056436777115, "learning_rate": 1e-05, "loss": 0.9567, "step": 35505 }, { "epoch": 31.45261293179805, "grad_norm": 0.22438634932041168, "learning_rate": 1e-05, "loss": 1.0507, "step": 35510 }, { "epoch": 31.45704162976085, "grad_norm": 0.2282673865556717, "learning_rate": 1e-05, "loss": 0.9661, "step": 35515 }, { "epoch": 31.46147032772365, "grad_norm": 0.235970601439476, "learning_rate": 1e-05, "loss": 0.978, "step": 35520 }, { "epoch": 31.465899025686447, "grad_norm": 0.2444128692150116, "learning_rate": 1e-05, "loss": 0.999, "step": 35525 }, { "epoch": 31.47032772364925, "grad_norm": 0.25187110900878906, "learning_rate": 1e-05, "loss": 1.0041, "step": 35530 }, { "epoch": 31.474756421612046, "grad_norm": 0.23714660108089447, "learning_rate": 1e-05, "loss": 0.9609, "step": 35535 }, { "epoch": 31.479185119574844, "grad_norm": 0.27497971057891846, "learning_rate": 1e-05, "loss": 1.0096, "step": 35540 }, { "epoch": 31.483613817537645, "grad_norm": 0.2124537229537964, "learning_rate": 1e-05, "loss": 1.0119, "step": 35545 }, { "epoch": 31.488042515500442, "grad_norm": 0.22905023396015167, "learning_rate": 1e-05, "loss": 1.0189, "step": 35550 }, { "epoch": 31.49247121346324, "grad_norm": 0.3222437798976898, "learning_rate": 1e-05, "loss": 0.9766, "step": 35555 }, { "epoch": 31.49689991142604, "grad_norm": 0.2093300074338913, "learning_rate": 1e-05, "loss": 0.9685, "step": 35560 }, { "epoch": 31.50132860938884, "grad_norm": 0.237563818693161, "learning_rate": 1e-05, "loss": 1.0132, "step": 35565 }, { "epoch": 31.50575730735164, "grad_norm": 0.2434566169977188, "learning_rate": 1e-05, "loss": 0.9966, "step": 35570 }, { "epoch": 31.510186005314438, "grad_norm": 0.2535463571548462, "learning_rate": 1e-05, "loss": 1.0228, "step": 35575 }, { "epoch": 31.514614703277235, "grad_norm": 0.2376113086938858, "learning_rate": 1e-05, "loss": 1.0215, "step": 35580 }, { "epoch": 31.519043401240037, "grad_norm": 0.20796199142932892, "learning_rate": 1e-05, "loss": 0.966, "step": 35585 }, { "epoch": 31.523472099202834, "grad_norm": 0.23710638284683228, "learning_rate": 1e-05, "loss": 1.0014, "step": 35590 }, { "epoch": 31.527900797165632, "grad_norm": 0.2609461545944214, "learning_rate": 1e-05, "loss": 1.0118, "step": 35595 }, { "epoch": 31.532329495128433, "grad_norm": 0.2826554775238037, "learning_rate": 1e-05, "loss": 0.9716, "step": 35600 }, { "epoch": 31.53675819309123, "grad_norm": 0.25112223625183105, "learning_rate": 1e-05, "loss": 0.9479, "step": 35605 }, { "epoch": 31.54118689105403, "grad_norm": 0.2330673485994339, "learning_rate": 1e-05, "loss": 0.9684, "step": 35610 }, { "epoch": 31.54561558901683, "grad_norm": 0.2788885235786438, "learning_rate": 1e-05, "loss": 1.0127, "step": 35615 }, { "epoch": 31.550044286979627, "grad_norm": 0.23976612091064453, "learning_rate": 1e-05, "loss": 1.015, "step": 35620 }, { "epoch": 31.55447298494243, "grad_norm": 0.2367788553237915, "learning_rate": 1e-05, "loss": 1.0051, "step": 35625 }, { "epoch": 31.558901682905226, "grad_norm": 0.27491727471351624, "learning_rate": 1e-05, "loss": 0.9824, "step": 35630 }, { "epoch": 31.563330380868024, "grad_norm": 0.263215035200119, "learning_rate": 1e-05, "loss": 0.9772, "step": 35635 }, { "epoch": 31.567759078830825, "grad_norm": 0.2804422974586487, "learning_rate": 1e-05, "loss": 0.9438, "step": 35640 }, { "epoch": 31.572187776793623, "grad_norm": 0.2390197515487671, "learning_rate": 1e-05, "loss": 0.9525, "step": 35645 }, { "epoch": 31.57661647475642, "grad_norm": 0.26763594150543213, "learning_rate": 1e-05, "loss": 0.9659, "step": 35650 }, { "epoch": 31.58104517271922, "grad_norm": 0.22215326130390167, "learning_rate": 1e-05, "loss": 0.9907, "step": 35655 }, { "epoch": 31.58547387068202, "grad_norm": 0.2609909772872925, "learning_rate": 1e-05, "loss": 0.9569, "step": 35660 }, { "epoch": 31.589902568644817, "grad_norm": 0.20740638673305511, "learning_rate": 1e-05, "loss": 1.0779, "step": 35665 }, { "epoch": 31.594331266607618, "grad_norm": 0.2855990529060364, "learning_rate": 1e-05, "loss": 0.9998, "step": 35670 }, { "epoch": 31.598759964570416, "grad_norm": 0.25453928112983704, "learning_rate": 1e-05, "loss": 0.9769, "step": 35675 }, { "epoch": 31.603188662533213, "grad_norm": 0.2798418402671814, "learning_rate": 1e-05, "loss": 1.0516, "step": 35680 }, { "epoch": 31.607617360496015, "grad_norm": 0.27569082379341125, "learning_rate": 1e-05, "loss": 0.9862, "step": 35685 }, { "epoch": 31.612046058458812, "grad_norm": 0.2389264851808548, "learning_rate": 1e-05, "loss": 0.9594, "step": 35690 }, { "epoch": 31.616474756421614, "grad_norm": 0.2625390291213989, "learning_rate": 1e-05, "loss": 1.0054, "step": 35695 }, { "epoch": 31.62090345438441, "grad_norm": 0.2644560635089874, "learning_rate": 1e-05, "loss": 0.9825, "step": 35700 }, { "epoch": 31.62533215234721, "grad_norm": 0.24798107147216797, "learning_rate": 1e-05, "loss": 0.9174, "step": 35705 }, { "epoch": 31.62976085031001, "grad_norm": 0.2319185584783554, "learning_rate": 1e-05, "loss": 0.9963, "step": 35710 }, { "epoch": 31.634189548272808, "grad_norm": 0.2435673177242279, "learning_rate": 1e-05, "loss": 0.9786, "step": 35715 }, { "epoch": 31.638618246235605, "grad_norm": 0.24127261340618134, "learning_rate": 1e-05, "loss": 1.0079, "step": 35720 }, { "epoch": 31.643046944198407, "grad_norm": 0.2230539619922638, "learning_rate": 1e-05, "loss": 0.9345, "step": 35725 }, { "epoch": 31.647475642161204, "grad_norm": 0.2332291603088379, "learning_rate": 1e-05, "loss": 0.9709, "step": 35730 }, { "epoch": 31.651904340124002, "grad_norm": 0.2614711821079254, "learning_rate": 1e-05, "loss": 0.9621, "step": 35735 }, { "epoch": 31.656333038086803, "grad_norm": 0.23311543464660645, "learning_rate": 1e-05, "loss": 1.0003, "step": 35740 }, { "epoch": 31.6607617360496, "grad_norm": 0.3181777596473694, "learning_rate": 1e-05, "loss": 0.9753, "step": 35745 }, { "epoch": 31.665190434012402, "grad_norm": 0.2673760652542114, "learning_rate": 1e-05, "loss": 0.9652, "step": 35750 }, { "epoch": 31.6696191319752, "grad_norm": 0.2845006287097931, "learning_rate": 1e-05, "loss": 0.9673, "step": 35755 }, { "epoch": 31.674047829937997, "grad_norm": 0.2288849800825119, "learning_rate": 1e-05, "loss": 0.9784, "step": 35760 }, { "epoch": 31.6784765279008, "grad_norm": 0.2399853765964508, "learning_rate": 1e-05, "loss": 0.9426, "step": 35765 }, { "epoch": 31.682905225863596, "grad_norm": 0.27550503611564636, "learning_rate": 1e-05, "loss": 0.9214, "step": 35770 }, { "epoch": 31.687333923826394, "grad_norm": 0.2351623773574829, "learning_rate": 1e-05, "loss": 0.9525, "step": 35775 }, { "epoch": 31.691762621789195, "grad_norm": 0.3011205494403839, "learning_rate": 1e-05, "loss": 0.9949, "step": 35780 }, { "epoch": 31.696191319751993, "grad_norm": 0.24457640945911407, "learning_rate": 1e-05, "loss": 0.9901, "step": 35785 }, { "epoch": 31.70062001771479, "grad_norm": 0.2897714078426361, "learning_rate": 1e-05, "loss": 0.9968, "step": 35790 }, { "epoch": 31.70504871567759, "grad_norm": 0.27010974287986755, "learning_rate": 1e-05, "loss": 0.931, "step": 35795 }, { "epoch": 31.70947741364039, "grad_norm": 0.24359668791294098, "learning_rate": 1e-05, "loss": 0.9991, "step": 35800 }, { "epoch": 31.713906111603187, "grad_norm": 0.2919084131717682, "learning_rate": 1e-05, "loss": 0.9398, "step": 35805 }, { "epoch": 31.718334809565988, "grad_norm": 0.2341143786907196, "learning_rate": 1e-05, "loss": 1.021, "step": 35810 }, { "epoch": 31.722763507528786, "grad_norm": 0.24202905595302582, "learning_rate": 1e-05, "loss": 0.9743, "step": 35815 }, { "epoch": 31.727192205491587, "grad_norm": 0.23509721457958221, "learning_rate": 1e-05, "loss": 0.9101, "step": 35820 }, { "epoch": 31.731620903454385, "grad_norm": 0.2174912691116333, "learning_rate": 1e-05, "loss": 0.9461, "step": 35825 }, { "epoch": 31.736049601417182, "grad_norm": 0.24426499009132385, "learning_rate": 1e-05, "loss": 0.9916, "step": 35830 }, { "epoch": 31.740478299379983, "grad_norm": 0.238041490316391, "learning_rate": 1e-05, "loss": 1.0076, "step": 35835 }, { "epoch": 31.74490699734278, "grad_norm": 0.26409682631492615, "learning_rate": 1e-05, "loss": 0.9643, "step": 35840 }, { "epoch": 31.74933569530558, "grad_norm": 0.23438508808612823, "learning_rate": 1e-05, "loss": 0.968, "step": 35845 }, { "epoch": 31.75376439326838, "grad_norm": 0.24072594940662384, "learning_rate": 1e-05, "loss": 0.9397, "step": 35850 }, { "epoch": 31.758193091231178, "grad_norm": 0.2742479145526886, "learning_rate": 1e-05, "loss": 1.0086, "step": 35855 }, { "epoch": 31.762621789193975, "grad_norm": 0.24791787564754486, "learning_rate": 1e-05, "loss": 0.9306, "step": 35860 }, { "epoch": 31.767050487156776, "grad_norm": 0.24108822643756866, "learning_rate": 1e-05, "loss": 0.9543, "step": 35865 }, { "epoch": 31.771479185119574, "grad_norm": 0.2175181359052658, "learning_rate": 1e-05, "loss": 0.9421, "step": 35870 }, { "epoch": 31.775907883082375, "grad_norm": 0.2114318460226059, "learning_rate": 1e-05, "loss": 0.9626, "step": 35875 }, { "epoch": 31.780336581045173, "grad_norm": 0.2672019600868225, "learning_rate": 1e-05, "loss": 0.9807, "step": 35880 }, { "epoch": 31.78476527900797, "grad_norm": 0.3060592710971832, "learning_rate": 1e-05, "loss": 0.9342, "step": 35885 }, { "epoch": 31.789193976970772, "grad_norm": 0.22689026594161987, "learning_rate": 1e-05, "loss": 0.9884, "step": 35890 }, { "epoch": 31.79362267493357, "grad_norm": 0.23403868079185486, "learning_rate": 1e-05, "loss": 0.9404, "step": 35895 }, { "epoch": 31.798051372896367, "grad_norm": 0.2175912857055664, "learning_rate": 1e-05, "loss": 0.9571, "step": 35900 }, { "epoch": 31.80248007085917, "grad_norm": 0.21413464844226837, "learning_rate": 1e-05, "loss": 0.9779, "step": 35905 }, { "epoch": 31.806908768821966, "grad_norm": 0.27171841263771057, "learning_rate": 1e-05, "loss": 0.9531, "step": 35910 }, { "epoch": 31.811337466784764, "grad_norm": 0.24016597867012024, "learning_rate": 1e-05, "loss": 1.0174, "step": 35915 }, { "epoch": 31.815766164747565, "grad_norm": 0.24857477843761444, "learning_rate": 1e-05, "loss": 0.9694, "step": 35920 }, { "epoch": 31.820194862710363, "grad_norm": 0.22688615322113037, "learning_rate": 1e-05, "loss": 0.9662, "step": 35925 }, { "epoch": 31.824623560673164, "grad_norm": 0.2265274077653885, "learning_rate": 1e-05, "loss": 0.9139, "step": 35930 }, { "epoch": 31.82905225863596, "grad_norm": 0.2208435982465744, "learning_rate": 1e-05, "loss": 1.0199, "step": 35935 }, { "epoch": 31.83348095659876, "grad_norm": 0.24390463531017303, "learning_rate": 1e-05, "loss": 0.9362, "step": 35940 }, { "epoch": 31.83790965456156, "grad_norm": 0.26262837648391724, "learning_rate": 1e-05, "loss": 0.9514, "step": 35945 }, { "epoch": 31.842338352524358, "grad_norm": 0.23161430656909943, "learning_rate": 1e-05, "loss": 0.9648, "step": 35950 }, { "epoch": 31.846767050487156, "grad_norm": 0.22293663024902344, "learning_rate": 1e-05, "loss": 0.986, "step": 35955 }, { "epoch": 31.851195748449957, "grad_norm": 0.26411643624305725, "learning_rate": 1e-05, "loss": 0.9873, "step": 35960 }, { "epoch": 31.855624446412754, "grad_norm": 0.2719457149505615, "learning_rate": 1e-05, "loss": 1.0173, "step": 35965 }, { "epoch": 31.860053144375552, "grad_norm": 0.2952626049518585, "learning_rate": 1e-05, "loss": 0.9943, "step": 35970 }, { "epoch": 31.864481842338353, "grad_norm": 0.23821762204170227, "learning_rate": 1e-05, "loss": 1.0016, "step": 35975 }, { "epoch": 31.86891054030115, "grad_norm": 0.21682235598564148, "learning_rate": 1e-05, "loss": 0.9726, "step": 35980 }, { "epoch": 31.873339238263952, "grad_norm": 0.24717587232589722, "learning_rate": 1e-05, "loss": 0.9843, "step": 35985 }, { "epoch": 31.87776793622675, "grad_norm": 0.24953578412532806, "learning_rate": 1e-05, "loss": 1.0306, "step": 35990 }, { "epoch": 31.882196634189548, "grad_norm": 0.26740214228630066, "learning_rate": 1e-05, "loss": 0.9777, "step": 35995 }, { "epoch": 31.88662533215235, "grad_norm": 0.22675389051437378, "learning_rate": 1e-05, "loss": 0.9557, "step": 36000 }, { "epoch": 31.891054030115146, "grad_norm": 0.2416391372680664, "learning_rate": 1e-05, "loss": 0.9892, "step": 36005 }, { "epoch": 31.895482728077944, "grad_norm": 0.2632206082344055, "learning_rate": 1e-05, "loss": 0.93, "step": 36010 }, { "epoch": 31.899911426040745, "grad_norm": 0.254170686006546, "learning_rate": 1e-05, "loss": 0.9705, "step": 36015 }, { "epoch": 31.904340124003543, "grad_norm": 0.27379873394966125, "learning_rate": 1e-05, "loss": 1.023, "step": 36020 }, { "epoch": 31.90876882196634, "grad_norm": 0.29201215505599976, "learning_rate": 1e-05, "loss": 1.0441, "step": 36025 }, { "epoch": 31.913197519929142, "grad_norm": 0.2925633192062378, "learning_rate": 1e-05, "loss": 0.9385, "step": 36030 }, { "epoch": 31.91762621789194, "grad_norm": 0.22624832391738892, "learning_rate": 1e-05, "loss": 0.9884, "step": 36035 }, { "epoch": 31.922054915854737, "grad_norm": 0.24902020394802094, "learning_rate": 1e-05, "loss": 0.9734, "step": 36040 }, { "epoch": 31.92648361381754, "grad_norm": 0.22631345689296722, "learning_rate": 1e-05, "loss": 0.9106, "step": 36045 }, { "epoch": 31.930912311780336, "grad_norm": 0.2157122641801834, "learning_rate": 1e-05, "loss": 0.9706, "step": 36050 }, { "epoch": 31.935341009743137, "grad_norm": 0.2781873643398285, "learning_rate": 1e-05, "loss": 0.9685, "step": 36055 }, { "epoch": 31.939769707705935, "grad_norm": 0.22202855348587036, "learning_rate": 1e-05, "loss": 0.9855, "step": 36060 }, { "epoch": 31.944198405668732, "grad_norm": 0.26727306842803955, "learning_rate": 1e-05, "loss": 1.0339, "step": 36065 }, { "epoch": 31.948627103631534, "grad_norm": 0.23619753122329712, "learning_rate": 1e-05, "loss": 0.9634, "step": 36070 }, { "epoch": 31.95305580159433, "grad_norm": 0.24048994481563568, "learning_rate": 1e-05, "loss": 1.0084, "step": 36075 }, { "epoch": 31.95748449955713, "grad_norm": 0.30554598569869995, "learning_rate": 1e-05, "loss": 1.0352, "step": 36080 }, { "epoch": 31.96191319751993, "grad_norm": 0.23777183890342712, "learning_rate": 1e-05, "loss": 0.969, "step": 36085 }, { "epoch": 31.966341895482728, "grad_norm": 0.23298799991607666, "learning_rate": 1e-05, "loss": 1.0016, "step": 36090 }, { "epoch": 31.970770593445526, "grad_norm": 0.22514529526233673, "learning_rate": 1e-05, "loss": 0.981, "step": 36095 }, { "epoch": 31.975199291408327, "grad_norm": 0.22329914569854736, "learning_rate": 1e-05, "loss": 0.9918, "step": 36100 }, { "epoch": 31.979627989371124, "grad_norm": 0.23244720697402954, "learning_rate": 1e-05, "loss": 0.9766, "step": 36105 }, { "epoch": 31.984056687333926, "grad_norm": 0.2390136420726776, "learning_rate": 1e-05, "loss": 0.9874, "step": 36110 }, { "epoch": 31.988485385296723, "grad_norm": 0.24254535138607025, "learning_rate": 1e-05, "loss": 1.0145, "step": 36115 }, { "epoch": 31.99291408325952, "grad_norm": 0.21646682918071747, "learning_rate": 1e-05, "loss": 1.0331, "step": 36120 }, { "epoch": 31.997342781222322, "grad_norm": 0.24873514473438263, "learning_rate": 1e-05, "loss": 1.0087, "step": 36125 }, { "epoch": 32.001771479185116, "grad_norm": 0.270109087228775, "learning_rate": 1e-05, "loss": 0.9349, "step": 36130 }, { "epoch": 32.00620017714792, "grad_norm": 0.23483222723007202, "learning_rate": 1e-05, "loss": 0.9766, "step": 36135 }, { "epoch": 32.01062887511072, "grad_norm": 0.21630969643592834, "learning_rate": 1e-05, "loss": 0.9936, "step": 36140 }, { "epoch": 32.01505757307352, "grad_norm": 0.22074012458324432, "learning_rate": 1e-05, "loss": 0.9544, "step": 36145 }, { "epoch": 32.019486271036314, "grad_norm": 0.19626078009605408, "learning_rate": 1e-05, "loss": 0.9948, "step": 36150 }, { "epoch": 32.023914968999115, "grad_norm": 0.25371024012565613, "learning_rate": 1e-05, "loss": 1.0171, "step": 36155 }, { "epoch": 32.028343666961916, "grad_norm": 0.3242085874080658, "learning_rate": 1e-05, "loss": 1.0102, "step": 36160 }, { "epoch": 32.03277236492471, "grad_norm": 0.3156336545944214, "learning_rate": 1e-05, "loss": 0.946, "step": 36165 }, { "epoch": 32.03720106288751, "grad_norm": 0.24700365960597992, "learning_rate": 1e-05, "loss": 0.9152, "step": 36170 }, { "epoch": 32.04162976085031, "grad_norm": 0.21226954460144043, "learning_rate": 1e-05, "loss": 0.9471, "step": 36175 }, { "epoch": 32.04605845881311, "grad_norm": 0.24382300674915314, "learning_rate": 1e-05, "loss": 0.9755, "step": 36180 }, { "epoch": 32.05048715677591, "grad_norm": 0.23433832824230194, "learning_rate": 1e-05, "loss": 0.9582, "step": 36185 }, { "epoch": 32.05491585473871, "grad_norm": 0.22456440329551697, "learning_rate": 1e-05, "loss": 1.0184, "step": 36190 }, { "epoch": 32.0593445527015, "grad_norm": 0.2309112548828125, "learning_rate": 1e-05, "loss": 0.9767, "step": 36195 }, { "epoch": 32.063773250664305, "grad_norm": 0.3553697466850281, "learning_rate": 1e-05, "loss": 0.9712, "step": 36200 }, { "epoch": 32.068201948627106, "grad_norm": 0.21518997848033905, "learning_rate": 1e-05, "loss": 0.9469, "step": 36205 }, { "epoch": 32.0726306465899, "grad_norm": 0.26817819476127625, "learning_rate": 1e-05, "loss": 0.998, "step": 36210 }, { "epoch": 32.0770593445527, "grad_norm": 0.24733425676822662, "learning_rate": 1e-05, "loss": 1.0056, "step": 36215 }, { "epoch": 32.0814880425155, "grad_norm": 0.26431870460510254, "learning_rate": 1e-05, "loss": 0.9621, "step": 36220 }, { "epoch": 32.0859167404783, "grad_norm": 0.2452416718006134, "learning_rate": 1e-05, "loss": 0.9952, "step": 36225 }, { "epoch": 32.0903454384411, "grad_norm": 0.24736879765987396, "learning_rate": 1e-05, "loss": 0.9811, "step": 36230 }, { "epoch": 32.0947741364039, "grad_norm": 0.24133014678955078, "learning_rate": 1e-05, "loss": 1.0051, "step": 36235 }, { "epoch": 32.09920283436669, "grad_norm": 0.24304157495498657, "learning_rate": 1e-05, "loss": 0.9651, "step": 36240 }, { "epoch": 32.103631532329494, "grad_norm": 0.227181077003479, "learning_rate": 1e-05, "loss": 0.9961, "step": 36245 }, { "epoch": 32.108060230292296, "grad_norm": 0.20828892290592194, "learning_rate": 1e-05, "loss": 1.054, "step": 36250 }, { "epoch": 32.11248892825509, "grad_norm": 0.21861423552036285, "learning_rate": 1e-05, "loss": 0.9743, "step": 36255 }, { "epoch": 32.11691762621789, "grad_norm": 0.2248683124780655, "learning_rate": 1e-05, "loss": 1.0217, "step": 36260 }, { "epoch": 32.12134632418069, "grad_norm": 0.21037541329860687, "learning_rate": 1e-05, "loss": 0.9381, "step": 36265 }, { "epoch": 32.12577502214349, "grad_norm": 0.26288631558418274, "learning_rate": 1e-05, "loss": 1.0111, "step": 36270 }, { "epoch": 32.13020372010629, "grad_norm": 0.24942642450332642, "learning_rate": 1e-05, "loss": 1.0017, "step": 36275 }, { "epoch": 32.13463241806909, "grad_norm": 0.26169219613075256, "learning_rate": 1e-05, "loss": 1.0115, "step": 36280 }, { "epoch": 32.13906111603189, "grad_norm": 0.2686249613761902, "learning_rate": 1e-05, "loss": 0.9753, "step": 36285 }, { "epoch": 32.143489813994684, "grad_norm": 0.28162044286727905, "learning_rate": 1e-05, "loss": 0.9934, "step": 36290 }, { "epoch": 32.147918511957485, "grad_norm": 0.28954973816871643, "learning_rate": 1e-05, "loss": 0.9706, "step": 36295 }, { "epoch": 32.152347209920286, "grad_norm": 0.25665611028671265, "learning_rate": 1e-05, "loss": 1.0072, "step": 36300 }, { "epoch": 32.15677590788308, "grad_norm": 0.31064364314079285, "learning_rate": 1e-05, "loss": 0.9757, "step": 36305 }, { "epoch": 32.16120460584588, "grad_norm": 0.2692579925060272, "learning_rate": 1e-05, "loss": 0.9647, "step": 36310 }, { "epoch": 32.16563330380868, "grad_norm": 0.22714948654174805, "learning_rate": 1e-05, "loss": 0.9753, "step": 36315 }, { "epoch": 32.17006200177148, "grad_norm": 0.3256995975971222, "learning_rate": 1e-05, "loss": 0.9621, "step": 36320 }, { "epoch": 32.17449069973428, "grad_norm": 0.24564258754253387, "learning_rate": 1e-05, "loss": 0.9735, "step": 36325 }, { "epoch": 32.17891939769708, "grad_norm": 0.23182697594165802, "learning_rate": 1e-05, "loss": 0.9614, "step": 36330 }, { "epoch": 32.18334809565987, "grad_norm": 0.2725316286087036, "learning_rate": 1e-05, "loss": 0.9609, "step": 36335 }, { "epoch": 32.187776793622675, "grad_norm": 0.23019357025623322, "learning_rate": 1e-05, "loss": 0.9239, "step": 36340 }, { "epoch": 32.192205491585476, "grad_norm": 0.2802714705467224, "learning_rate": 1e-05, "loss": 1.0164, "step": 36345 }, { "epoch": 32.19663418954827, "grad_norm": 0.238783061504364, "learning_rate": 1e-05, "loss": 0.9681, "step": 36350 }, { "epoch": 32.20106288751107, "grad_norm": 0.23554763197898865, "learning_rate": 1e-05, "loss": 1.006, "step": 36355 }, { "epoch": 32.20549158547387, "grad_norm": 0.23464348912239075, "learning_rate": 1e-05, "loss": 0.9647, "step": 36360 }, { "epoch": 32.20992028343667, "grad_norm": 0.2215508371591568, "learning_rate": 1e-05, "loss": 0.9862, "step": 36365 }, { "epoch": 32.21434898139947, "grad_norm": 0.23354178667068481, "learning_rate": 1e-05, "loss": 0.9922, "step": 36370 }, { "epoch": 32.21877767936227, "grad_norm": 0.2414330393075943, "learning_rate": 1e-05, "loss": 0.9817, "step": 36375 }, { "epoch": 32.22320637732506, "grad_norm": 0.2330123782157898, "learning_rate": 1e-05, "loss": 0.9739, "step": 36380 }, { "epoch": 32.227635075287864, "grad_norm": 0.23892280459403992, "learning_rate": 1e-05, "loss": 0.9865, "step": 36385 }, { "epoch": 32.232063773250665, "grad_norm": 0.2523578405380249, "learning_rate": 1e-05, "loss": 0.9908, "step": 36390 }, { "epoch": 32.23649247121347, "grad_norm": 0.23887327313423157, "learning_rate": 1e-05, "loss": 1.0389, "step": 36395 }, { "epoch": 32.24092116917626, "grad_norm": 0.31854650378227234, "learning_rate": 1e-05, "loss": 0.9785, "step": 36400 }, { "epoch": 32.24534986713906, "grad_norm": 0.24610836803913116, "learning_rate": 1e-05, "loss": 1.0341, "step": 36405 }, { "epoch": 32.24977856510186, "grad_norm": 0.2544173002243042, "learning_rate": 1e-05, "loss": 0.9857, "step": 36410 }, { "epoch": 32.25420726306466, "grad_norm": 0.22621837258338928, "learning_rate": 1e-05, "loss": 1.0229, "step": 36415 }, { "epoch": 32.25863596102746, "grad_norm": 0.3374495208263397, "learning_rate": 1e-05, "loss": 1.0304, "step": 36420 }, { "epoch": 32.26306465899026, "grad_norm": 0.25838255882263184, "learning_rate": 1e-05, "loss": 0.9798, "step": 36425 }, { "epoch": 32.267493356953054, "grad_norm": 0.2937672436237335, "learning_rate": 1e-05, "loss": 0.9891, "step": 36430 }, { "epoch": 32.271922054915855, "grad_norm": 0.27002236247062683, "learning_rate": 1e-05, "loss": 1.0426, "step": 36435 }, { "epoch": 32.276350752878656, "grad_norm": 0.3041854500770569, "learning_rate": 1e-05, "loss": 0.9802, "step": 36440 }, { "epoch": 32.28077945084145, "grad_norm": 0.24303793907165527, "learning_rate": 1e-05, "loss": 0.9666, "step": 36445 }, { "epoch": 32.28520814880425, "grad_norm": 0.2140151411294937, "learning_rate": 1e-05, "loss": 1.024, "step": 36450 }, { "epoch": 32.28963684676705, "grad_norm": 0.22937098145484924, "learning_rate": 1e-05, "loss": 0.9337, "step": 36455 }, { "epoch": 32.29406554472985, "grad_norm": 0.23301666975021362, "learning_rate": 1e-05, "loss": 1.0324, "step": 36460 }, { "epoch": 32.29849424269265, "grad_norm": 0.2180456519126892, "learning_rate": 1e-05, "loss": 1.0158, "step": 36465 }, { "epoch": 32.30292294065545, "grad_norm": 0.26369139552116394, "learning_rate": 1e-05, "loss": 0.9904, "step": 36470 }, { "epoch": 32.30735163861824, "grad_norm": 0.22635501623153687, "learning_rate": 1e-05, "loss": 0.9674, "step": 36475 }, { "epoch": 32.311780336581045, "grad_norm": 0.21336348354816437, "learning_rate": 1e-05, "loss": 0.9389, "step": 36480 }, { "epoch": 32.316209034543846, "grad_norm": 0.24370889365673065, "learning_rate": 1e-05, "loss": 1.0561, "step": 36485 }, { "epoch": 32.32063773250664, "grad_norm": 0.20133469998836517, "learning_rate": 1e-05, "loss": 0.9907, "step": 36490 }, { "epoch": 32.32506643046944, "grad_norm": 0.20760925114154816, "learning_rate": 1e-05, "loss": 1.0519, "step": 36495 }, { "epoch": 32.32949512843224, "grad_norm": 0.2572067677974701, "learning_rate": 1e-05, "loss": 0.9603, "step": 36500 }, { "epoch": 32.333923826395036, "grad_norm": 0.2773112654685974, "learning_rate": 1e-05, "loss": 0.9707, "step": 36505 }, { "epoch": 32.33835252435784, "grad_norm": 0.2422502040863037, "learning_rate": 1e-05, "loss": 1.0127, "step": 36510 }, { "epoch": 32.34278122232064, "grad_norm": 0.22528409957885742, "learning_rate": 1e-05, "loss": 0.9852, "step": 36515 }, { "epoch": 32.34720992028344, "grad_norm": 0.2012006640434265, "learning_rate": 1e-05, "loss": 1.0331, "step": 36520 }, { "epoch": 32.351638618246234, "grad_norm": 0.26770204305648804, "learning_rate": 1e-05, "loss": 1.0157, "step": 36525 }, { "epoch": 32.356067316209035, "grad_norm": 0.31134894490242004, "learning_rate": 1e-05, "loss": 0.9116, "step": 36530 }, { "epoch": 32.36049601417184, "grad_norm": 0.27741578221321106, "learning_rate": 1e-05, "loss": 0.9502, "step": 36535 }, { "epoch": 32.36492471213463, "grad_norm": 0.2827765941619873, "learning_rate": 1e-05, "loss": 0.981, "step": 36540 }, { "epoch": 32.36935341009743, "grad_norm": 0.2371225357055664, "learning_rate": 1e-05, "loss": 0.9923, "step": 36545 }, { "epoch": 32.37378210806023, "grad_norm": 0.24451832473278046, "learning_rate": 1e-05, "loss": 1.0431, "step": 36550 }, { "epoch": 32.37821080602303, "grad_norm": 0.2703625559806824, "learning_rate": 1e-05, "loss": 0.9389, "step": 36555 }, { "epoch": 32.38263950398583, "grad_norm": 0.27809298038482666, "learning_rate": 1e-05, "loss": 1.0003, "step": 36560 }, { "epoch": 32.38706820194863, "grad_norm": 0.24478842318058014, "learning_rate": 1e-05, "loss": 1.0039, "step": 36565 }, { "epoch": 32.391496899911424, "grad_norm": 0.23801636695861816, "learning_rate": 1e-05, "loss": 1.0367, "step": 36570 }, { "epoch": 32.395925597874225, "grad_norm": 0.2981101870536804, "learning_rate": 1e-05, "loss": 0.9681, "step": 36575 }, { "epoch": 32.400354295837026, "grad_norm": 0.2488527148962021, "learning_rate": 1e-05, "loss": 0.9951, "step": 36580 }, { "epoch": 32.40478299379982, "grad_norm": 0.26797574758529663, "learning_rate": 1e-05, "loss": 0.9576, "step": 36585 }, { "epoch": 32.40921169176262, "grad_norm": 0.25990450382232666, "learning_rate": 1e-05, "loss": 1.0207, "step": 36590 }, { "epoch": 32.41364038972542, "grad_norm": 0.2383684664964676, "learning_rate": 1e-05, "loss": 0.9365, "step": 36595 }, { "epoch": 32.41806908768822, "grad_norm": 0.24780191481113434, "learning_rate": 1e-05, "loss": 1.0032, "step": 36600 }, { "epoch": 32.42249778565102, "grad_norm": 0.21007679402828217, "learning_rate": 1e-05, "loss": 0.9775, "step": 36605 }, { "epoch": 32.42692648361382, "grad_norm": 0.22435930371284485, "learning_rate": 1e-05, "loss": 0.9485, "step": 36610 }, { "epoch": 32.43135518157661, "grad_norm": 0.2192358672618866, "learning_rate": 1e-05, "loss": 0.9784, "step": 36615 }, { "epoch": 32.435783879539414, "grad_norm": 0.23997455835342407, "learning_rate": 1e-05, "loss": 1.0358, "step": 36620 }, { "epoch": 32.440212577502216, "grad_norm": 0.2107658088207245, "learning_rate": 1e-05, "loss": 0.9537, "step": 36625 }, { "epoch": 32.44464127546502, "grad_norm": 0.19654399156570435, "learning_rate": 1e-05, "loss": 0.9491, "step": 36630 }, { "epoch": 32.44906997342781, "grad_norm": 0.2635065019130707, "learning_rate": 1e-05, "loss": 1.0292, "step": 36635 }, { "epoch": 32.45349867139061, "grad_norm": 0.19647496938705444, "learning_rate": 1e-05, "loss": 0.935, "step": 36640 }, { "epoch": 32.45792736935341, "grad_norm": 0.2359771728515625, "learning_rate": 1e-05, "loss": 0.9737, "step": 36645 }, { "epoch": 32.46235606731621, "grad_norm": 0.23199866712093353, "learning_rate": 1e-05, "loss": 1.002, "step": 36650 }, { "epoch": 32.46678476527901, "grad_norm": 0.24042458832263947, "learning_rate": 1e-05, "loss": 0.956, "step": 36655 }, { "epoch": 32.47121346324181, "grad_norm": 0.20143255591392517, "learning_rate": 1e-05, "loss": 0.9794, "step": 36660 }, { "epoch": 32.475642161204604, "grad_norm": 0.22234395146369934, "learning_rate": 1e-05, "loss": 0.9476, "step": 36665 }, { "epoch": 32.480070859167405, "grad_norm": 0.25506138801574707, "learning_rate": 1e-05, "loss": 1.0041, "step": 36670 }, { "epoch": 32.484499557130206, "grad_norm": 0.25806450843811035, "learning_rate": 1e-05, "loss": 0.9998, "step": 36675 }, { "epoch": 32.488928255093, "grad_norm": 0.26445791125297546, "learning_rate": 1e-05, "loss": 0.9772, "step": 36680 }, { "epoch": 32.4933569530558, "grad_norm": 0.21333833038806915, "learning_rate": 1e-05, "loss": 0.9379, "step": 36685 }, { "epoch": 32.4977856510186, "grad_norm": 0.23295442759990692, "learning_rate": 1e-05, "loss": 0.9826, "step": 36690 }, { "epoch": 32.5022143489814, "grad_norm": 0.2343805730342865, "learning_rate": 1e-05, "loss": 0.9705, "step": 36695 }, { "epoch": 32.5066430469442, "grad_norm": 0.24746939539909363, "learning_rate": 1e-05, "loss": 1.0295, "step": 36700 }, { "epoch": 32.511071744907, "grad_norm": 0.24456191062927246, "learning_rate": 1e-05, "loss": 0.9372, "step": 36705 }, { "epoch": 32.515500442869794, "grad_norm": 0.2585187256336212, "learning_rate": 1e-05, "loss": 0.9926, "step": 36710 }, { "epoch": 32.519929140832595, "grad_norm": 0.23885998129844666, "learning_rate": 1e-05, "loss": 1.0122, "step": 36715 }, { "epoch": 32.524357838795396, "grad_norm": 0.27841898798942566, "learning_rate": 1e-05, "loss": 1.0125, "step": 36720 }, { "epoch": 32.52878653675819, "grad_norm": 0.2269294261932373, "learning_rate": 1e-05, "loss": 0.9544, "step": 36725 }, { "epoch": 32.53321523472099, "grad_norm": 0.2419809103012085, "learning_rate": 1e-05, "loss": 1.0099, "step": 36730 }, { "epoch": 32.53764393268379, "grad_norm": 0.2230791598558426, "learning_rate": 1e-05, "loss": 0.9609, "step": 36735 }, { "epoch": 32.54207263064659, "grad_norm": 0.25819098949432373, "learning_rate": 1e-05, "loss": 1.012, "step": 36740 }, { "epoch": 32.54650132860939, "grad_norm": 0.2441270500421524, "learning_rate": 1e-05, "loss": 0.9839, "step": 36745 }, { "epoch": 32.55093002657219, "grad_norm": 0.27701112627983093, "learning_rate": 1e-05, "loss": 1.0354, "step": 36750 }, { "epoch": 32.55535872453498, "grad_norm": 0.24796810746192932, "learning_rate": 1e-05, "loss": 1.0251, "step": 36755 }, { "epoch": 32.559787422497784, "grad_norm": 0.2414970099925995, "learning_rate": 1e-05, "loss": 0.9907, "step": 36760 }, { "epoch": 32.564216120460586, "grad_norm": 0.2906976640224457, "learning_rate": 1e-05, "loss": 1.0004, "step": 36765 }, { "epoch": 32.56864481842339, "grad_norm": 0.2600421905517578, "learning_rate": 1e-05, "loss": 0.9753, "step": 36770 }, { "epoch": 32.57307351638618, "grad_norm": 0.31107842922210693, "learning_rate": 1e-05, "loss": 0.9753, "step": 36775 }, { "epoch": 32.57750221434898, "grad_norm": 0.2882780134677887, "learning_rate": 1e-05, "loss": 0.9277, "step": 36780 }, { "epoch": 32.58193091231178, "grad_norm": 0.27277156710624695, "learning_rate": 1e-05, "loss": 0.9498, "step": 36785 }, { "epoch": 32.58635961027458, "grad_norm": 0.26888182759284973, "learning_rate": 1e-05, "loss": 0.9992, "step": 36790 }, { "epoch": 32.59078830823738, "grad_norm": 0.2638552486896515, "learning_rate": 1e-05, "loss": 0.9632, "step": 36795 }, { "epoch": 32.59521700620018, "grad_norm": 0.2250799536705017, "learning_rate": 1e-05, "loss": 0.9523, "step": 36800 }, { "epoch": 32.599645704162974, "grad_norm": 0.2779911160469055, "learning_rate": 1e-05, "loss": 0.9903, "step": 36805 }, { "epoch": 32.604074402125775, "grad_norm": 0.2575404644012451, "learning_rate": 1e-05, "loss": 0.9964, "step": 36810 }, { "epoch": 32.608503100088576, "grad_norm": 0.23828734457492828, "learning_rate": 1e-05, "loss": 0.9852, "step": 36815 }, { "epoch": 32.61293179805137, "grad_norm": 0.22199828922748566, "learning_rate": 1e-05, "loss": 1.0, "step": 36820 }, { "epoch": 32.61736049601417, "grad_norm": 0.2699551582336426, "learning_rate": 1e-05, "loss": 1.0032, "step": 36825 }, { "epoch": 32.62178919397697, "grad_norm": 0.24250787496566772, "learning_rate": 1e-05, "loss": 1.0178, "step": 36830 }, { "epoch": 32.62621789193977, "grad_norm": 0.22502340376377106, "learning_rate": 1e-05, "loss": 0.9624, "step": 36835 }, { "epoch": 32.63064658990257, "grad_norm": 0.22491468489170074, "learning_rate": 1e-05, "loss": 1.0339, "step": 36840 }, { "epoch": 32.63507528786537, "grad_norm": 0.21636025607585907, "learning_rate": 1e-05, "loss": 1.0084, "step": 36845 }, { "epoch": 32.63950398582816, "grad_norm": 0.20897270739078522, "learning_rate": 1e-05, "loss": 0.9928, "step": 36850 }, { "epoch": 32.643932683790965, "grad_norm": 0.28615567088127136, "learning_rate": 1e-05, "loss": 0.9942, "step": 36855 }, { "epoch": 32.648361381753766, "grad_norm": 0.224213644862175, "learning_rate": 1e-05, "loss": 0.9548, "step": 36860 }, { "epoch": 32.65279007971656, "grad_norm": 0.2577323019504547, "learning_rate": 1e-05, "loss": 0.9946, "step": 36865 }, { "epoch": 32.65721877767936, "grad_norm": 0.2592145800590515, "learning_rate": 1e-05, "loss": 1.0298, "step": 36870 }, { "epoch": 32.66164747564216, "grad_norm": 0.19868357479572296, "learning_rate": 1e-05, "loss": 0.9992, "step": 36875 }, { "epoch": 32.666076173604964, "grad_norm": 0.29028773307800293, "learning_rate": 1e-05, "loss": 0.9849, "step": 36880 }, { "epoch": 32.67050487156776, "grad_norm": 0.29770392179489136, "learning_rate": 1e-05, "loss": 1.0033, "step": 36885 }, { "epoch": 32.67493356953056, "grad_norm": 0.23048870265483856, "learning_rate": 1e-05, "loss": 0.9109, "step": 36890 }, { "epoch": 32.67936226749336, "grad_norm": 0.23651660978794098, "learning_rate": 1e-05, "loss": 0.9773, "step": 36895 }, { "epoch": 32.683790965456154, "grad_norm": 0.21591228246688843, "learning_rate": 1e-05, "loss": 0.9567, "step": 36900 }, { "epoch": 32.688219663418955, "grad_norm": 0.25752878189086914, "learning_rate": 1e-05, "loss": 0.9945, "step": 36905 }, { "epoch": 32.69264836138176, "grad_norm": 0.2440830022096634, "learning_rate": 1e-05, "loss": 0.936, "step": 36910 }, { "epoch": 32.69707705934455, "grad_norm": 0.28019312024116516, "learning_rate": 1e-05, "loss": 0.9347, "step": 36915 }, { "epoch": 32.70150575730735, "grad_norm": 0.30171093344688416, "learning_rate": 1e-05, "loss": 1.0466, "step": 36920 }, { "epoch": 32.70593445527015, "grad_norm": 0.3084123432636261, "learning_rate": 1e-05, "loss": 0.9958, "step": 36925 }, { "epoch": 32.71036315323295, "grad_norm": 0.2576330900192261, "learning_rate": 1e-05, "loss": 0.9926, "step": 36930 }, { "epoch": 32.71479185119575, "grad_norm": 0.22833141684532166, "learning_rate": 1e-05, "loss": 0.987, "step": 36935 }, { "epoch": 32.71922054915855, "grad_norm": 0.22222119569778442, "learning_rate": 1e-05, "loss": 0.9929, "step": 36940 }, { "epoch": 32.723649247121344, "grad_norm": 0.24523021280765533, "learning_rate": 1e-05, "loss": 1.0316, "step": 36945 }, { "epoch": 32.728077945084145, "grad_norm": 0.2447596937417984, "learning_rate": 1e-05, "loss": 1.0002, "step": 36950 }, { "epoch": 32.732506643046946, "grad_norm": 0.28538066148757935, "learning_rate": 1e-05, "loss": 0.9933, "step": 36955 }, { "epoch": 32.73693534100974, "grad_norm": 0.23064063489437103, "learning_rate": 1e-05, "loss": 0.9668, "step": 36960 }, { "epoch": 32.74136403897254, "grad_norm": 0.24713575839996338, "learning_rate": 1e-05, "loss": 0.9896, "step": 36965 }, { "epoch": 32.74579273693534, "grad_norm": 0.27793022990226746, "learning_rate": 1e-05, "loss": 0.9752, "step": 36970 }, { "epoch": 32.75022143489814, "grad_norm": 0.28085562586784363, "learning_rate": 1e-05, "loss": 1.002, "step": 36975 }, { "epoch": 32.75465013286094, "grad_norm": 0.19376224279403687, "learning_rate": 1e-05, "loss": 0.9318, "step": 36980 }, { "epoch": 32.75907883082374, "grad_norm": 0.25680866837501526, "learning_rate": 1e-05, "loss": 0.9826, "step": 36985 }, { "epoch": 32.76350752878653, "grad_norm": 0.2485509216785431, "learning_rate": 1e-05, "loss": 0.9858, "step": 36990 }, { "epoch": 32.767936226749335, "grad_norm": 0.23558840155601501, "learning_rate": 1e-05, "loss": 0.9973, "step": 36995 }, { "epoch": 32.772364924712136, "grad_norm": 0.2355247437953949, "learning_rate": 1e-05, "loss": 1.0077, "step": 37000 }, { "epoch": 32.77679362267494, "grad_norm": 0.24214035272598267, "learning_rate": 1e-05, "loss": 0.955, "step": 37005 }, { "epoch": 32.78122232063773, "grad_norm": 0.23325000703334808, "learning_rate": 1e-05, "loss": 0.9447, "step": 37010 }, { "epoch": 32.78565101860053, "grad_norm": 0.2599441111087799, "learning_rate": 1e-05, "loss": 0.9268, "step": 37015 }, { "epoch": 32.79007971656333, "grad_norm": 0.26771649718284607, "learning_rate": 1e-05, "loss": 1.0063, "step": 37020 }, { "epoch": 32.79450841452613, "grad_norm": 0.24848684668540955, "learning_rate": 1e-05, "loss": 0.968, "step": 37025 }, { "epoch": 32.79893711248893, "grad_norm": 0.2554144561290741, "learning_rate": 1e-05, "loss": 0.9498, "step": 37030 }, { "epoch": 32.80336581045173, "grad_norm": 0.3027076721191406, "learning_rate": 1e-05, "loss": 0.9812, "step": 37035 }, { "epoch": 32.807794508414524, "grad_norm": 0.24313239753246307, "learning_rate": 1e-05, "loss": 0.9715, "step": 37040 }, { "epoch": 32.812223206377325, "grad_norm": 0.3010101914405823, "learning_rate": 1e-05, "loss": 0.9729, "step": 37045 }, { "epoch": 32.81665190434013, "grad_norm": 0.24170802533626556, "learning_rate": 1e-05, "loss": 0.9592, "step": 37050 }, { "epoch": 32.82108060230292, "grad_norm": 0.2526654899120331, "learning_rate": 1e-05, "loss": 0.9622, "step": 37055 }, { "epoch": 32.82550930026572, "grad_norm": 0.2299080342054367, "learning_rate": 1e-05, "loss": 1.0322, "step": 37060 }, { "epoch": 32.82993799822852, "grad_norm": 0.35731419920921326, "learning_rate": 1e-05, "loss": 0.9494, "step": 37065 }, { "epoch": 32.83436669619132, "grad_norm": 0.2991877794265747, "learning_rate": 1e-05, "loss": 0.9654, "step": 37070 }, { "epoch": 32.83879539415412, "grad_norm": 0.2344764918088913, "learning_rate": 1e-05, "loss": 1.0598, "step": 37075 }, { "epoch": 32.84322409211692, "grad_norm": 0.2575565576553345, "learning_rate": 1e-05, "loss": 1.0306, "step": 37080 }, { "epoch": 32.847652790079714, "grad_norm": 0.26056885719299316, "learning_rate": 1e-05, "loss": 1.019, "step": 37085 }, { "epoch": 32.852081488042515, "grad_norm": 0.26875463128089905, "learning_rate": 1e-05, "loss": 0.959, "step": 37090 }, { "epoch": 32.856510186005316, "grad_norm": 0.2692602872848511, "learning_rate": 1e-05, "loss": 0.9893, "step": 37095 }, { "epoch": 32.86093888396811, "grad_norm": 0.27811992168426514, "learning_rate": 1e-05, "loss": 1.0179, "step": 37100 }, { "epoch": 32.86536758193091, "grad_norm": 0.22851362824440002, "learning_rate": 1e-05, "loss": 0.9829, "step": 37105 }, { "epoch": 32.86979627989371, "grad_norm": 0.2500940263271332, "learning_rate": 1e-05, "loss": 0.9866, "step": 37110 }, { "epoch": 32.87422497785651, "grad_norm": 0.22791844606399536, "learning_rate": 1e-05, "loss": 0.9569, "step": 37115 }, { "epoch": 32.87865367581931, "grad_norm": 0.22995440661907196, "learning_rate": 1e-05, "loss": 1.0145, "step": 37120 }, { "epoch": 32.88308237378211, "grad_norm": 0.27069565653800964, "learning_rate": 1e-05, "loss": 0.9749, "step": 37125 }, { "epoch": 32.88751107174491, "grad_norm": 0.24803012609481812, "learning_rate": 1e-05, "loss": 0.9935, "step": 37130 }, { "epoch": 32.891939769707704, "grad_norm": 0.2297472208738327, "learning_rate": 1e-05, "loss": 1.0206, "step": 37135 }, { "epoch": 32.896368467670506, "grad_norm": 0.2674930691719055, "learning_rate": 1e-05, "loss": 1.0097, "step": 37140 }, { "epoch": 32.90079716563331, "grad_norm": 0.25241905450820923, "learning_rate": 1e-05, "loss": 0.9698, "step": 37145 }, { "epoch": 32.9052258635961, "grad_norm": 0.24673676490783691, "learning_rate": 1e-05, "loss": 0.9331, "step": 37150 }, { "epoch": 32.9096545615589, "grad_norm": 0.2551254630088806, "learning_rate": 1e-05, "loss": 0.9774, "step": 37155 }, { "epoch": 32.9140832595217, "grad_norm": 0.23645739257335663, "learning_rate": 1e-05, "loss": 1.0155, "step": 37160 }, { "epoch": 32.9185119574845, "grad_norm": 0.3785167634487152, "learning_rate": 1e-05, "loss": 0.9619, "step": 37165 }, { "epoch": 32.9229406554473, "grad_norm": 0.2699519693851471, "learning_rate": 1e-05, "loss": 1.06, "step": 37170 }, { "epoch": 32.9273693534101, "grad_norm": 0.2335544228553772, "learning_rate": 1e-05, "loss": 0.9837, "step": 37175 }, { "epoch": 32.931798051372894, "grad_norm": 0.24266362190246582, "learning_rate": 1e-05, "loss": 1.0049, "step": 37180 }, { "epoch": 32.936226749335695, "grad_norm": 0.22529281675815582, "learning_rate": 1e-05, "loss": 0.9477, "step": 37185 }, { "epoch": 32.9406554472985, "grad_norm": 0.2407943457365036, "learning_rate": 1e-05, "loss": 0.9761, "step": 37190 }, { "epoch": 32.94508414526129, "grad_norm": 0.2171785831451416, "learning_rate": 1e-05, "loss": 0.9635, "step": 37195 }, { "epoch": 32.94951284322409, "grad_norm": 0.2314230352640152, "learning_rate": 1e-05, "loss": 0.9805, "step": 37200 }, { "epoch": 32.95394154118689, "grad_norm": 0.3619070053100586, "learning_rate": 1e-05, "loss": 0.9531, "step": 37205 }, { "epoch": 32.95837023914969, "grad_norm": 0.21950994431972504, "learning_rate": 1e-05, "loss": 0.9652, "step": 37210 }, { "epoch": 32.96279893711249, "grad_norm": 0.20312044024467468, "learning_rate": 1e-05, "loss": 0.9572, "step": 37215 }, { "epoch": 32.96722763507529, "grad_norm": 0.2392464131116867, "learning_rate": 1e-05, "loss": 0.9786, "step": 37220 }, { "epoch": 32.971656333038084, "grad_norm": 0.2703521251678467, "learning_rate": 1e-05, "loss": 1.0302, "step": 37225 }, { "epoch": 32.976085031000885, "grad_norm": 0.27658864855766296, "learning_rate": 1e-05, "loss": 0.971, "step": 37230 }, { "epoch": 32.980513728963686, "grad_norm": 0.2587783634662628, "learning_rate": 1e-05, "loss": 1.0053, "step": 37235 }, { "epoch": 32.98494242692648, "grad_norm": 0.2905009984970093, "learning_rate": 1e-05, "loss": 0.9471, "step": 37240 }, { "epoch": 32.98937112488928, "grad_norm": 0.2012159526348114, "learning_rate": 1e-05, "loss": 1.005, "step": 37245 }, { "epoch": 32.99379982285208, "grad_norm": 0.2734716236591339, "learning_rate": 1e-05, "loss": 1.0013, "step": 37250 }, { "epoch": 32.998228520814884, "grad_norm": 0.22511662542819977, "learning_rate": 1e-05, "loss": 0.9777, "step": 37255 }, { "epoch": 33.00265721877768, "grad_norm": 0.2462189793586731, "learning_rate": 1e-05, "loss": 0.9472, "step": 37260 }, { "epoch": 33.00708591674048, "grad_norm": 0.22202102839946747, "learning_rate": 1e-05, "loss": 1.0099, "step": 37265 }, { "epoch": 33.01151461470328, "grad_norm": 0.26523357629776, "learning_rate": 1e-05, "loss": 0.9625, "step": 37270 }, { "epoch": 33.015943312666074, "grad_norm": 0.267270565032959, "learning_rate": 1e-05, "loss": 0.9903, "step": 37275 }, { "epoch": 33.020372010628876, "grad_norm": 0.23890122771263123, "learning_rate": 1e-05, "loss": 0.9731, "step": 37280 }, { "epoch": 33.02480070859168, "grad_norm": 0.29531559348106384, "learning_rate": 1e-05, "loss": 1.0377, "step": 37285 }, { "epoch": 33.02922940655447, "grad_norm": 0.2053052932024002, "learning_rate": 1e-05, "loss": 1.0002, "step": 37290 }, { "epoch": 33.03365810451727, "grad_norm": 0.24436959624290466, "learning_rate": 1e-05, "loss": 0.9713, "step": 37295 }, { "epoch": 33.03808680248007, "grad_norm": 0.33839404582977295, "learning_rate": 1e-05, "loss": 0.9494, "step": 37300 }, { "epoch": 33.04251550044287, "grad_norm": 0.2618570923805237, "learning_rate": 1e-05, "loss": 0.9835, "step": 37305 }, { "epoch": 33.04694419840567, "grad_norm": 0.24721820652484894, "learning_rate": 1e-05, "loss": 0.9956, "step": 37310 }, { "epoch": 33.05137289636847, "grad_norm": 0.23903165757656097, "learning_rate": 1e-05, "loss": 0.9746, "step": 37315 }, { "epoch": 33.055801594331264, "grad_norm": 0.2669444978237152, "learning_rate": 1e-05, "loss": 1.0607, "step": 37320 }, { "epoch": 33.060230292294065, "grad_norm": 0.274209588766098, "learning_rate": 1e-05, "loss": 0.9571, "step": 37325 }, { "epoch": 33.064658990256866, "grad_norm": 0.19675327837467194, "learning_rate": 1e-05, "loss": 0.9836, "step": 37330 }, { "epoch": 33.06908768821966, "grad_norm": 0.26304563879966736, "learning_rate": 1e-05, "loss": 0.9676, "step": 37335 }, { "epoch": 33.07351638618246, "grad_norm": 0.25058713555336, "learning_rate": 1e-05, "loss": 0.9801, "step": 37340 }, { "epoch": 33.07794508414526, "grad_norm": 0.27034229040145874, "learning_rate": 1e-05, "loss": 0.9987, "step": 37345 }, { "epoch": 33.08237378210806, "grad_norm": 0.21662580966949463, "learning_rate": 1e-05, "loss": 1.0236, "step": 37350 }, { "epoch": 33.08680248007086, "grad_norm": 0.2343723028898239, "learning_rate": 1e-05, "loss": 0.9883, "step": 37355 }, { "epoch": 33.09123117803366, "grad_norm": 0.2491210550069809, "learning_rate": 1e-05, "loss": 1.0268, "step": 37360 }, { "epoch": 33.09565987599645, "grad_norm": 0.22822631895542145, "learning_rate": 1e-05, "loss": 0.9734, "step": 37365 }, { "epoch": 33.100088573959255, "grad_norm": 0.2211805135011673, "learning_rate": 1e-05, "loss": 0.9875, "step": 37370 }, { "epoch": 33.104517271922056, "grad_norm": 0.2651304602622986, "learning_rate": 1e-05, "loss": 0.9857, "step": 37375 }, { "epoch": 33.10894596988486, "grad_norm": 0.28861936926841736, "learning_rate": 1e-05, "loss": 0.9953, "step": 37380 }, { "epoch": 33.11337466784765, "grad_norm": 0.22887682914733887, "learning_rate": 1e-05, "loss": 1.0134, "step": 37385 }, { "epoch": 33.11780336581045, "grad_norm": 0.383199006319046, "learning_rate": 1e-05, "loss": 0.9568, "step": 37390 }, { "epoch": 33.122232063773254, "grad_norm": 0.23534031212329865, "learning_rate": 1e-05, "loss": 0.9988, "step": 37395 }, { "epoch": 33.12666076173605, "grad_norm": 0.27983418107032776, "learning_rate": 1e-05, "loss": 0.9578, "step": 37400 }, { "epoch": 33.13108945969885, "grad_norm": 0.24169385433197021, "learning_rate": 1e-05, "loss": 0.9626, "step": 37405 }, { "epoch": 33.13551815766165, "grad_norm": 0.2552761435508728, "learning_rate": 1e-05, "loss": 0.9839, "step": 37410 }, { "epoch": 33.139946855624444, "grad_norm": 0.24624589085578918, "learning_rate": 1e-05, "loss": 0.9533, "step": 37415 }, { "epoch": 33.144375553587246, "grad_norm": 0.2403360903263092, "learning_rate": 1e-05, "loss": 1.0364, "step": 37420 }, { "epoch": 33.14880425155005, "grad_norm": 0.2598419487476349, "learning_rate": 1e-05, "loss": 0.973, "step": 37425 }, { "epoch": 33.15323294951284, "grad_norm": 0.28563302755355835, "learning_rate": 1e-05, "loss": 0.9829, "step": 37430 }, { "epoch": 33.15766164747564, "grad_norm": 0.24167422950267792, "learning_rate": 1e-05, "loss": 0.9206, "step": 37435 }, { "epoch": 33.16209034543844, "grad_norm": 0.24953114986419678, "learning_rate": 1e-05, "loss": 1.0139, "step": 37440 }, { "epoch": 33.16651904340124, "grad_norm": 0.22629134356975555, "learning_rate": 1e-05, "loss": 0.9923, "step": 37445 }, { "epoch": 33.17094774136404, "grad_norm": 0.2352973371744156, "learning_rate": 1e-05, "loss": 0.9986, "step": 37450 }, { "epoch": 33.17537643932684, "grad_norm": 0.219156414270401, "learning_rate": 1e-05, "loss": 0.986, "step": 37455 }, { "epoch": 33.179805137289634, "grad_norm": 0.25489112734794617, "learning_rate": 1e-05, "loss": 0.9393, "step": 37460 }, { "epoch": 33.184233835252435, "grad_norm": 0.25097987055778503, "learning_rate": 1e-05, "loss": 0.9596, "step": 37465 }, { "epoch": 33.188662533215236, "grad_norm": 0.23594914376735687, "learning_rate": 1e-05, "loss": 1.0067, "step": 37470 }, { "epoch": 33.19309123117803, "grad_norm": 0.23238977789878845, "learning_rate": 1e-05, "loss": 0.988, "step": 37475 }, { "epoch": 33.19751992914083, "grad_norm": 0.2475544810295105, "learning_rate": 1e-05, "loss": 1.0055, "step": 37480 }, { "epoch": 33.20194862710363, "grad_norm": 0.25583094358444214, "learning_rate": 1e-05, "loss": 1.0343, "step": 37485 }, { "epoch": 33.20637732506643, "grad_norm": 0.2261056900024414, "learning_rate": 1e-05, "loss": 1.0421, "step": 37490 }, { "epoch": 33.21080602302923, "grad_norm": 0.25020086765289307, "learning_rate": 1e-05, "loss": 0.9658, "step": 37495 }, { "epoch": 33.21523472099203, "grad_norm": 0.2253435105085373, "learning_rate": 1e-05, "loss": 0.9564, "step": 37500 }, { "epoch": 33.21966341895483, "grad_norm": 0.22151602804660797, "learning_rate": 1e-05, "loss": 0.9691, "step": 37505 }, { "epoch": 33.224092116917625, "grad_norm": 0.2499372810125351, "learning_rate": 1e-05, "loss": 0.9536, "step": 37510 }, { "epoch": 33.228520814880426, "grad_norm": 0.2618054747581482, "learning_rate": 1e-05, "loss": 0.9748, "step": 37515 }, { "epoch": 33.23294951284323, "grad_norm": 0.22210164368152618, "learning_rate": 1e-05, "loss": 0.9921, "step": 37520 }, { "epoch": 33.23737821080602, "grad_norm": 0.20142731070518494, "learning_rate": 1e-05, "loss": 0.9176, "step": 37525 }, { "epoch": 33.24180690876882, "grad_norm": 0.26337745785713196, "learning_rate": 1e-05, "loss": 1.0049, "step": 37530 }, { "epoch": 33.246235606731624, "grad_norm": 0.2420770823955536, "learning_rate": 1e-05, "loss": 0.9374, "step": 37535 }, { "epoch": 33.25066430469442, "grad_norm": 0.28763192892074585, "learning_rate": 1e-05, "loss": 0.9684, "step": 37540 }, { "epoch": 33.25509300265722, "grad_norm": 0.2696307301521301, "learning_rate": 1e-05, "loss": 0.9562, "step": 37545 }, { "epoch": 33.25952170062002, "grad_norm": 0.25449714064598083, "learning_rate": 1e-05, "loss": 1.018, "step": 37550 }, { "epoch": 33.263950398582814, "grad_norm": 0.2824353575706482, "learning_rate": 1e-05, "loss": 0.986, "step": 37555 }, { "epoch": 33.268379096545615, "grad_norm": 0.25014811754226685, "learning_rate": 1e-05, "loss": 0.9992, "step": 37560 }, { "epoch": 33.27280779450842, "grad_norm": 0.2584537863731384, "learning_rate": 1e-05, "loss": 0.9909, "step": 37565 }, { "epoch": 33.27723649247121, "grad_norm": 0.24283677339553833, "learning_rate": 1e-05, "loss": 0.9461, "step": 37570 }, { "epoch": 33.28166519043401, "grad_norm": 0.23383092880249023, "learning_rate": 1e-05, "loss": 0.9153, "step": 37575 }, { "epoch": 33.28609388839681, "grad_norm": 0.24581915140151978, "learning_rate": 1e-05, "loss": 0.9684, "step": 37580 }, { "epoch": 33.29052258635961, "grad_norm": 0.2930060029029846, "learning_rate": 1e-05, "loss": 0.9853, "step": 37585 }, { "epoch": 33.29495128432241, "grad_norm": 0.20185275375843048, "learning_rate": 1e-05, "loss": 0.9781, "step": 37590 }, { "epoch": 33.29937998228521, "grad_norm": 0.29508915543556213, "learning_rate": 1e-05, "loss": 0.9447, "step": 37595 }, { "epoch": 33.303808680248004, "grad_norm": 0.2665797770023346, "learning_rate": 1e-05, "loss": 0.9628, "step": 37600 }, { "epoch": 33.308237378210805, "grad_norm": 0.219760924577713, "learning_rate": 1e-05, "loss": 0.9781, "step": 37605 }, { "epoch": 33.312666076173606, "grad_norm": 0.243989497423172, "learning_rate": 1e-05, "loss": 0.9582, "step": 37610 }, { "epoch": 33.31709477413641, "grad_norm": 0.18913330137729645, "learning_rate": 1e-05, "loss": 1.0057, "step": 37615 }, { "epoch": 33.3215234720992, "grad_norm": 0.23927466571331024, "learning_rate": 1e-05, "loss": 0.98, "step": 37620 }, { "epoch": 33.325952170062, "grad_norm": 0.2826748490333557, "learning_rate": 1e-05, "loss": 1.0015, "step": 37625 }, { "epoch": 33.330380868024804, "grad_norm": 0.2496422678232193, "learning_rate": 1e-05, "loss": 1.0165, "step": 37630 }, { "epoch": 33.3348095659876, "grad_norm": 0.2906489074230194, "learning_rate": 1e-05, "loss": 0.9461, "step": 37635 }, { "epoch": 33.3392382639504, "grad_norm": 0.23769623041152954, "learning_rate": 1e-05, "loss": 1.058, "step": 37640 }, { "epoch": 33.3436669619132, "grad_norm": 0.2164970487356186, "learning_rate": 1e-05, "loss": 0.9888, "step": 37645 }, { "epoch": 33.348095659875995, "grad_norm": 0.2195953130722046, "learning_rate": 1e-05, "loss": 1.0059, "step": 37650 }, { "epoch": 33.352524357838796, "grad_norm": 0.2180243581533432, "learning_rate": 1e-05, "loss": 0.9735, "step": 37655 }, { "epoch": 33.3569530558016, "grad_norm": 0.2616342008113861, "learning_rate": 1e-05, "loss": 1.0176, "step": 37660 }, { "epoch": 33.36138175376439, "grad_norm": 0.26874032616615295, "learning_rate": 1e-05, "loss": 0.9828, "step": 37665 }, { "epoch": 33.36581045172719, "grad_norm": 0.27089524269104004, "learning_rate": 1e-05, "loss": 0.9656, "step": 37670 }, { "epoch": 33.37023914968999, "grad_norm": 0.23958326876163483, "learning_rate": 1e-05, "loss": 0.9717, "step": 37675 }, { "epoch": 33.37466784765279, "grad_norm": 0.2638654112815857, "learning_rate": 1e-05, "loss": 0.9412, "step": 37680 }, { "epoch": 33.37909654561559, "grad_norm": 0.2748025059700012, "learning_rate": 1e-05, "loss": 1.0019, "step": 37685 }, { "epoch": 33.38352524357839, "grad_norm": 0.20542241632938385, "learning_rate": 1e-05, "loss": 0.9982, "step": 37690 }, { "epoch": 33.387953941541184, "grad_norm": 0.253123015165329, "learning_rate": 1e-05, "loss": 0.9967, "step": 37695 }, { "epoch": 33.392382639503985, "grad_norm": 0.22315892577171326, "learning_rate": 1e-05, "loss": 1.0179, "step": 37700 }, { "epoch": 33.39681133746679, "grad_norm": 0.2307894229888916, "learning_rate": 1e-05, "loss": 0.9362, "step": 37705 }, { "epoch": 33.40124003542958, "grad_norm": 0.24775582551956177, "learning_rate": 1e-05, "loss": 0.9831, "step": 37710 }, { "epoch": 33.40566873339238, "grad_norm": 0.23357945680618286, "learning_rate": 1e-05, "loss": 0.9656, "step": 37715 }, { "epoch": 33.41009743135518, "grad_norm": 0.22723504900932312, "learning_rate": 1e-05, "loss": 0.9612, "step": 37720 }, { "epoch": 33.41452612931798, "grad_norm": 0.212600976228714, "learning_rate": 1e-05, "loss": 0.9947, "step": 37725 }, { "epoch": 33.41895482728078, "grad_norm": 0.22390806674957275, "learning_rate": 1e-05, "loss": 0.9662, "step": 37730 }, { "epoch": 33.42338352524358, "grad_norm": 0.25025299191474915, "learning_rate": 1e-05, "loss": 0.952, "step": 37735 }, { "epoch": 33.42781222320638, "grad_norm": 0.25924888253211975, "learning_rate": 1e-05, "loss": 0.9713, "step": 37740 }, { "epoch": 33.432240921169175, "grad_norm": 0.23782186210155487, "learning_rate": 1e-05, "loss": 0.9947, "step": 37745 }, { "epoch": 33.436669619131976, "grad_norm": 0.2332630753517151, "learning_rate": 1e-05, "loss": 0.912, "step": 37750 }, { "epoch": 33.44109831709478, "grad_norm": 0.22997522354125977, "learning_rate": 1e-05, "loss": 0.9626, "step": 37755 }, { "epoch": 33.44552701505757, "grad_norm": 0.2595265209674835, "learning_rate": 1e-05, "loss": 0.9806, "step": 37760 }, { "epoch": 33.44995571302037, "grad_norm": 0.23804420232772827, "learning_rate": 1e-05, "loss": 0.9871, "step": 37765 }, { "epoch": 33.454384410983174, "grad_norm": 0.2505871653556824, "learning_rate": 1e-05, "loss": 0.9812, "step": 37770 }, { "epoch": 33.45881310894597, "grad_norm": 0.2468378245830536, "learning_rate": 1e-05, "loss": 1.0161, "step": 37775 }, { "epoch": 33.46324180690877, "grad_norm": 0.24924004077911377, "learning_rate": 1e-05, "loss": 0.9726, "step": 37780 }, { "epoch": 33.46767050487157, "grad_norm": 0.25373706221580505, "learning_rate": 1e-05, "loss": 1.0195, "step": 37785 }, { "epoch": 33.472099202834364, "grad_norm": 0.1956234872341156, "learning_rate": 1e-05, "loss": 0.9895, "step": 37790 }, { "epoch": 33.476527900797166, "grad_norm": 0.2079487442970276, "learning_rate": 1e-05, "loss": 0.9211, "step": 37795 }, { "epoch": 33.48095659875997, "grad_norm": 0.2684883773326874, "learning_rate": 1e-05, "loss": 0.9653, "step": 37800 }, { "epoch": 33.48538529672276, "grad_norm": 0.23346030712127686, "learning_rate": 1e-05, "loss": 0.9869, "step": 37805 }, { "epoch": 33.48981399468556, "grad_norm": 0.3997478783130646, "learning_rate": 1e-05, "loss": 0.9808, "step": 37810 }, { "epoch": 33.49424269264836, "grad_norm": 0.24162228405475616, "learning_rate": 1e-05, "loss": 1.0227, "step": 37815 }, { "epoch": 33.49867139061116, "grad_norm": 0.21404288709163666, "learning_rate": 1e-05, "loss": 0.9267, "step": 37820 }, { "epoch": 33.50310008857396, "grad_norm": 0.27143868803977966, "learning_rate": 1e-05, "loss": 1.062, "step": 37825 }, { "epoch": 33.50752878653676, "grad_norm": 0.19717994332313538, "learning_rate": 1e-05, "loss": 0.9918, "step": 37830 }, { "epoch": 33.511957484499554, "grad_norm": 0.20435220003128052, "learning_rate": 1e-05, "loss": 0.9746, "step": 37835 }, { "epoch": 33.516386182462355, "grad_norm": 0.24851441383361816, "learning_rate": 1e-05, "loss": 0.9336, "step": 37840 }, { "epoch": 33.520814880425156, "grad_norm": 0.23568382859230042, "learning_rate": 1e-05, "loss": 1.0269, "step": 37845 }, { "epoch": 33.52524357838795, "grad_norm": 0.25395458936691284, "learning_rate": 1e-05, "loss": 0.9574, "step": 37850 }, { "epoch": 33.52967227635075, "grad_norm": 0.2722824215888977, "learning_rate": 1e-05, "loss": 0.9632, "step": 37855 }, { "epoch": 33.53410097431355, "grad_norm": 0.26487404108047485, "learning_rate": 1e-05, "loss": 0.9996, "step": 37860 }, { "epoch": 33.538529672276354, "grad_norm": 0.23948870599269867, "learning_rate": 1e-05, "loss": 0.9882, "step": 37865 }, { "epoch": 33.54295837023915, "grad_norm": 0.22190731763839722, "learning_rate": 1e-05, "loss": 1.0318, "step": 37870 }, { "epoch": 33.54738706820195, "grad_norm": 0.26469072699546814, "learning_rate": 1e-05, "loss": 0.9702, "step": 37875 }, { "epoch": 33.55181576616475, "grad_norm": 0.22756771743297577, "learning_rate": 1e-05, "loss": 0.9665, "step": 37880 }, { "epoch": 33.556244464127545, "grad_norm": 0.23696641623973846, "learning_rate": 1e-05, "loss": 1.0027, "step": 37885 }, { "epoch": 33.560673162090346, "grad_norm": 0.25579628348350525, "learning_rate": 1e-05, "loss": 0.9441, "step": 37890 }, { "epoch": 33.56510186005315, "grad_norm": 0.2269541174173355, "learning_rate": 1e-05, "loss": 1.0376, "step": 37895 }, { "epoch": 33.56953055801594, "grad_norm": 0.2187008112668991, "learning_rate": 1e-05, "loss": 0.9772, "step": 37900 }, { "epoch": 33.57395925597874, "grad_norm": 0.2544189691543579, "learning_rate": 1e-05, "loss": 0.9802, "step": 37905 }, { "epoch": 33.578387953941544, "grad_norm": 0.21207940578460693, "learning_rate": 1e-05, "loss": 0.9499, "step": 37910 }, { "epoch": 33.58281665190434, "grad_norm": 0.24516642093658447, "learning_rate": 1e-05, "loss": 1.0136, "step": 37915 }, { "epoch": 33.58724534986714, "grad_norm": 0.2136494368314743, "learning_rate": 1e-05, "loss": 0.9712, "step": 37920 }, { "epoch": 33.59167404782994, "grad_norm": 0.25357040762901306, "learning_rate": 1e-05, "loss": 1.0093, "step": 37925 }, { "epoch": 33.596102745792734, "grad_norm": 0.24380625784397125, "learning_rate": 1e-05, "loss": 0.9506, "step": 37930 }, { "epoch": 33.600531443755536, "grad_norm": 0.2423871010541916, "learning_rate": 1e-05, "loss": 1.0015, "step": 37935 }, { "epoch": 33.60496014171834, "grad_norm": 0.3245455026626587, "learning_rate": 1e-05, "loss": 0.9799, "step": 37940 }, { "epoch": 33.60938883968113, "grad_norm": 0.2679072320461273, "learning_rate": 1e-05, "loss": 1.0205, "step": 37945 }, { "epoch": 33.61381753764393, "grad_norm": 0.24811644852161407, "learning_rate": 1e-05, "loss": 0.9517, "step": 37950 }, { "epoch": 33.61824623560673, "grad_norm": 0.27384641766548157, "learning_rate": 1e-05, "loss": 0.9503, "step": 37955 }, { "epoch": 33.62267493356953, "grad_norm": 0.21501348912715912, "learning_rate": 1e-05, "loss": 0.9498, "step": 37960 }, { "epoch": 33.62710363153233, "grad_norm": 0.23064468801021576, "learning_rate": 1e-05, "loss": 0.9745, "step": 37965 }, { "epoch": 33.63153232949513, "grad_norm": 0.22785690426826477, "learning_rate": 1e-05, "loss": 0.9906, "step": 37970 }, { "epoch": 33.635961027457924, "grad_norm": 0.24837546050548553, "learning_rate": 1e-05, "loss": 0.9355, "step": 37975 }, { "epoch": 33.640389725420725, "grad_norm": 0.25972801446914673, "learning_rate": 1e-05, "loss": 1.02, "step": 37980 }, { "epoch": 33.644818423383526, "grad_norm": 0.2836465537548065, "learning_rate": 1e-05, "loss": 0.982, "step": 37985 }, { "epoch": 33.64924712134633, "grad_norm": 0.2290176898241043, "learning_rate": 1e-05, "loss": 1.0113, "step": 37990 }, { "epoch": 33.65367581930912, "grad_norm": 0.3135979473590851, "learning_rate": 1e-05, "loss": 0.9448, "step": 37995 }, { "epoch": 33.65810451727192, "grad_norm": 0.2480962723493576, "learning_rate": 1e-05, "loss": 0.9662, "step": 38000 }, { "epoch": 33.662533215234724, "grad_norm": 0.21964405477046967, "learning_rate": 1e-05, "loss": 0.9352, "step": 38005 }, { "epoch": 33.66696191319752, "grad_norm": 0.2652662396430969, "learning_rate": 1e-05, "loss": 0.94, "step": 38010 }, { "epoch": 33.67139061116032, "grad_norm": 0.26028746366500854, "learning_rate": 1e-05, "loss": 0.9836, "step": 38015 }, { "epoch": 33.67581930912312, "grad_norm": 0.24155287444591522, "learning_rate": 1e-05, "loss": 0.9816, "step": 38020 }, { "epoch": 33.680248007085915, "grad_norm": 0.2609650492668152, "learning_rate": 1e-05, "loss": 0.9779, "step": 38025 }, { "epoch": 33.684676705048716, "grad_norm": 0.26211851835250854, "learning_rate": 1e-05, "loss": 0.9645, "step": 38030 }, { "epoch": 33.68910540301152, "grad_norm": 0.21739651262760162, "learning_rate": 1e-05, "loss": 0.9778, "step": 38035 }, { "epoch": 33.69353410097431, "grad_norm": 0.22474536299705505, "learning_rate": 1e-05, "loss": 1.0088, "step": 38040 }, { "epoch": 33.69796279893711, "grad_norm": 0.22200100123882294, "learning_rate": 1e-05, "loss": 1.0222, "step": 38045 }, { "epoch": 33.702391496899914, "grad_norm": 0.23596346378326416, "learning_rate": 1e-05, "loss": 0.9529, "step": 38050 }, { "epoch": 33.70682019486271, "grad_norm": 0.2362101525068283, "learning_rate": 1e-05, "loss": 0.9868, "step": 38055 }, { "epoch": 33.71124889282551, "grad_norm": 0.2920089066028595, "learning_rate": 1e-05, "loss": 0.9887, "step": 38060 }, { "epoch": 33.71567759078831, "grad_norm": 0.23172269761562347, "learning_rate": 1e-05, "loss": 0.9542, "step": 38065 }, { "epoch": 33.720106288751104, "grad_norm": 0.2064635157585144, "learning_rate": 1e-05, "loss": 0.9505, "step": 38070 }, { "epoch": 33.724534986713905, "grad_norm": 0.2808133661746979, "learning_rate": 1e-05, "loss": 0.9606, "step": 38075 }, { "epoch": 33.72896368467671, "grad_norm": 0.27570268511772156, "learning_rate": 1e-05, "loss": 1.0026, "step": 38080 }, { "epoch": 33.7333923826395, "grad_norm": 0.2796964943408966, "learning_rate": 1e-05, "loss": 1.0364, "step": 38085 }, { "epoch": 33.7378210806023, "grad_norm": 0.23116172850131989, "learning_rate": 1e-05, "loss": 0.9707, "step": 38090 }, { "epoch": 33.7422497785651, "grad_norm": 0.3091551661491394, "learning_rate": 1e-05, "loss": 0.9638, "step": 38095 }, { "epoch": 33.7466784765279, "grad_norm": 0.26836469769477844, "learning_rate": 1e-05, "loss": 1.0025, "step": 38100 }, { "epoch": 33.7511071744907, "grad_norm": 0.2579007148742676, "learning_rate": 1e-05, "loss": 0.9338, "step": 38105 }, { "epoch": 33.7555358724535, "grad_norm": 0.21425236761569977, "learning_rate": 1e-05, "loss": 0.9933, "step": 38110 }, { "epoch": 33.7599645704163, "grad_norm": 0.2666289210319519, "learning_rate": 1e-05, "loss": 0.9753, "step": 38115 }, { "epoch": 33.764393268379095, "grad_norm": 0.2515363395214081, "learning_rate": 1e-05, "loss": 0.996, "step": 38120 }, { "epoch": 33.768821966341896, "grad_norm": 0.2566606104373932, "learning_rate": 1e-05, "loss": 0.9972, "step": 38125 }, { "epoch": 33.7732506643047, "grad_norm": 0.29022499918937683, "learning_rate": 1e-05, "loss": 1.0202, "step": 38130 }, { "epoch": 33.77767936226749, "grad_norm": 0.2834495007991791, "learning_rate": 1e-05, "loss": 0.983, "step": 38135 }, { "epoch": 33.78210806023029, "grad_norm": 0.24446694552898407, "learning_rate": 1e-05, "loss": 0.9539, "step": 38140 }, { "epoch": 33.786536758193094, "grad_norm": 0.27261218428611755, "learning_rate": 1e-05, "loss": 0.9656, "step": 38145 }, { "epoch": 33.79096545615589, "grad_norm": 0.23751747608184814, "learning_rate": 1e-05, "loss": 0.9843, "step": 38150 }, { "epoch": 33.79539415411869, "grad_norm": 0.20280642807483673, "learning_rate": 1e-05, "loss": 0.9527, "step": 38155 }, { "epoch": 33.79982285208149, "grad_norm": 0.25098899006843567, "learning_rate": 1e-05, "loss": 0.9978, "step": 38160 }, { "epoch": 33.804251550044285, "grad_norm": 0.23242275416851044, "learning_rate": 1e-05, "loss": 0.9714, "step": 38165 }, { "epoch": 33.808680248007086, "grad_norm": 0.24211683869361877, "learning_rate": 1e-05, "loss": 0.9625, "step": 38170 }, { "epoch": 33.81310894596989, "grad_norm": 0.2172280102968216, "learning_rate": 1e-05, "loss": 1.0168, "step": 38175 }, { "epoch": 33.81753764393268, "grad_norm": 0.2778877019882202, "learning_rate": 1e-05, "loss": 1.0226, "step": 38180 }, { "epoch": 33.82196634189548, "grad_norm": 0.2905314862728119, "learning_rate": 1e-05, "loss": 1.0409, "step": 38185 }, { "epoch": 33.826395039858284, "grad_norm": 0.2853711247444153, "learning_rate": 1e-05, "loss": 0.9629, "step": 38190 }, { "epoch": 33.83082373782108, "grad_norm": 0.2654324173927307, "learning_rate": 1e-05, "loss": 1.0065, "step": 38195 }, { "epoch": 33.83525243578388, "grad_norm": 0.2581745386123657, "learning_rate": 1e-05, "loss": 0.9697, "step": 38200 }, { "epoch": 33.83968113374668, "grad_norm": 0.25141727924346924, "learning_rate": 1e-05, "loss": 0.9951, "step": 38205 }, { "epoch": 33.844109831709474, "grad_norm": 0.2173549085855484, "learning_rate": 1e-05, "loss": 0.9193, "step": 38210 }, { "epoch": 33.848538529672275, "grad_norm": 0.2668944001197815, "learning_rate": 1e-05, "loss": 0.9741, "step": 38215 }, { "epoch": 33.85296722763508, "grad_norm": 0.21575461328029633, "learning_rate": 1e-05, "loss": 0.9644, "step": 38220 }, { "epoch": 33.85739592559787, "grad_norm": 0.2107408344745636, "learning_rate": 1e-05, "loss": 0.9916, "step": 38225 }, { "epoch": 33.86182462356067, "grad_norm": 0.21856476366519928, "learning_rate": 1e-05, "loss": 1.0577, "step": 38230 }, { "epoch": 33.86625332152347, "grad_norm": 0.225790336728096, "learning_rate": 1e-05, "loss": 1.0136, "step": 38235 }, { "epoch": 33.870682019486274, "grad_norm": 0.21448101103305817, "learning_rate": 1e-05, "loss": 1.0178, "step": 38240 }, { "epoch": 33.87511071744907, "grad_norm": 0.2115173041820526, "learning_rate": 1e-05, "loss": 0.9924, "step": 38245 }, { "epoch": 33.87953941541187, "grad_norm": 0.25188231468200684, "learning_rate": 1e-05, "loss": 0.9717, "step": 38250 }, { "epoch": 33.88396811337467, "grad_norm": 0.2773911952972412, "learning_rate": 1e-05, "loss": 0.9725, "step": 38255 }, { "epoch": 33.888396811337465, "grad_norm": 0.1982952356338501, "learning_rate": 1e-05, "loss": 0.9878, "step": 38260 }, { "epoch": 33.892825509300266, "grad_norm": 0.27818411588668823, "learning_rate": 1e-05, "loss": 0.9586, "step": 38265 }, { "epoch": 33.89725420726307, "grad_norm": 0.26349830627441406, "learning_rate": 1e-05, "loss": 0.9559, "step": 38270 }, { "epoch": 33.90168290522586, "grad_norm": 0.24820630252361298, "learning_rate": 1e-05, "loss": 0.9997, "step": 38275 }, { "epoch": 33.90611160318866, "grad_norm": 0.20903825759887695, "learning_rate": 1e-05, "loss": 0.9509, "step": 38280 }, { "epoch": 33.910540301151464, "grad_norm": 0.24729932844638824, "learning_rate": 1e-05, "loss": 0.987, "step": 38285 }, { "epoch": 33.91496899911426, "grad_norm": 0.21811094880104065, "learning_rate": 1e-05, "loss": 0.9672, "step": 38290 }, { "epoch": 33.91939769707706, "grad_norm": 0.23259656131267548, "learning_rate": 1e-05, "loss": 0.9372, "step": 38295 }, { "epoch": 33.92382639503986, "grad_norm": 0.257358193397522, "learning_rate": 1e-05, "loss": 0.9548, "step": 38300 }, { "epoch": 33.928255093002655, "grad_norm": 0.1952032893896103, "learning_rate": 1e-05, "loss": 0.9558, "step": 38305 }, { "epoch": 33.932683790965456, "grad_norm": 0.2545221149921417, "learning_rate": 1e-05, "loss": 0.9523, "step": 38310 }, { "epoch": 33.93711248892826, "grad_norm": 0.27006372809410095, "learning_rate": 1e-05, "loss": 0.9868, "step": 38315 }, { "epoch": 33.94154118689105, "grad_norm": 0.21550485491752625, "learning_rate": 1e-05, "loss": 0.9635, "step": 38320 }, { "epoch": 33.94596988485385, "grad_norm": 0.225944384932518, "learning_rate": 1e-05, "loss": 0.9973, "step": 38325 }, { "epoch": 33.95039858281665, "grad_norm": 0.2626259922981262, "learning_rate": 1e-05, "loss": 0.984, "step": 38330 }, { "epoch": 33.95482728077945, "grad_norm": 0.22060105204582214, "learning_rate": 1e-05, "loss": 0.9394, "step": 38335 }, { "epoch": 33.95925597874225, "grad_norm": 0.23608282208442688, "learning_rate": 1e-05, "loss": 0.9982, "step": 38340 }, { "epoch": 33.96368467670505, "grad_norm": 0.21818242967128754, "learning_rate": 1e-05, "loss": 1.0262, "step": 38345 }, { "epoch": 33.96811337466785, "grad_norm": 0.2523571848869324, "learning_rate": 1e-05, "loss": 1.0098, "step": 38350 }, { "epoch": 33.972542072630645, "grad_norm": 0.24640929698944092, "learning_rate": 1e-05, "loss": 0.9676, "step": 38355 }, { "epoch": 33.97697077059345, "grad_norm": 0.24513690173625946, "learning_rate": 1e-05, "loss": 0.9852, "step": 38360 }, { "epoch": 33.98139946855625, "grad_norm": 0.23903600871562958, "learning_rate": 1e-05, "loss": 1.0311, "step": 38365 }, { "epoch": 33.98582816651904, "grad_norm": 0.2276596575975418, "learning_rate": 1e-05, "loss": 0.9607, "step": 38370 }, { "epoch": 33.99025686448184, "grad_norm": 0.30598318576812744, "learning_rate": 1e-05, "loss": 1.0492, "step": 38375 }, { "epoch": 33.994685562444644, "grad_norm": 0.20703360438346863, "learning_rate": 1e-05, "loss": 0.9826, "step": 38380 }, { "epoch": 33.99911426040744, "grad_norm": 0.2632818818092346, "learning_rate": 1e-05, "loss": 0.9966, "step": 38385 }, { "epoch": 34.00354295837024, "grad_norm": 0.25833189487457275, "learning_rate": 1e-05, "loss": 0.9767, "step": 38390 }, { "epoch": 34.00797165633304, "grad_norm": 0.2433941662311554, "learning_rate": 1e-05, "loss": 0.965, "step": 38395 }, { "epoch": 34.012400354295835, "grad_norm": 0.22201590240001678, "learning_rate": 1e-05, "loss": 0.963, "step": 38400 }, { "epoch": 34.016829052258636, "grad_norm": 0.25580641627311707, "learning_rate": 1e-05, "loss": 1.0112, "step": 38405 }, { "epoch": 34.02125775022144, "grad_norm": 0.22945557534694672, "learning_rate": 1e-05, "loss": 1.0106, "step": 38410 }, { "epoch": 34.02568644818423, "grad_norm": 0.2832637131214142, "learning_rate": 1e-05, "loss": 0.9871, "step": 38415 }, { "epoch": 34.03011514614703, "grad_norm": 0.25894853472709656, "learning_rate": 1e-05, "loss": 0.9687, "step": 38420 }, { "epoch": 34.034543844109834, "grad_norm": 0.2717205584049225, "learning_rate": 1e-05, "loss": 0.9537, "step": 38425 }, { "epoch": 34.03897254207263, "grad_norm": 0.22346657514572144, "learning_rate": 1e-05, "loss": 0.9769, "step": 38430 }, { "epoch": 34.04340124003543, "grad_norm": 0.25589868426322937, "learning_rate": 1e-05, "loss": 1.0, "step": 38435 }, { "epoch": 34.04782993799823, "grad_norm": 0.24962647259235382, "learning_rate": 1e-05, "loss": 1.0211, "step": 38440 }, { "epoch": 34.052258635961024, "grad_norm": 0.2575848698616028, "learning_rate": 1e-05, "loss": 1.0672, "step": 38445 }, { "epoch": 34.056687333923826, "grad_norm": 0.24088113009929657, "learning_rate": 1e-05, "loss": 1.0532, "step": 38450 }, { "epoch": 34.06111603188663, "grad_norm": 0.21632416546344757, "learning_rate": 1e-05, "loss": 1.0163, "step": 38455 }, { "epoch": 34.06554472984942, "grad_norm": 0.2102256864309311, "learning_rate": 1e-05, "loss": 1.0161, "step": 38460 }, { "epoch": 34.06997342781222, "grad_norm": 0.22546735405921936, "learning_rate": 1e-05, "loss": 0.9752, "step": 38465 }, { "epoch": 34.07440212577502, "grad_norm": 0.24194324016571045, "learning_rate": 1e-05, "loss": 0.9711, "step": 38470 }, { "epoch": 34.078830823737825, "grad_norm": 0.21429917216300964, "learning_rate": 1e-05, "loss": 1.0106, "step": 38475 }, { "epoch": 34.08325952170062, "grad_norm": 0.21943578124046326, "learning_rate": 1e-05, "loss": 1.0001, "step": 38480 }, { "epoch": 34.08768821966342, "grad_norm": 0.2599654197692871, "learning_rate": 1e-05, "loss": 0.9424, "step": 38485 }, { "epoch": 34.09211691762622, "grad_norm": 0.27270013093948364, "learning_rate": 1e-05, "loss": 0.9447, "step": 38490 }, { "epoch": 34.096545615589015, "grad_norm": 0.22257526218891144, "learning_rate": 1e-05, "loss": 0.9938, "step": 38495 }, { "epoch": 34.100974313551816, "grad_norm": 0.24975185096263885, "learning_rate": 1e-05, "loss": 0.9729, "step": 38500 }, { "epoch": 34.10540301151462, "grad_norm": 0.22256126999855042, "learning_rate": 1e-05, "loss": 0.9654, "step": 38505 }, { "epoch": 34.10983170947741, "grad_norm": 0.2118120789527893, "learning_rate": 1e-05, "loss": 0.959, "step": 38510 }, { "epoch": 34.11426040744021, "grad_norm": 0.20445938408374786, "learning_rate": 1e-05, "loss": 1.0002, "step": 38515 }, { "epoch": 34.118689105403014, "grad_norm": 0.2057626098394394, "learning_rate": 1e-05, "loss": 0.9544, "step": 38520 }, { "epoch": 34.12311780336581, "grad_norm": 0.2255978137254715, "learning_rate": 1e-05, "loss": 0.9618, "step": 38525 }, { "epoch": 34.12754650132861, "grad_norm": 0.24340316653251648, "learning_rate": 1e-05, "loss": 0.9843, "step": 38530 }, { "epoch": 34.13197519929141, "grad_norm": 0.2793462872505188, "learning_rate": 1e-05, "loss": 1.0341, "step": 38535 }, { "epoch": 34.136403897254205, "grad_norm": 0.27937790751457214, "learning_rate": 1e-05, "loss": 0.9503, "step": 38540 }, { "epoch": 34.140832595217006, "grad_norm": 0.2662443220615387, "learning_rate": 1e-05, "loss": 0.9711, "step": 38545 }, { "epoch": 34.14526129317981, "grad_norm": 0.2826865315437317, "learning_rate": 1e-05, "loss": 0.9485, "step": 38550 }, { "epoch": 34.1496899911426, "grad_norm": 0.2715863585472107, "learning_rate": 1e-05, "loss": 1.0083, "step": 38555 }, { "epoch": 34.1541186891054, "grad_norm": 0.23822639882564545, "learning_rate": 1e-05, "loss": 0.9738, "step": 38560 }, { "epoch": 34.158547387068204, "grad_norm": 0.30323848128318787, "learning_rate": 1e-05, "loss": 0.9568, "step": 38565 }, { "epoch": 34.162976085031, "grad_norm": 0.2628025412559509, "learning_rate": 1e-05, "loss": 1.0421, "step": 38570 }, { "epoch": 34.1674047829938, "grad_norm": 0.21528016030788422, "learning_rate": 1e-05, "loss": 0.9277, "step": 38575 }, { "epoch": 34.1718334809566, "grad_norm": 0.2175915688276291, "learning_rate": 1e-05, "loss": 0.9849, "step": 38580 }, { "epoch": 34.176262178919394, "grad_norm": 0.25207310914993286, "learning_rate": 1e-05, "loss": 0.9896, "step": 38585 }, { "epoch": 34.180690876882196, "grad_norm": 0.2646348178386688, "learning_rate": 1e-05, "loss": 0.9494, "step": 38590 }, { "epoch": 34.185119574845, "grad_norm": 0.22858981788158417, "learning_rate": 1e-05, "loss": 1.0032, "step": 38595 }, { "epoch": 34.1895482728078, "grad_norm": 0.24562247097492218, "learning_rate": 1e-05, "loss": 0.9772, "step": 38600 }, { "epoch": 34.19397697077059, "grad_norm": 0.24626575410366058, "learning_rate": 1e-05, "loss": 0.9551, "step": 38605 }, { "epoch": 34.19840566873339, "grad_norm": 0.2909865379333496, "learning_rate": 1e-05, "loss": 0.9588, "step": 38610 }, { "epoch": 34.202834366696194, "grad_norm": 0.2305697798728943, "learning_rate": 1e-05, "loss": 1.0126, "step": 38615 }, { "epoch": 34.20726306465899, "grad_norm": 0.27132418751716614, "learning_rate": 1e-05, "loss": 0.9564, "step": 38620 }, { "epoch": 34.21169176262179, "grad_norm": 0.25381582975387573, "learning_rate": 1e-05, "loss": 0.9984, "step": 38625 }, { "epoch": 34.21612046058459, "grad_norm": 0.26009371876716614, "learning_rate": 1e-05, "loss": 0.9533, "step": 38630 }, { "epoch": 34.220549158547385, "grad_norm": 0.23428818583488464, "learning_rate": 1e-05, "loss": 0.9807, "step": 38635 }, { "epoch": 34.224977856510186, "grad_norm": 0.2927929759025574, "learning_rate": 1e-05, "loss": 0.9862, "step": 38640 }, { "epoch": 34.22940655447299, "grad_norm": 0.24169239401817322, "learning_rate": 1e-05, "loss": 0.9852, "step": 38645 }, { "epoch": 34.23383525243578, "grad_norm": 0.23842723667621613, "learning_rate": 1e-05, "loss": 1.0179, "step": 38650 }, { "epoch": 34.23826395039858, "grad_norm": 0.19830359518527985, "learning_rate": 1e-05, "loss": 0.964, "step": 38655 }, { "epoch": 34.242692648361384, "grad_norm": 0.24296438694000244, "learning_rate": 1e-05, "loss": 0.967, "step": 38660 }, { "epoch": 34.24712134632418, "grad_norm": 0.2316388040781021, "learning_rate": 1e-05, "loss": 0.9881, "step": 38665 }, { "epoch": 34.25155004428698, "grad_norm": 0.22557847201824188, "learning_rate": 1e-05, "loss": 0.917, "step": 38670 }, { "epoch": 34.25597874224978, "grad_norm": 0.2527371048927307, "learning_rate": 1e-05, "loss": 0.9605, "step": 38675 }, { "epoch": 34.260407440212575, "grad_norm": 0.26969581842422485, "learning_rate": 1e-05, "loss": 0.9677, "step": 38680 }, { "epoch": 34.264836138175376, "grad_norm": 0.24290160834789276, "learning_rate": 1e-05, "loss": 0.9354, "step": 38685 }, { "epoch": 34.26926483613818, "grad_norm": 0.25625211000442505, "learning_rate": 1e-05, "loss": 0.9803, "step": 38690 }, { "epoch": 34.27369353410097, "grad_norm": 0.2537311315536499, "learning_rate": 1e-05, "loss": 0.9764, "step": 38695 }, { "epoch": 34.27812223206377, "grad_norm": 0.2425544410943985, "learning_rate": 1e-05, "loss": 0.9387, "step": 38700 }, { "epoch": 34.282550930026574, "grad_norm": 0.2746416926383972, "learning_rate": 1e-05, "loss": 0.9531, "step": 38705 }, { "epoch": 34.28697962798937, "grad_norm": 0.2213379591703415, "learning_rate": 1e-05, "loss": 0.9637, "step": 38710 }, { "epoch": 34.29140832595217, "grad_norm": 0.23981286585330963, "learning_rate": 1e-05, "loss": 0.9879, "step": 38715 }, { "epoch": 34.29583702391497, "grad_norm": 0.2633059620857239, "learning_rate": 1e-05, "loss": 0.9759, "step": 38720 }, { "epoch": 34.30026572187777, "grad_norm": 0.2280246466398239, "learning_rate": 1e-05, "loss": 0.9844, "step": 38725 }, { "epoch": 34.304694419840565, "grad_norm": 0.22413896024227142, "learning_rate": 1e-05, "loss": 0.9839, "step": 38730 }, { "epoch": 34.30912311780337, "grad_norm": 0.2260398417711258, "learning_rate": 1e-05, "loss": 1.0143, "step": 38735 }, { "epoch": 34.31355181576617, "grad_norm": 0.2194809466600418, "learning_rate": 1e-05, "loss": 0.9701, "step": 38740 }, { "epoch": 34.31798051372896, "grad_norm": 0.26567405462265015, "learning_rate": 1e-05, "loss": 1.0166, "step": 38745 }, { "epoch": 34.32240921169176, "grad_norm": 0.2985020875930786, "learning_rate": 1e-05, "loss": 0.9592, "step": 38750 }, { "epoch": 34.326837909654564, "grad_norm": 0.27523016929626465, "learning_rate": 1e-05, "loss": 1.0021, "step": 38755 }, { "epoch": 34.33126660761736, "grad_norm": 0.257125586271286, "learning_rate": 1e-05, "loss": 1.0009, "step": 38760 }, { "epoch": 34.33569530558016, "grad_norm": 0.24871249496936798, "learning_rate": 1e-05, "loss": 1.0432, "step": 38765 }, { "epoch": 34.34012400354296, "grad_norm": 0.2568099796772003, "learning_rate": 1e-05, "loss": 1.0167, "step": 38770 }, { "epoch": 34.344552701505755, "grad_norm": 0.3367270231246948, "learning_rate": 1e-05, "loss": 1.0079, "step": 38775 }, { "epoch": 34.348981399468556, "grad_norm": 0.2605668008327484, "learning_rate": 1e-05, "loss": 1.0017, "step": 38780 }, { "epoch": 34.35341009743136, "grad_norm": 0.24907691776752472, "learning_rate": 1e-05, "loss": 0.9494, "step": 38785 }, { "epoch": 34.35783879539415, "grad_norm": 0.2581411302089691, "learning_rate": 1e-05, "loss": 0.9647, "step": 38790 }, { "epoch": 34.36226749335695, "grad_norm": 0.31799376010894775, "learning_rate": 1e-05, "loss": 0.9674, "step": 38795 }, { "epoch": 34.366696191319754, "grad_norm": 0.249354749917984, "learning_rate": 1e-05, "loss": 0.9854, "step": 38800 }, { "epoch": 34.37112488928255, "grad_norm": 0.27642133831977844, "learning_rate": 1e-05, "loss": 0.9264, "step": 38805 }, { "epoch": 34.37555358724535, "grad_norm": 0.22749485075473785, "learning_rate": 1e-05, "loss": 0.9758, "step": 38810 }, { "epoch": 34.37998228520815, "grad_norm": 0.23874270915985107, "learning_rate": 1e-05, "loss": 0.9781, "step": 38815 }, { "epoch": 34.384410983170945, "grad_norm": 0.2383110225200653, "learning_rate": 1e-05, "loss": 0.9567, "step": 38820 }, { "epoch": 34.388839681133746, "grad_norm": 0.2674820125102997, "learning_rate": 1e-05, "loss": 0.9785, "step": 38825 }, { "epoch": 34.39326837909655, "grad_norm": 0.3682923913002014, "learning_rate": 1e-05, "loss": 0.9967, "step": 38830 }, { "epoch": 34.39769707705934, "grad_norm": 0.31268978118896484, "learning_rate": 1e-05, "loss": 0.9847, "step": 38835 }, { "epoch": 34.40212577502214, "grad_norm": 0.26730576157569885, "learning_rate": 1e-05, "loss": 0.9969, "step": 38840 }, { "epoch": 34.40655447298494, "grad_norm": 0.234216570854187, "learning_rate": 1e-05, "loss": 0.9362, "step": 38845 }, { "epoch": 34.410983170947745, "grad_norm": 0.2032884657382965, "learning_rate": 1e-05, "loss": 0.9808, "step": 38850 }, { "epoch": 34.41541186891054, "grad_norm": 0.24037154018878937, "learning_rate": 1e-05, "loss": 0.9978, "step": 38855 }, { "epoch": 34.41984056687334, "grad_norm": 0.2099924236536026, "learning_rate": 1e-05, "loss": 1.0094, "step": 38860 }, { "epoch": 34.42426926483614, "grad_norm": 0.21875235438346863, "learning_rate": 1e-05, "loss": 0.982, "step": 38865 }, { "epoch": 34.428697962798935, "grad_norm": 0.2246522158384323, "learning_rate": 1e-05, "loss": 0.934, "step": 38870 }, { "epoch": 34.43312666076174, "grad_norm": 0.2580392062664032, "learning_rate": 1e-05, "loss": 0.9773, "step": 38875 }, { "epoch": 34.43755535872454, "grad_norm": 0.21624650061130524, "learning_rate": 1e-05, "loss": 0.9263, "step": 38880 }, { "epoch": 34.44198405668733, "grad_norm": 0.19981111586093903, "learning_rate": 1e-05, "loss": 0.9913, "step": 38885 }, { "epoch": 34.44641275465013, "grad_norm": 0.2141062617301941, "learning_rate": 1e-05, "loss": 1.0004, "step": 38890 }, { "epoch": 34.450841452612934, "grad_norm": 0.21368104219436646, "learning_rate": 1e-05, "loss": 0.9554, "step": 38895 }, { "epoch": 34.45527015057573, "grad_norm": 0.27000999450683594, "learning_rate": 1e-05, "loss": 0.9564, "step": 38900 }, { "epoch": 34.45969884853853, "grad_norm": 0.2587941288948059, "learning_rate": 1e-05, "loss": 0.9977, "step": 38905 }, { "epoch": 34.46412754650133, "grad_norm": 0.2249888777732849, "learning_rate": 1e-05, "loss": 0.9905, "step": 38910 }, { "epoch": 34.468556244464125, "grad_norm": 0.26439368724823, "learning_rate": 1e-05, "loss": 0.9928, "step": 38915 }, { "epoch": 34.472984942426926, "grad_norm": 0.23644816875457764, "learning_rate": 1e-05, "loss": 0.9721, "step": 38920 }, { "epoch": 34.47741364038973, "grad_norm": 0.22862541675567627, "learning_rate": 1e-05, "loss": 0.921, "step": 38925 }, { "epoch": 34.48184233835252, "grad_norm": 0.22303332388401031, "learning_rate": 1e-05, "loss": 0.9209, "step": 38930 }, { "epoch": 34.48627103631532, "grad_norm": 0.23572546243667603, "learning_rate": 1e-05, "loss": 0.9811, "step": 38935 }, { "epoch": 34.490699734278124, "grad_norm": 0.19901789724826813, "learning_rate": 1e-05, "loss": 0.9686, "step": 38940 }, { "epoch": 34.49512843224092, "grad_norm": 0.25095057487487793, "learning_rate": 1e-05, "loss": 0.9877, "step": 38945 }, { "epoch": 34.49955713020372, "grad_norm": 0.2433033287525177, "learning_rate": 1e-05, "loss": 1.019, "step": 38950 }, { "epoch": 34.50398582816652, "grad_norm": 0.2725948691368103, "learning_rate": 1e-05, "loss": 1.0249, "step": 38955 }, { "epoch": 34.508414526129314, "grad_norm": 0.275831013917923, "learning_rate": 1e-05, "loss": 0.9822, "step": 38960 }, { "epoch": 34.512843224092116, "grad_norm": 0.26582422852516174, "learning_rate": 1e-05, "loss": 0.9793, "step": 38965 }, { "epoch": 34.51727192205492, "grad_norm": 0.22523094713687897, "learning_rate": 1e-05, "loss": 0.9756, "step": 38970 }, { "epoch": 34.52170062001772, "grad_norm": 0.2237347811460495, "learning_rate": 1e-05, "loss": 1.0344, "step": 38975 }, { "epoch": 34.52612931798051, "grad_norm": 0.23026876151561737, "learning_rate": 1e-05, "loss": 0.9825, "step": 38980 }, { "epoch": 34.53055801594331, "grad_norm": 0.2516075670719147, "learning_rate": 1e-05, "loss": 0.9661, "step": 38985 }, { "epoch": 34.534986713906115, "grad_norm": 0.2620795667171478, "learning_rate": 1e-05, "loss": 0.9421, "step": 38990 }, { "epoch": 34.53941541186891, "grad_norm": 0.2485264390707016, "learning_rate": 1e-05, "loss": 0.9364, "step": 38995 }, { "epoch": 34.54384410983171, "grad_norm": 0.26360583305358887, "learning_rate": 1e-05, "loss": 1.0098, "step": 39000 }, { "epoch": 34.54827280779451, "grad_norm": 0.2605079412460327, "learning_rate": 1e-05, "loss": 0.9838, "step": 39005 }, { "epoch": 34.552701505757305, "grad_norm": 0.20501109957695007, "learning_rate": 1e-05, "loss": 1.0172, "step": 39010 }, { "epoch": 34.55713020372011, "grad_norm": 0.23418430984020233, "learning_rate": 1e-05, "loss": 0.97, "step": 39015 }, { "epoch": 34.56155890168291, "grad_norm": 0.25498753786087036, "learning_rate": 1e-05, "loss": 0.9381, "step": 39020 }, { "epoch": 34.5659875996457, "grad_norm": 0.2587830722332001, "learning_rate": 1e-05, "loss": 0.9885, "step": 39025 }, { "epoch": 34.5704162976085, "grad_norm": 0.2637541890144348, "learning_rate": 1e-05, "loss": 0.9834, "step": 39030 }, { "epoch": 34.574844995571304, "grad_norm": 0.25061196088790894, "learning_rate": 1e-05, "loss": 0.9959, "step": 39035 }, { "epoch": 34.5792736935341, "grad_norm": 0.26215267181396484, "learning_rate": 1e-05, "loss": 1.0071, "step": 39040 }, { "epoch": 34.5837023914969, "grad_norm": 0.223074808716774, "learning_rate": 1e-05, "loss": 0.9485, "step": 39045 }, { "epoch": 34.5881310894597, "grad_norm": 0.2833113372325897, "learning_rate": 1e-05, "loss": 0.9652, "step": 39050 }, { "epoch": 34.592559787422495, "grad_norm": 0.2382892668247223, "learning_rate": 1e-05, "loss": 0.9622, "step": 39055 }, { "epoch": 34.596988485385296, "grad_norm": 0.29259565472602844, "learning_rate": 1e-05, "loss": 1.0029, "step": 39060 }, { "epoch": 34.6014171833481, "grad_norm": 0.3025352954864502, "learning_rate": 1e-05, "loss": 0.9788, "step": 39065 }, { "epoch": 34.60584588131089, "grad_norm": 0.263430655002594, "learning_rate": 1e-05, "loss": 0.9994, "step": 39070 }, { "epoch": 34.61027457927369, "grad_norm": 0.2834121584892273, "learning_rate": 1e-05, "loss": 1.0209, "step": 39075 }, { "epoch": 34.614703277236494, "grad_norm": 0.21686379611492157, "learning_rate": 1e-05, "loss": 1.0044, "step": 39080 }, { "epoch": 34.619131975199295, "grad_norm": 0.2334703654050827, "learning_rate": 1e-05, "loss": 0.9876, "step": 39085 }, { "epoch": 34.62356067316209, "grad_norm": 0.23110033571720123, "learning_rate": 1e-05, "loss": 1.0545, "step": 39090 }, { "epoch": 34.62798937112489, "grad_norm": 0.2550921142101288, "learning_rate": 1e-05, "loss": 0.9702, "step": 39095 }, { "epoch": 34.63241806908769, "grad_norm": 0.20780588686466217, "learning_rate": 1e-05, "loss": 0.9909, "step": 39100 }, { "epoch": 34.636846767050486, "grad_norm": 0.2538759410381317, "learning_rate": 1e-05, "loss": 1.0107, "step": 39105 }, { "epoch": 34.64127546501329, "grad_norm": 0.2352839708328247, "learning_rate": 1e-05, "loss": 1.001, "step": 39110 }, { "epoch": 34.64570416297609, "grad_norm": 0.2324318140745163, "learning_rate": 1e-05, "loss": 1.0232, "step": 39115 }, { "epoch": 34.65013286093888, "grad_norm": 0.2471347451210022, "learning_rate": 1e-05, "loss": 0.9299, "step": 39120 }, { "epoch": 34.65456155890168, "grad_norm": 0.22780823707580566, "learning_rate": 1e-05, "loss": 0.9819, "step": 39125 }, { "epoch": 34.658990256864485, "grad_norm": 0.2593483626842499, "learning_rate": 1e-05, "loss": 0.997, "step": 39130 }, { "epoch": 34.66341895482728, "grad_norm": 0.24731208384037018, "learning_rate": 1e-05, "loss": 1.0208, "step": 39135 }, { "epoch": 34.66784765279008, "grad_norm": 0.2653765380382538, "learning_rate": 1e-05, "loss": 0.9727, "step": 39140 }, { "epoch": 34.67227635075288, "grad_norm": 0.22021587193012238, "learning_rate": 1e-05, "loss": 0.972, "step": 39145 }, { "epoch": 34.676705048715675, "grad_norm": 0.22607065737247467, "learning_rate": 1e-05, "loss": 0.9982, "step": 39150 }, { "epoch": 34.681133746678476, "grad_norm": 0.28130224347114563, "learning_rate": 1e-05, "loss": 1.024, "step": 39155 }, { "epoch": 34.68556244464128, "grad_norm": 0.22432592511177063, "learning_rate": 1e-05, "loss": 0.9745, "step": 39160 }, { "epoch": 34.68999114260407, "grad_norm": 0.268875390291214, "learning_rate": 1e-05, "loss": 1.0473, "step": 39165 }, { "epoch": 34.69441984056687, "grad_norm": 0.24163834750652313, "learning_rate": 1e-05, "loss": 0.9306, "step": 39170 }, { "epoch": 34.698848538529674, "grad_norm": 0.22692064940929413, "learning_rate": 1e-05, "loss": 0.9517, "step": 39175 }, { "epoch": 34.70327723649247, "grad_norm": 0.25467365980148315, "learning_rate": 1e-05, "loss": 1.0191, "step": 39180 }, { "epoch": 34.70770593445527, "grad_norm": 0.25165635347366333, "learning_rate": 1e-05, "loss": 0.9869, "step": 39185 }, { "epoch": 34.71213463241807, "grad_norm": 0.2267739325761795, "learning_rate": 1e-05, "loss": 0.9296, "step": 39190 }, { "epoch": 34.716563330380865, "grad_norm": 0.2208961546421051, "learning_rate": 1e-05, "loss": 0.993, "step": 39195 }, { "epoch": 34.720992028343666, "grad_norm": 0.21921059489250183, "learning_rate": 1e-05, "loss": 0.9856, "step": 39200 }, { "epoch": 34.72542072630647, "grad_norm": 0.25159573554992676, "learning_rate": 1e-05, "loss": 0.9612, "step": 39205 }, { "epoch": 34.72984942426926, "grad_norm": 0.2510949373245239, "learning_rate": 1e-05, "loss": 0.9441, "step": 39210 }, { "epoch": 34.73427812223206, "grad_norm": 0.2238406538963318, "learning_rate": 1e-05, "loss": 0.985, "step": 39215 }, { "epoch": 34.738706820194864, "grad_norm": 0.23065055906772614, "learning_rate": 1e-05, "loss": 1.0101, "step": 39220 }, { "epoch": 34.743135518157665, "grad_norm": 0.2058543562889099, "learning_rate": 1e-05, "loss": 0.9992, "step": 39225 }, { "epoch": 34.74756421612046, "grad_norm": 0.21793067455291748, "learning_rate": 1e-05, "loss": 0.997, "step": 39230 }, { "epoch": 34.75199291408326, "grad_norm": 0.2603197693824768, "learning_rate": 1e-05, "loss": 0.9779, "step": 39235 }, { "epoch": 34.75642161204606, "grad_norm": 0.23578760027885437, "learning_rate": 1e-05, "loss": 0.915, "step": 39240 }, { "epoch": 34.760850310008856, "grad_norm": 0.218476340174675, "learning_rate": 1e-05, "loss": 0.9745, "step": 39245 }, { "epoch": 34.76527900797166, "grad_norm": 0.26012128591537476, "learning_rate": 1e-05, "loss": 0.9907, "step": 39250 }, { "epoch": 34.76970770593446, "grad_norm": 0.21792545914649963, "learning_rate": 1e-05, "loss": 1.0365, "step": 39255 }, { "epoch": 34.77413640389725, "grad_norm": 0.2458389401435852, "learning_rate": 1e-05, "loss": 0.9856, "step": 39260 }, { "epoch": 34.77856510186005, "grad_norm": 0.19058483839035034, "learning_rate": 1e-05, "loss": 1.0008, "step": 39265 }, { "epoch": 34.782993799822854, "grad_norm": 0.2451803833246231, "learning_rate": 1e-05, "loss": 0.984, "step": 39270 }, { "epoch": 34.78742249778565, "grad_norm": 0.3127826750278473, "learning_rate": 1e-05, "loss": 0.9932, "step": 39275 }, { "epoch": 34.79185119574845, "grad_norm": 0.2494623363018036, "learning_rate": 1e-05, "loss": 0.9792, "step": 39280 }, { "epoch": 34.79627989371125, "grad_norm": 0.2247854471206665, "learning_rate": 1e-05, "loss": 0.9946, "step": 39285 }, { "epoch": 34.800708591674045, "grad_norm": 0.23555737733840942, "learning_rate": 1e-05, "loss": 1.01, "step": 39290 }, { "epoch": 34.805137289636846, "grad_norm": 0.20528742671012878, "learning_rate": 1e-05, "loss": 0.9369, "step": 39295 }, { "epoch": 34.80956598759965, "grad_norm": 0.243865966796875, "learning_rate": 1e-05, "loss": 0.976, "step": 39300 }, { "epoch": 34.81399468556244, "grad_norm": 0.2291424721479416, "learning_rate": 1e-05, "loss": 1.0047, "step": 39305 }, { "epoch": 34.81842338352524, "grad_norm": 0.3096823990345001, "learning_rate": 1e-05, "loss": 0.9977, "step": 39310 }, { "epoch": 34.822852081488044, "grad_norm": 0.3650055527687073, "learning_rate": 1e-05, "loss": 1.0347, "step": 39315 }, { "epoch": 34.82728077945084, "grad_norm": 0.22102035582065582, "learning_rate": 1e-05, "loss": 0.9473, "step": 39320 }, { "epoch": 34.83170947741364, "grad_norm": 0.20257841050624847, "learning_rate": 1e-05, "loss": 0.9859, "step": 39325 }, { "epoch": 34.83613817537644, "grad_norm": 0.2753404974937439, "learning_rate": 1e-05, "loss": 0.9374, "step": 39330 }, { "epoch": 34.84056687333924, "grad_norm": 0.226443350315094, "learning_rate": 1e-05, "loss": 0.9716, "step": 39335 }, { "epoch": 34.844995571302036, "grad_norm": 0.23265786468982697, "learning_rate": 1e-05, "loss": 0.9785, "step": 39340 }, { "epoch": 34.84942426926484, "grad_norm": 0.2845379114151001, "learning_rate": 1e-05, "loss": 0.9521, "step": 39345 }, { "epoch": 34.85385296722764, "grad_norm": 0.21679973602294922, "learning_rate": 1e-05, "loss": 0.9602, "step": 39350 }, { "epoch": 34.85828166519043, "grad_norm": 0.21578486263751984, "learning_rate": 1e-05, "loss": 0.9172, "step": 39355 }, { "epoch": 34.862710363153234, "grad_norm": 0.2823736071586609, "learning_rate": 1e-05, "loss": 0.9604, "step": 39360 }, { "epoch": 34.867139061116035, "grad_norm": 0.23898911476135254, "learning_rate": 1e-05, "loss": 1.0114, "step": 39365 }, { "epoch": 34.87156775907883, "grad_norm": 0.21913652122020721, "learning_rate": 1e-05, "loss": 1.0596, "step": 39370 }, { "epoch": 34.87599645704163, "grad_norm": 0.25310397148132324, "learning_rate": 1e-05, "loss": 0.9624, "step": 39375 }, { "epoch": 34.88042515500443, "grad_norm": 0.2798282504081726, "learning_rate": 1e-05, "loss": 0.9879, "step": 39380 }, { "epoch": 34.884853852967225, "grad_norm": 0.23490358889102936, "learning_rate": 1e-05, "loss": 0.9403, "step": 39385 }, { "epoch": 34.88928255093003, "grad_norm": 0.2537546753883362, "learning_rate": 1e-05, "loss": 0.9872, "step": 39390 }, { "epoch": 34.89371124889283, "grad_norm": 0.2565337121486664, "learning_rate": 1e-05, "loss": 0.9589, "step": 39395 }, { "epoch": 34.89813994685562, "grad_norm": 0.20885735750198364, "learning_rate": 1e-05, "loss": 0.9732, "step": 39400 }, { "epoch": 34.90256864481842, "grad_norm": 0.1987423151731491, "learning_rate": 1e-05, "loss": 0.9782, "step": 39405 }, { "epoch": 34.906997342781224, "grad_norm": 0.22835476696491241, "learning_rate": 1e-05, "loss": 0.9323, "step": 39410 }, { "epoch": 34.91142604074402, "grad_norm": 0.27420395612716675, "learning_rate": 1e-05, "loss": 0.9859, "step": 39415 }, { "epoch": 34.91585473870682, "grad_norm": 0.2783346474170685, "learning_rate": 1e-05, "loss": 0.9482, "step": 39420 }, { "epoch": 34.92028343666962, "grad_norm": 0.2649555802345276, "learning_rate": 1e-05, "loss": 1.0004, "step": 39425 }, { "epoch": 34.924712134632415, "grad_norm": 0.2542673647403717, "learning_rate": 1e-05, "loss": 0.9758, "step": 39430 }, { "epoch": 34.929140832595216, "grad_norm": 0.21622420847415924, "learning_rate": 1e-05, "loss": 0.9745, "step": 39435 }, { "epoch": 34.93356953055802, "grad_norm": 0.2599319517612457, "learning_rate": 1e-05, "loss": 0.9755, "step": 39440 }, { "epoch": 34.93799822852081, "grad_norm": 0.18314099311828613, "learning_rate": 1e-05, "loss": 0.9486, "step": 39445 }, { "epoch": 34.94242692648361, "grad_norm": 0.3421282470226288, "learning_rate": 1e-05, "loss": 1.0118, "step": 39450 }, { "epoch": 34.946855624446414, "grad_norm": 0.23871494829654694, "learning_rate": 1e-05, "loss": 0.9865, "step": 39455 }, { "epoch": 34.951284322409215, "grad_norm": 0.23737779259681702, "learning_rate": 1e-05, "loss": 0.9843, "step": 39460 }, { "epoch": 34.95571302037201, "grad_norm": 0.2778080403804779, "learning_rate": 1e-05, "loss": 0.9155, "step": 39465 }, { "epoch": 34.96014171833481, "grad_norm": 0.23091882467269897, "learning_rate": 1e-05, "loss": 0.9828, "step": 39470 }, { "epoch": 34.96457041629761, "grad_norm": 0.20534102618694305, "learning_rate": 1e-05, "loss": 0.9647, "step": 39475 }, { "epoch": 34.968999114260406, "grad_norm": 0.26630571484565735, "learning_rate": 1e-05, "loss": 0.9995, "step": 39480 }, { "epoch": 34.97342781222321, "grad_norm": 0.26347237825393677, "learning_rate": 1e-05, "loss": 0.9813, "step": 39485 }, { "epoch": 34.97785651018601, "grad_norm": 0.2595798671245575, "learning_rate": 1e-05, "loss": 0.9685, "step": 39490 }, { "epoch": 34.9822852081488, "grad_norm": 0.22914078831672668, "learning_rate": 1e-05, "loss": 0.9713, "step": 39495 }, { "epoch": 34.9867139061116, "grad_norm": 0.20719504356384277, "learning_rate": 1e-05, "loss": 1.0008, "step": 39500 }, { "epoch": 34.991142604074405, "grad_norm": 0.22324219346046448, "learning_rate": 1e-05, "loss": 0.9936, "step": 39505 }, { "epoch": 34.9955713020372, "grad_norm": 0.22987961769104004, "learning_rate": 1e-05, "loss": 0.9975, "step": 39510 }, { "epoch": 35.0, "grad_norm": 0.2068951427936554, "learning_rate": 1e-05, "loss": 1.0492, "step": 39515 }, { "epoch": 35.0044286979628, "grad_norm": 0.23593661189079285, "learning_rate": 1e-05, "loss": 0.9706, "step": 39520 }, { "epoch": 35.008857395925595, "grad_norm": 0.315938264131546, "learning_rate": 1e-05, "loss": 0.9888, "step": 39525 }, { "epoch": 35.0132860938884, "grad_norm": 0.2850911617279053, "learning_rate": 1e-05, "loss": 0.9903, "step": 39530 }, { "epoch": 35.0177147918512, "grad_norm": 0.21571305394172668, "learning_rate": 1e-05, "loss": 0.9728, "step": 39535 }, { "epoch": 35.02214348981399, "grad_norm": 0.23679733276367188, "learning_rate": 1e-05, "loss": 0.9533, "step": 39540 }, { "epoch": 35.02657218777679, "grad_norm": 0.22444473206996918, "learning_rate": 1e-05, "loss": 1.0042, "step": 39545 }, { "epoch": 35.031000885739594, "grad_norm": 0.22798216342926025, "learning_rate": 1e-05, "loss": 0.9656, "step": 39550 }, { "epoch": 35.03542958370239, "grad_norm": 0.23759490251541138, "learning_rate": 1e-05, "loss": 0.9771, "step": 39555 }, { "epoch": 35.03985828166519, "grad_norm": 0.25344133377075195, "learning_rate": 1e-05, "loss": 0.9797, "step": 39560 }, { "epoch": 35.04428697962799, "grad_norm": 0.2339189499616623, "learning_rate": 1e-05, "loss": 0.9845, "step": 39565 }, { "epoch": 35.048715677590785, "grad_norm": 0.24252775311470032, "learning_rate": 1e-05, "loss": 0.9104, "step": 39570 }, { "epoch": 35.053144375553586, "grad_norm": 0.24591848254203796, "learning_rate": 1e-05, "loss": 0.9707, "step": 39575 }, { "epoch": 35.05757307351639, "grad_norm": 0.2737513780593872, "learning_rate": 1e-05, "loss": 0.9496, "step": 39580 }, { "epoch": 35.06200177147919, "grad_norm": 0.24790601432323456, "learning_rate": 1e-05, "loss": 1.0131, "step": 39585 }, { "epoch": 35.06643046944198, "grad_norm": 0.21346385776996613, "learning_rate": 1e-05, "loss": 0.965, "step": 39590 }, { "epoch": 35.070859167404784, "grad_norm": 0.2736019492149353, "learning_rate": 1e-05, "loss": 0.9304, "step": 39595 }, { "epoch": 35.075287865367585, "grad_norm": 0.2315564900636673, "learning_rate": 1e-05, "loss": 0.9429, "step": 39600 }, { "epoch": 35.07971656333038, "grad_norm": 0.23989319801330566, "learning_rate": 1e-05, "loss": 0.9853, "step": 39605 }, { "epoch": 35.08414526129318, "grad_norm": 0.3100568652153015, "learning_rate": 1e-05, "loss": 0.9846, "step": 39610 }, { "epoch": 35.08857395925598, "grad_norm": 0.26376354694366455, "learning_rate": 1e-05, "loss": 0.9554, "step": 39615 }, { "epoch": 35.093002657218776, "grad_norm": 0.2705543637275696, "learning_rate": 1e-05, "loss": 0.9691, "step": 39620 }, { "epoch": 35.09743135518158, "grad_norm": 0.21611632406711578, "learning_rate": 1e-05, "loss": 0.9967, "step": 39625 }, { "epoch": 35.10186005314438, "grad_norm": 0.2686470150947571, "learning_rate": 1e-05, "loss": 0.984, "step": 39630 }, { "epoch": 35.10628875110717, "grad_norm": 0.21684277057647705, "learning_rate": 1e-05, "loss": 0.9657, "step": 39635 }, { "epoch": 35.11071744906997, "grad_norm": 0.23775018751621246, "learning_rate": 1e-05, "loss": 1.0134, "step": 39640 }, { "epoch": 35.115146147032775, "grad_norm": 0.251823753118515, "learning_rate": 1e-05, "loss": 0.9738, "step": 39645 }, { "epoch": 35.11957484499557, "grad_norm": 0.27192607522010803, "learning_rate": 1e-05, "loss": 0.9474, "step": 39650 }, { "epoch": 35.12400354295837, "grad_norm": 0.21933510899543762, "learning_rate": 1e-05, "loss": 0.9557, "step": 39655 }, { "epoch": 35.12843224092117, "grad_norm": 0.2719583511352539, "learning_rate": 1e-05, "loss": 0.9944, "step": 39660 }, { "epoch": 35.132860938883965, "grad_norm": 0.26673954725265503, "learning_rate": 1e-05, "loss": 0.8995, "step": 39665 }, { "epoch": 35.137289636846766, "grad_norm": 0.26488256454467773, "learning_rate": 1e-05, "loss": 0.9875, "step": 39670 }, { "epoch": 35.14171833480957, "grad_norm": 0.2852939963340759, "learning_rate": 1e-05, "loss": 0.9168, "step": 39675 }, { "epoch": 35.14614703277236, "grad_norm": 0.2433881014585495, "learning_rate": 1e-05, "loss": 1.0, "step": 39680 }, { "epoch": 35.15057573073516, "grad_norm": 0.2733299136161804, "learning_rate": 1e-05, "loss": 1.0203, "step": 39685 }, { "epoch": 35.155004428697964, "grad_norm": 0.2582400143146515, "learning_rate": 1e-05, "loss": 0.9912, "step": 39690 }, { "epoch": 35.15943312666076, "grad_norm": 0.2586967349052429, "learning_rate": 1e-05, "loss": 1.0354, "step": 39695 }, { "epoch": 35.16386182462356, "grad_norm": 0.24145683646202087, "learning_rate": 1e-05, "loss": 0.9635, "step": 39700 }, { "epoch": 35.16829052258636, "grad_norm": 0.22906555235385895, "learning_rate": 1e-05, "loss": 0.9644, "step": 39705 }, { "epoch": 35.17271922054916, "grad_norm": 0.22689169645309448, "learning_rate": 1e-05, "loss": 0.9319, "step": 39710 }, { "epoch": 35.177147918511956, "grad_norm": 0.2927563488483429, "learning_rate": 1e-05, "loss": 1.004, "step": 39715 }, { "epoch": 35.18157661647476, "grad_norm": 0.24924597144126892, "learning_rate": 1e-05, "loss": 0.9984, "step": 39720 }, { "epoch": 35.18600531443756, "grad_norm": 0.2501330077648163, "learning_rate": 1e-05, "loss": 0.9813, "step": 39725 }, { "epoch": 35.19043401240035, "grad_norm": 0.3229110836982727, "learning_rate": 1e-05, "loss": 0.9725, "step": 39730 }, { "epoch": 35.194862710363154, "grad_norm": 0.25542911887168884, "learning_rate": 1e-05, "loss": 0.9466, "step": 39735 }, { "epoch": 35.199291408325955, "grad_norm": 0.22127750515937805, "learning_rate": 1e-05, "loss": 0.9979, "step": 39740 }, { "epoch": 35.20372010628875, "grad_norm": 0.22302144765853882, "learning_rate": 1e-05, "loss": 0.9592, "step": 39745 }, { "epoch": 35.20814880425155, "grad_norm": 0.24056854844093323, "learning_rate": 1e-05, "loss": 0.9356, "step": 39750 }, { "epoch": 35.21257750221435, "grad_norm": 0.23887591063976288, "learning_rate": 1e-05, "loss": 0.9837, "step": 39755 }, { "epoch": 35.217006200177146, "grad_norm": 0.29898229241371155, "learning_rate": 1e-05, "loss": 0.9494, "step": 39760 }, { "epoch": 35.22143489813995, "grad_norm": 0.24393925070762634, "learning_rate": 1e-05, "loss": 0.9712, "step": 39765 }, { "epoch": 35.22586359610275, "grad_norm": 0.2737123668193817, "learning_rate": 1e-05, "loss": 0.9889, "step": 39770 }, { "epoch": 35.23029229406554, "grad_norm": 0.22036582231521606, "learning_rate": 1e-05, "loss": 0.9765, "step": 39775 }, { "epoch": 35.23472099202834, "grad_norm": 0.28304049372673035, "learning_rate": 1e-05, "loss": 0.9696, "step": 39780 }, { "epoch": 35.239149689991144, "grad_norm": 0.25792399048805237, "learning_rate": 1e-05, "loss": 1.0082, "step": 39785 }, { "epoch": 35.24357838795394, "grad_norm": 0.2297327071428299, "learning_rate": 1e-05, "loss": 0.9765, "step": 39790 }, { "epoch": 35.24800708591674, "grad_norm": 0.24497422575950623, "learning_rate": 1e-05, "loss": 0.9764, "step": 39795 }, { "epoch": 35.25243578387954, "grad_norm": 0.24111978709697723, "learning_rate": 1e-05, "loss": 1.0242, "step": 39800 }, { "epoch": 35.256864481842335, "grad_norm": 0.21529093384742737, "learning_rate": 1e-05, "loss": 0.9989, "step": 39805 }, { "epoch": 35.261293179805136, "grad_norm": 0.19911101460456848, "learning_rate": 1e-05, "loss": 0.9693, "step": 39810 }, { "epoch": 35.26572187776794, "grad_norm": 0.22599324584007263, "learning_rate": 1e-05, "loss": 0.9252, "step": 39815 }, { "epoch": 35.27015057573073, "grad_norm": 0.3066536784172058, "learning_rate": 1e-05, "loss": 0.9683, "step": 39820 }, { "epoch": 35.27457927369353, "grad_norm": 0.22253525257110596, "learning_rate": 1e-05, "loss": 0.9848, "step": 39825 }, { "epoch": 35.279007971656334, "grad_norm": 0.24628353118896484, "learning_rate": 1e-05, "loss": 1.0054, "step": 39830 }, { "epoch": 35.283436669619135, "grad_norm": 0.28410613536834717, "learning_rate": 1e-05, "loss": 1.0271, "step": 39835 }, { "epoch": 35.28786536758193, "grad_norm": 0.2228560894727707, "learning_rate": 1e-05, "loss": 1.0042, "step": 39840 }, { "epoch": 35.29229406554473, "grad_norm": 0.2113189399242401, "learning_rate": 1e-05, "loss": 1.0036, "step": 39845 }, { "epoch": 35.29672276350753, "grad_norm": 0.2591398060321808, "learning_rate": 1e-05, "loss": 0.9479, "step": 39850 }, { "epoch": 35.301151461470326, "grad_norm": 0.2836805582046509, "learning_rate": 1e-05, "loss": 1.0385, "step": 39855 }, { "epoch": 35.30558015943313, "grad_norm": 0.23124606907367706, "learning_rate": 1e-05, "loss": 0.9745, "step": 39860 }, { "epoch": 35.31000885739593, "grad_norm": 0.25152158737182617, "learning_rate": 1e-05, "loss": 0.9902, "step": 39865 }, { "epoch": 35.31443755535872, "grad_norm": 0.273102730512619, "learning_rate": 1e-05, "loss": 0.9787, "step": 39870 }, { "epoch": 35.318866253321524, "grad_norm": 0.2898319959640503, "learning_rate": 1e-05, "loss": 0.938, "step": 39875 }, { "epoch": 35.323294951284325, "grad_norm": 0.23062893748283386, "learning_rate": 1e-05, "loss": 1.0128, "step": 39880 }, { "epoch": 35.32772364924712, "grad_norm": 0.2247716337442398, "learning_rate": 1e-05, "loss": 1.0273, "step": 39885 }, { "epoch": 35.33215234720992, "grad_norm": 0.2621605694293976, "learning_rate": 1e-05, "loss": 0.9773, "step": 39890 }, { "epoch": 35.33658104517272, "grad_norm": 0.24151019752025604, "learning_rate": 1e-05, "loss": 0.9945, "step": 39895 }, { "epoch": 35.341009743135515, "grad_norm": 0.21506355702877045, "learning_rate": 1e-05, "loss": 1.0164, "step": 39900 }, { "epoch": 35.34543844109832, "grad_norm": 0.23143921792507172, "learning_rate": 1e-05, "loss": 0.9712, "step": 39905 }, { "epoch": 35.34986713906112, "grad_norm": 0.22458681464195251, "learning_rate": 1e-05, "loss": 1.0175, "step": 39910 }, { "epoch": 35.35429583702391, "grad_norm": 0.25023940205574036, "learning_rate": 1e-05, "loss": 0.97, "step": 39915 }, { "epoch": 35.35872453498671, "grad_norm": 0.24931608140468597, "learning_rate": 1e-05, "loss": 0.9904, "step": 39920 }, { "epoch": 35.363153232949514, "grad_norm": 0.24820096790790558, "learning_rate": 1e-05, "loss": 1.0071, "step": 39925 }, { "epoch": 35.36758193091231, "grad_norm": 0.2608039081096649, "learning_rate": 1e-05, "loss": 0.9936, "step": 39930 }, { "epoch": 35.37201062887511, "grad_norm": 0.26962557435035706, "learning_rate": 1e-05, "loss": 0.9636, "step": 39935 }, { "epoch": 35.37643932683791, "grad_norm": 0.25606101751327515, "learning_rate": 1e-05, "loss": 0.9888, "step": 39940 }, { "epoch": 35.380868024800705, "grad_norm": 0.2733430862426758, "learning_rate": 1e-05, "loss": 0.9969, "step": 39945 }, { "epoch": 35.385296722763506, "grad_norm": 0.2497810423374176, "learning_rate": 1e-05, "loss": 0.9209, "step": 39950 }, { "epoch": 35.38972542072631, "grad_norm": 0.22486664354801178, "learning_rate": 1e-05, "loss": 0.9801, "step": 39955 }, { "epoch": 35.39415411868911, "grad_norm": 0.24633093178272247, "learning_rate": 1e-05, "loss": 0.9817, "step": 39960 }, { "epoch": 35.3985828166519, "grad_norm": 0.232732892036438, "learning_rate": 1e-05, "loss": 0.969, "step": 39965 }, { "epoch": 35.403011514614704, "grad_norm": 0.2210260033607483, "learning_rate": 1e-05, "loss": 0.9711, "step": 39970 }, { "epoch": 35.407440212577505, "grad_norm": 0.23340089619159698, "learning_rate": 1e-05, "loss": 1.0022, "step": 39975 }, { "epoch": 35.4118689105403, "grad_norm": 0.23357854783535004, "learning_rate": 1e-05, "loss": 1.0109, "step": 39980 }, { "epoch": 35.4162976085031, "grad_norm": 0.2467680126428604, "learning_rate": 1e-05, "loss": 0.9663, "step": 39985 }, { "epoch": 35.4207263064659, "grad_norm": 0.24557289481163025, "learning_rate": 1e-05, "loss": 0.9766, "step": 39990 }, { "epoch": 35.425155004428696, "grad_norm": 0.22820210456848145, "learning_rate": 1e-05, "loss": 0.9699, "step": 39995 }, { "epoch": 35.4295837023915, "grad_norm": 0.27644363045692444, "learning_rate": 1e-05, "loss": 1.0076, "step": 40000 }, { "epoch": 35.4340124003543, "grad_norm": 0.3036370277404785, "learning_rate": 1e-05, "loss": 0.9676, "step": 40005 }, { "epoch": 35.43844109831709, "grad_norm": 0.2707492709159851, "learning_rate": 1e-05, "loss": 0.9704, "step": 40010 }, { "epoch": 35.44286979627989, "grad_norm": 0.22467005252838135, "learning_rate": 1e-05, "loss": 0.969, "step": 40015 }, { "epoch": 35.447298494242695, "grad_norm": 0.2091584950685501, "learning_rate": 1e-05, "loss": 0.9561, "step": 40020 }, { "epoch": 35.45172719220549, "grad_norm": 0.2864615321159363, "learning_rate": 1e-05, "loss": 0.9804, "step": 40025 }, { "epoch": 35.45615589016829, "grad_norm": 0.2828342616558075, "learning_rate": 1e-05, "loss": 0.9673, "step": 40030 }, { "epoch": 35.46058458813109, "grad_norm": 0.23655110597610474, "learning_rate": 1e-05, "loss": 0.9521, "step": 40035 }, { "epoch": 35.465013286093885, "grad_norm": 0.2519761621952057, "learning_rate": 1e-05, "loss": 0.9339, "step": 40040 }, { "epoch": 35.46944198405669, "grad_norm": 0.2154015600681305, "learning_rate": 1e-05, "loss": 0.9686, "step": 40045 }, { "epoch": 35.47387068201949, "grad_norm": 0.2232111096382141, "learning_rate": 1e-05, "loss": 0.9423, "step": 40050 }, { "epoch": 35.47829937998228, "grad_norm": 0.22750414907932281, "learning_rate": 1e-05, "loss": 0.9996, "step": 40055 }, { "epoch": 35.48272807794508, "grad_norm": 0.22685526311397552, "learning_rate": 1e-05, "loss": 0.9703, "step": 40060 }, { "epoch": 35.487156775907884, "grad_norm": 0.218906432390213, "learning_rate": 1e-05, "loss": 1.0079, "step": 40065 }, { "epoch": 35.491585473870686, "grad_norm": 0.271619588136673, "learning_rate": 1e-05, "loss": 0.9826, "step": 40070 }, { "epoch": 35.49601417183348, "grad_norm": 0.23013056814670563, "learning_rate": 1e-05, "loss": 0.9447, "step": 40075 }, { "epoch": 35.50044286979628, "grad_norm": 0.25824683904647827, "learning_rate": 1e-05, "loss": 0.9406, "step": 40080 }, { "epoch": 35.50487156775908, "grad_norm": 0.26319095492362976, "learning_rate": 1e-05, "loss": 0.9817, "step": 40085 }, { "epoch": 35.509300265721876, "grad_norm": 0.279031902551651, "learning_rate": 1e-05, "loss": 1.0212, "step": 40090 }, { "epoch": 35.51372896368468, "grad_norm": 0.2286226451396942, "learning_rate": 1e-05, "loss": 0.9478, "step": 40095 }, { "epoch": 35.51815766164748, "grad_norm": 0.28177499771118164, "learning_rate": 1e-05, "loss": 0.9304, "step": 40100 }, { "epoch": 35.52258635961027, "grad_norm": 0.2695818543434143, "learning_rate": 1e-05, "loss": 0.9916, "step": 40105 }, { "epoch": 35.527015057573074, "grad_norm": 0.22232311964035034, "learning_rate": 1e-05, "loss": 0.9788, "step": 40110 }, { "epoch": 35.531443755535875, "grad_norm": 0.2320026308298111, "learning_rate": 1e-05, "loss": 0.9886, "step": 40115 }, { "epoch": 35.53587245349867, "grad_norm": 0.21246756613254547, "learning_rate": 1e-05, "loss": 0.9911, "step": 40120 }, { "epoch": 35.54030115146147, "grad_norm": 0.2931070625782013, "learning_rate": 1e-05, "loss": 1.02, "step": 40125 }, { "epoch": 35.54472984942427, "grad_norm": 0.21984563767910004, "learning_rate": 1e-05, "loss": 0.9792, "step": 40130 }, { "epoch": 35.549158547387066, "grad_norm": 0.2637583911418915, "learning_rate": 1e-05, "loss": 0.9928, "step": 40135 }, { "epoch": 35.55358724534987, "grad_norm": 0.2627270519733429, "learning_rate": 1e-05, "loss": 0.979, "step": 40140 }, { "epoch": 35.55801594331267, "grad_norm": 0.23151454329490662, "learning_rate": 1e-05, "loss": 0.9994, "step": 40145 }, { "epoch": 35.56244464127546, "grad_norm": 0.2779940962791443, "learning_rate": 1e-05, "loss": 0.9549, "step": 40150 }, { "epoch": 35.56687333923826, "grad_norm": 0.22090676426887512, "learning_rate": 1e-05, "loss": 1.0245, "step": 40155 }, { "epoch": 35.571302037201065, "grad_norm": 0.2810547649860382, "learning_rate": 1e-05, "loss": 0.9715, "step": 40160 }, { "epoch": 35.57573073516386, "grad_norm": 0.23355476558208466, "learning_rate": 1e-05, "loss": 0.9965, "step": 40165 }, { "epoch": 35.58015943312666, "grad_norm": 0.23681817948818207, "learning_rate": 1e-05, "loss": 0.9674, "step": 40170 }, { "epoch": 35.58458813108946, "grad_norm": 0.2516898810863495, "learning_rate": 1e-05, "loss": 0.9709, "step": 40175 }, { "epoch": 35.589016829052255, "grad_norm": 0.2581563889980316, "learning_rate": 1e-05, "loss": 1.0385, "step": 40180 }, { "epoch": 35.59344552701506, "grad_norm": 0.2318749874830246, "learning_rate": 1e-05, "loss": 0.9556, "step": 40185 }, { "epoch": 35.59787422497786, "grad_norm": 0.3203091323375702, "learning_rate": 1e-05, "loss": 1.0021, "step": 40190 }, { "epoch": 35.60230292294066, "grad_norm": 0.2570744752883911, "learning_rate": 1e-05, "loss": 0.9435, "step": 40195 }, { "epoch": 35.60673162090345, "grad_norm": 0.28338080644607544, "learning_rate": 1e-05, "loss": 1.0022, "step": 40200 }, { "epoch": 35.611160318866254, "grad_norm": 0.23535174131393433, "learning_rate": 1e-05, "loss": 0.9886, "step": 40205 }, { "epoch": 35.615589016829055, "grad_norm": 0.2677377760410309, "learning_rate": 1e-05, "loss": 0.9905, "step": 40210 }, { "epoch": 35.62001771479185, "grad_norm": 0.26248013973236084, "learning_rate": 1e-05, "loss": 0.9542, "step": 40215 }, { "epoch": 35.62444641275465, "grad_norm": 0.28670191764831543, "learning_rate": 1e-05, "loss": 1.0087, "step": 40220 }, { "epoch": 35.62887511071745, "grad_norm": 0.2198255956172943, "learning_rate": 1e-05, "loss": 0.9551, "step": 40225 }, { "epoch": 35.633303808680246, "grad_norm": 0.24472220242023468, "learning_rate": 1e-05, "loss": 1.0006, "step": 40230 }, { "epoch": 35.63773250664305, "grad_norm": 0.19831225275993347, "learning_rate": 1e-05, "loss": 0.9893, "step": 40235 }, { "epoch": 35.64216120460585, "grad_norm": 0.2829912304878235, "learning_rate": 1e-05, "loss": 0.9663, "step": 40240 }, { "epoch": 35.64658990256864, "grad_norm": 0.24533922970294952, "learning_rate": 1e-05, "loss": 1.0, "step": 40245 }, { "epoch": 35.651018600531444, "grad_norm": 0.2577371895313263, "learning_rate": 1e-05, "loss": 0.9846, "step": 40250 }, { "epoch": 35.655447298494245, "grad_norm": 0.2771191895008087, "learning_rate": 1e-05, "loss": 0.9649, "step": 40255 }, { "epoch": 35.65987599645704, "grad_norm": 0.2723691761493683, "learning_rate": 1e-05, "loss": 1.0382, "step": 40260 }, { "epoch": 35.66430469441984, "grad_norm": 0.23274526000022888, "learning_rate": 1e-05, "loss": 0.937, "step": 40265 }, { "epoch": 35.66873339238264, "grad_norm": 0.2028035968542099, "learning_rate": 1e-05, "loss": 0.9719, "step": 40270 }, { "epoch": 35.673162090345436, "grad_norm": 0.24089133739471436, "learning_rate": 1e-05, "loss": 0.9342, "step": 40275 }, { "epoch": 35.67759078830824, "grad_norm": 0.2692558765411377, "learning_rate": 1e-05, "loss": 0.9764, "step": 40280 }, { "epoch": 35.68201948627104, "grad_norm": 0.23566405475139618, "learning_rate": 1e-05, "loss": 0.9635, "step": 40285 }, { "epoch": 35.68644818423383, "grad_norm": 0.22322937846183777, "learning_rate": 1e-05, "loss": 1.0599, "step": 40290 }, { "epoch": 35.69087688219663, "grad_norm": 0.2418806105852127, "learning_rate": 1e-05, "loss": 1.0301, "step": 40295 }, { "epoch": 35.695305580159435, "grad_norm": 0.2599877417087555, "learning_rate": 1e-05, "loss": 0.9459, "step": 40300 }, { "epoch": 35.69973427812223, "grad_norm": 0.27923688292503357, "learning_rate": 1e-05, "loss": 0.9824, "step": 40305 }, { "epoch": 35.70416297608503, "grad_norm": 0.28744441270828247, "learning_rate": 1e-05, "loss": 0.9623, "step": 40310 }, { "epoch": 35.70859167404783, "grad_norm": 0.25891977548599243, "learning_rate": 1e-05, "loss": 0.9304, "step": 40315 }, { "epoch": 35.71302037201063, "grad_norm": 0.30485567450523376, "learning_rate": 1e-05, "loss": 0.958, "step": 40320 }, { "epoch": 35.717449069973426, "grad_norm": 0.22354458272457123, "learning_rate": 1e-05, "loss": 0.989, "step": 40325 }, { "epoch": 35.72187776793623, "grad_norm": 0.26116660237312317, "learning_rate": 1e-05, "loss": 0.9815, "step": 40330 }, { "epoch": 35.72630646589903, "grad_norm": 0.24663841724395752, "learning_rate": 1e-05, "loss": 0.9629, "step": 40335 }, { "epoch": 35.73073516386182, "grad_norm": 0.27957314252853394, "learning_rate": 1e-05, "loss": 1.0001, "step": 40340 }, { "epoch": 35.735163861824624, "grad_norm": 0.27709126472473145, "learning_rate": 1e-05, "loss": 1.0266, "step": 40345 }, { "epoch": 35.739592559787425, "grad_norm": 0.2336152046918869, "learning_rate": 1e-05, "loss": 0.9768, "step": 40350 }, { "epoch": 35.74402125775022, "grad_norm": 0.24564549326896667, "learning_rate": 1e-05, "loss": 0.9686, "step": 40355 }, { "epoch": 35.74844995571302, "grad_norm": 0.2178659737110138, "learning_rate": 1e-05, "loss": 1.0251, "step": 40360 }, { "epoch": 35.75287865367582, "grad_norm": 0.2633017897605896, "learning_rate": 1e-05, "loss": 0.9366, "step": 40365 }, { "epoch": 35.757307351638616, "grad_norm": 0.23075000941753387, "learning_rate": 1e-05, "loss": 0.9924, "step": 40370 }, { "epoch": 35.76173604960142, "grad_norm": 0.24644388258457184, "learning_rate": 1e-05, "loss": 0.9283, "step": 40375 }, { "epoch": 35.76616474756422, "grad_norm": 0.1923307478427887, "learning_rate": 1e-05, "loss": 0.974, "step": 40380 }, { "epoch": 35.77059344552701, "grad_norm": 0.2611455023288727, "learning_rate": 1e-05, "loss": 0.9661, "step": 40385 }, { "epoch": 35.775022143489814, "grad_norm": 0.25513362884521484, "learning_rate": 1e-05, "loss": 0.9761, "step": 40390 }, { "epoch": 35.779450841452615, "grad_norm": 0.2518458366394043, "learning_rate": 1e-05, "loss": 0.9426, "step": 40395 }, { "epoch": 35.78387953941541, "grad_norm": 0.299014151096344, "learning_rate": 1e-05, "loss": 0.9349, "step": 40400 }, { "epoch": 35.78830823737821, "grad_norm": 0.2497718781232834, "learning_rate": 1e-05, "loss": 1.0104, "step": 40405 }, { "epoch": 35.79273693534101, "grad_norm": 0.23372980952262878, "learning_rate": 1e-05, "loss": 1.0559, "step": 40410 }, { "epoch": 35.797165633303806, "grad_norm": 0.2446320503950119, "learning_rate": 1e-05, "loss": 0.9616, "step": 40415 }, { "epoch": 35.80159433126661, "grad_norm": 0.2172650396823883, "learning_rate": 1e-05, "loss": 0.9852, "step": 40420 }, { "epoch": 35.80602302922941, "grad_norm": 0.22092875838279724, "learning_rate": 1e-05, "loss": 0.9523, "step": 40425 }, { "epoch": 35.8104517271922, "grad_norm": 0.2555411756038666, "learning_rate": 1e-05, "loss": 0.9635, "step": 40430 }, { "epoch": 35.814880425155, "grad_norm": 0.2262481302022934, "learning_rate": 1e-05, "loss": 0.9538, "step": 40435 }, { "epoch": 35.819309123117804, "grad_norm": 0.24759456515312195, "learning_rate": 1e-05, "loss": 1.0139, "step": 40440 }, { "epoch": 35.823737821080606, "grad_norm": 0.2330486923456192, "learning_rate": 1e-05, "loss": 0.9839, "step": 40445 }, { "epoch": 35.8281665190434, "grad_norm": 0.2492523342370987, "learning_rate": 1e-05, "loss": 1.0005, "step": 40450 }, { "epoch": 35.8325952170062, "grad_norm": 0.2578888237476349, "learning_rate": 1e-05, "loss": 0.9712, "step": 40455 }, { "epoch": 35.837023914969, "grad_norm": 0.23786818981170654, "learning_rate": 1e-05, "loss": 0.952, "step": 40460 }, { "epoch": 35.841452612931796, "grad_norm": 0.2568472921848297, "learning_rate": 1e-05, "loss": 0.9878, "step": 40465 }, { "epoch": 35.8458813108946, "grad_norm": 0.2453257292509079, "learning_rate": 1e-05, "loss": 0.9709, "step": 40470 }, { "epoch": 35.8503100088574, "grad_norm": 0.2947475016117096, "learning_rate": 1e-05, "loss": 0.9495, "step": 40475 }, { "epoch": 35.85473870682019, "grad_norm": 0.223874032497406, "learning_rate": 1e-05, "loss": 0.9081, "step": 40480 }, { "epoch": 35.859167404782994, "grad_norm": 0.23459452390670776, "learning_rate": 1e-05, "loss": 0.992, "step": 40485 }, { "epoch": 35.863596102745795, "grad_norm": 0.26651930809020996, "learning_rate": 1e-05, "loss": 0.9373, "step": 40490 }, { "epoch": 35.86802480070859, "grad_norm": 0.2747059166431427, "learning_rate": 1e-05, "loss": 0.9809, "step": 40495 }, { "epoch": 35.87245349867139, "grad_norm": 0.22639040648937225, "learning_rate": 1e-05, "loss": 0.9986, "step": 40500 }, { "epoch": 35.87688219663419, "grad_norm": 0.2218010574579239, "learning_rate": 1e-05, "loss": 0.9785, "step": 40505 }, { "epoch": 35.881310894596986, "grad_norm": 0.22835911810398102, "learning_rate": 1e-05, "loss": 0.9697, "step": 40510 }, { "epoch": 35.88573959255979, "grad_norm": 0.24543921649456024, "learning_rate": 1e-05, "loss": 1.0116, "step": 40515 }, { "epoch": 35.89016829052259, "grad_norm": 0.26899459958076477, "learning_rate": 1e-05, "loss": 0.9665, "step": 40520 }, { "epoch": 35.89459698848538, "grad_norm": 0.22196191549301147, "learning_rate": 1e-05, "loss": 0.9963, "step": 40525 }, { "epoch": 35.899025686448184, "grad_norm": 0.23421040177345276, "learning_rate": 1e-05, "loss": 0.9657, "step": 40530 }, { "epoch": 35.903454384410985, "grad_norm": 0.2716995179653168, "learning_rate": 1e-05, "loss": 0.9814, "step": 40535 }, { "epoch": 35.90788308237378, "grad_norm": 0.20850670337677002, "learning_rate": 1e-05, "loss": 0.9452, "step": 40540 }, { "epoch": 35.91231178033658, "grad_norm": 0.23377592861652374, "learning_rate": 1e-05, "loss": 1.0152, "step": 40545 }, { "epoch": 35.91674047829938, "grad_norm": 0.21797043085098267, "learning_rate": 1e-05, "loss": 1.0193, "step": 40550 }, { "epoch": 35.921169176262175, "grad_norm": 0.2463165670633316, "learning_rate": 1e-05, "loss": 1.0125, "step": 40555 }, { "epoch": 35.92559787422498, "grad_norm": 0.22919940948486328, "learning_rate": 1e-05, "loss": 0.9782, "step": 40560 }, { "epoch": 35.93002657218778, "grad_norm": 0.2925037145614624, "learning_rate": 1e-05, "loss": 0.9905, "step": 40565 }, { "epoch": 35.93445527015058, "grad_norm": 0.22773735225200653, "learning_rate": 1e-05, "loss": 1.0037, "step": 40570 }, { "epoch": 35.93888396811337, "grad_norm": 0.22901175916194916, "learning_rate": 1e-05, "loss": 0.9971, "step": 40575 }, { "epoch": 35.943312666076174, "grad_norm": 0.3081228733062744, "learning_rate": 1e-05, "loss": 0.9619, "step": 40580 }, { "epoch": 35.947741364038976, "grad_norm": 0.3218163251876831, "learning_rate": 1e-05, "loss": 0.9114, "step": 40585 }, { "epoch": 35.95217006200177, "grad_norm": 0.22988595068454742, "learning_rate": 1e-05, "loss": 0.94, "step": 40590 }, { "epoch": 35.95659875996457, "grad_norm": 0.23910291492938995, "learning_rate": 1e-05, "loss": 0.9379, "step": 40595 }, { "epoch": 35.96102745792737, "grad_norm": 0.3272089660167694, "learning_rate": 1e-05, "loss": 0.9443, "step": 40600 }, { "epoch": 35.965456155890166, "grad_norm": 0.23385964334011078, "learning_rate": 1e-05, "loss": 1.0337, "step": 40605 }, { "epoch": 35.96988485385297, "grad_norm": 0.24690861999988556, "learning_rate": 1e-05, "loss": 0.9996, "step": 40610 }, { "epoch": 35.97431355181577, "grad_norm": 0.2402074784040451, "learning_rate": 1e-05, "loss": 1.0091, "step": 40615 }, { "epoch": 35.97874224977856, "grad_norm": 0.2547287344932556, "learning_rate": 1e-05, "loss": 1.0178, "step": 40620 }, { "epoch": 35.983170947741364, "grad_norm": 0.25014543533325195, "learning_rate": 1e-05, "loss": 0.9819, "step": 40625 }, { "epoch": 35.987599645704165, "grad_norm": 0.2261633425951004, "learning_rate": 1e-05, "loss": 0.9636, "step": 40630 }, { "epoch": 35.99202834366696, "grad_norm": 0.22299930453300476, "learning_rate": 1e-05, "loss": 0.9891, "step": 40635 }, { "epoch": 35.99645704162976, "grad_norm": 0.23826146125793457, "learning_rate": 1e-05, "loss": 0.9352, "step": 40640 }, { "epoch": 36.00088573959256, "grad_norm": 0.22658748924732208, "learning_rate": 1e-05, "loss": 0.9619, "step": 40645 }, { "epoch": 36.005314437555356, "grad_norm": 0.20685550570487976, "learning_rate": 1e-05, "loss": 0.9958, "step": 40650 }, { "epoch": 36.00974313551816, "grad_norm": 0.24671468138694763, "learning_rate": 1e-05, "loss": 0.9836, "step": 40655 }, { "epoch": 36.01417183348096, "grad_norm": 0.24889251589775085, "learning_rate": 1e-05, "loss": 0.9439, "step": 40660 }, { "epoch": 36.01860053144375, "grad_norm": 0.2482403814792633, "learning_rate": 1e-05, "loss": 0.9881, "step": 40665 }, { "epoch": 36.02302922940655, "grad_norm": 0.22407257556915283, "learning_rate": 1e-05, "loss": 0.9928, "step": 40670 }, { "epoch": 36.027457927369355, "grad_norm": 0.22811315953731537, "learning_rate": 1e-05, "loss": 1.0006, "step": 40675 }, { "epoch": 36.03188662533215, "grad_norm": 0.24326738715171814, "learning_rate": 1e-05, "loss": 0.95, "step": 40680 }, { "epoch": 36.03631532329495, "grad_norm": 0.22853440046310425, "learning_rate": 1e-05, "loss": 1.0014, "step": 40685 }, { "epoch": 36.04074402125775, "grad_norm": 0.24981658160686493, "learning_rate": 1e-05, "loss": 0.9271, "step": 40690 }, { "epoch": 36.04517271922055, "grad_norm": 0.22410358488559723, "learning_rate": 1e-05, "loss": 0.9397, "step": 40695 }, { "epoch": 36.04960141718335, "grad_norm": 0.2527514398097992, "learning_rate": 1e-05, "loss": 1.0095, "step": 40700 }, { "epoch": 36.05403011514615, "grad_norm": 0.22499558329582214, "learning_rate": 1e-05, "loss": 0.9435, "step": 40705 }, { "epoch": 36.05845881310895, "grad_norm": 0.2345588058233261, "learning_rate": 1e-05, "loss": 0.9655, "step": 40710 }, { "epoch": 36.06288751107174, "grad_norm": 0.23372536897659302, "learning_rate": 1e-05, "loss": 0.9907, "step": 40715 }, { "epoch": 36.067316209034544, "grad_norm": 0.24152472615242004, "learning_rate": 1e-05, "loss": 1.0142, "step": 40720 }, { "epoch": 36.071744906997345, "grad_norm": 0.23476995527744293, "learning_rate": 1e-05, "loss": 0.9732, "step": 40725 }, { "epoch": 36.07617360496014, "grad_norm": 0.332564115524292, "learning_rate": 1e-05, "loss": 0.9826, "step": 40730 }, { "epoch": 36.08060230292294, "grad_norm": 0.23601870238780975, "learning_rate": 1e-05, "loss": 0.9537, "step": 40735 }, { "epoch": 36.08503100088574, "grad_norm": 0.24262824654579163, "learning_rate": 1e-05, "loss": 0.9701, "step": 40740 }, { "epoch": 36.089459698848536, "grad_norm": 0.2730354070663452, "learning_rate": 1e-05, "loss": 0.994, "step": 40745 }, { "epoch": 36.09388839681134, "grad_norm": 0.28003156185150146, "learning_rate": 1e-05, "loss": 1.0146, "step": 40750 }, { "epoch": 36.09831709477414, "grad_norm": 0.26177123188972473, "learning_rate": 1e-05, "loss": 0.9343, "step": 40755 }, { "epoch": 36.10274579273693, "grad_norm": 0.2058447003364563, "learning_rate": 1e-05, "loss": 0.9929, "step": 40760 }, { "epoch": 36.107174490699734, "grad_norm": 0.21758095920085907, "learning_rate": 1e-05, "loss": 0.9653, "step": 40765 }, { "epoch": 36.111603188662535, "grad_norm": 0.21229171752929688, "learning_rate": 1e-05, "loss": 0.965, "step": 40770 }, { "epoch": 36.11603188662533, "grad_norm": 0.2983495593070984, "learning_rate": 1e-05, "loss": 1.0051, "step": 40775 }, { "epoch": 36.12046058458813, "grad_norm": 0.23964758217334747, "learning_rate": 1e-05, "loss": 0.9733, "step": 40780 }, { "epoch": 36.12488928255093, "grad_norm": 0.2386459857225418, "learning_rate": 1e-05, "loss": 0.967, "step": 40785 }, { "epoch": 36.129317980513726, "grad_norm": 0.2813543975353241, "learning_rate": 1e-05, "loss": 0.918, "step": 40790 }, { "epoch": 36.13374667847653, "grad_norm": 0.26468729972839355, "learning_rate": 1e-05, "loss": 0.9322, "step": 40795 }, { "epoch": 36.13817537643933, "grad_norm": 0.25739598274230957, "learning_rate": 1e-05, "loss": 0.9893, "step": 40800 }, { "epoch": 36.14260407440213, "grad_norm": 0.23630237579345703, "learning_rate": 1e-05, "loss": 0.9476, "step": 40805 }, { "epoch": 36.14703277236492, "grad_norm": 0.2426365166902542, "learning_rate": 1e-05, "loss": 0.9649, "step": 40810 }, { "epoch": 36.151461470327725, "grad_norm": 0.24553999304771423, "learning_rate": 1e-05, "loss": 1.0207, "step": 40815 }, { "epoch": 36.155890168290526, "grad_norm": 0.23183932900428772, "learning_rate": 1e-05, "loss": 0.9476, "step": 40820 }, { "epoch": 36.16031886625332, "grad_norm": 0.2705097496509552, "learning_rate": 1e-05, "loss": 1.0494, "step": 40825 }, { "epoch": 36.16474756421612, "grad_norm": 0.20186571776866913, "learning_rate": 1e-05, "loss": 1.0008, "step": 40830 }, { "epoch": 36.16917626217892, "grad_norm": 0.21930205821990967, "learning_rate": 1e-05, "loss": 0.9587, "step": 40835 }, { "epoch": 36.173604960141716, "grad_norm": 0.27030783891677856, "learning_rate": 1e-05, "loss": 0.964, "step": 40840 }, { "epoch": 36.17803365810452, "grad_norm": 0.23785150051116943, "learning_rate": 1e-05, "loss": 0.9695, "step": 40845 }, { "epoch": 36.18246235606732, "grad_norm": 0.23985576629638672, "learning_rate": 1e-05, "loss": 0.9845, "step": 40850 }, { "epoch": 36.18689105403011, "grad_norm": 0.24489888548851013, "learning_rate": 1e-05, "loss": 0.9477, "step": 40855 }, { "epoch": 36.191319751992914, "grad_norm": 0.22954946756362915, "learning_rate": 1e-05, "loss": 1.0347, "step": 40860 }, { "epoch": 36.195748449955715, "grad_norm": 0.23638089001178741, "learning_rate": 1e-05, "loss": 1.0018, "step": 40865 }, { "epoch": 36.20017714791851, "grad_norm": 0.23923087120056152, "learning_rate": 1e-05, "loss": 0.9847, "step": 40870 }, { "epoch": 36.20460584588131, "grad_norm": 0.2198793590068817, "learning_rate": 1e-05, "loss": 0.9558, "step": 40875 }, { "epoch": 36.20903454384411, "grad_norm": 0.2216985672712326, "learning_rate": 1e-05, "loss": 0.9481, "step": 40880 }, { "epoch": 36.213463241806906, "grad_norm": 0.25956448912620544, "learning_rate": 1e-05, "loss": 1.0008, "step": 40885 }, { "epoch": 36.21789193976971, "grad_norm": 0.21048910915851593, "learning_rate": 1e-05, "loss": 1.0148, "step": 40890 }, { "epoch": 36.22232063773251, "grad_norm": 0.277170330286026, "learning_rate": 1e-05, "loss": 0.9562, "step": 40895 }, { "epoch": 36.2267493356953, "grad_norm": 0.23861654102802277, "learning_rate": 1e-05, "loss": 0.9772, "step": 40900 }, { "epoch": 36.231178033658104, "grad_norm": 0.2342303991317749, "learning_rate": 1e-05, "loss": 0.9623, "step": 40905 }, { "epoch": 36.235606731620905, "grad_norm": 0.27772581577301025, "learning_rate": 1e-05, "loss": 0.9892, "step": 40910 }, { "epoch": 36.2400354295837, "grad_norm": 0.25007620453834534, "learning_rate": 1e-05, "loss": 0.9301, "step": 40915 }, { "epoch": 36.2444641275465, "grad_norm": 0.234497532248497, "learning_rate": 1e-05, "loss": 1.0619, "step": 40920 }, { "epoch": 36.2488928255093, "grad_norm": 0.2606408894062042, "learning_rate": 1e-05, "loss": 0.9774, "step": 40925 }, { "epoch": 36.2533215234721, "grad_norm": 0.2422298938035965, "learning_rate": 1e-05, "loss": 0.9554, "step": 40930 }, { "epoch": 36.2577502214349, "grad_norm": 0.2119947373867035, "learning_rate": 1e-05, "loss": 0.9781, "step": 40935 }, { "epoch": 36.2621789193977, "grad_norm": 0.25420472025871277, "learning_rate": 1e-05, "loss": 1.0521, "step": 40940 }, { "epoch": 36.2666076173605, "grad_norm": 0.25499996542930603, "learning_rate": 1e-05, "loss": 0.9844, "step": 40945 }, { "epoch": 36.27103631532329, "grad_norm": 0.2024070769548416, "learning_rate": 1e-05, "loss": 0.9763, "step": 40950 }, { "epoch": 36.275465013286095, "grad_norm": 0.21779686212539673, "learning_rate": 1e-05, "loss": 0.9839, "step": 40955 }, { "epoch": 36.279893711248896, "grad_norm": 0.24275332689285278, "learning_rate": 1e-05, "loss": 1.0219, "step": 40960 }, { "epoch": 36.28432240921169, "grad_norm": 0.23373295366764069, "learning_rate": 1e-05, "loss": 0.944, "step": 40965 }, { "epoch": 36.28875110717449, "grad_norm": 0.24112732708454132, "learning_rate": 1e-05, "loss": 0.9863, "step": 40970 }, { "epoch": 36.29317980513729, "grad_norm": 0.22080321609973907, "learning_rate": 1e-05, "loss": 0.9807, "step": 40975 }, { "epoch": 36.297608503100086, "grad_norm": 0.21972325444221497, "learning_rate": 1e-05, "loss": 1.0237, "step": 40980 }, { "epoch": 36.30203720106289, "grad_norm": 0.23970209062099457, "learning_rate": 1e-05, "loss": 1.0164, "step": 40985 }, { "epoch": 36.30646589902569, "grad_norm": 0.24055175483226776, "learning_rate": 1e-05, "loss": 0.9984, "step": 40990 }, { "epoch": 36.31089459698848, "grad_norm": 0.24951638281345367, "learning_rate": 1e-05, "loss": 0.9765, "step": 40995 }, { "epoch": 36.315323294951284, "grad_norm": 0.23876352608203888, "learning_rate": 1e-05, "loss": 0.9579, "step": 41000 }, { "epoch": 36.319751992914085, "grad_norm": 0.29305002093315125, "learning_rate": 1e-05, "loss": 0.9789, "step": 41005 }, { "epoch": 36.32418069087688, "grad_norm": 0.2657853364944458, "learning_rate": 1e-05, "loss": 0.9816, "step": 41010 }, { "epoch": 36.32860938883968, "grad_norm": 0.25105082988739014, "learning_rate": 1e-05, "loss": 0.9694, "step": 41015 }, { "epoch": 36.33303808680248, "grad_norm": 0.2244410514831543, "learning_rate": 1e-05, "loss": 1.0627, "step": 41020 }, { "epoch": 36.337466784765276, "grad_norm": 0.23671582341194153, "learning_rate": 1e-05, "loss": 0.9983, "step": 41025 }, { "epoch": 36.34189548272808, "grad_norm": 0.2655124366283417, "learning_rate": 1e-05, "loss": 0.9562, "step": 41030 }, { "epoch": 36.34632418069088, "grad_norm": 0.2666459083557129, "learning_rate": 1e-05, "loss": 0.9588, "step": 41035 }, { "epoch": 36.35075287865367, "grad_norm": 0.25170886516571045, "learning_rate": 1e-05, "loss": 1.0348, "step": 41040 }, { "epoch": 36.355181576616474, "grad_norm": 0.2830251157283783, "learning_rate": 1e-05, "loss": 0.9938, "step": 41045 }, { "epoch": 36.359610274579275, "grad_norm": 0.27102768421173096, "learning_rate": 1e-05, "loss": 1.0052, "step": 41050 }, { "epoch": 36.364038972542076, "grad_norm": 0.24004510045051575, "learning_rate": 1e-05, "loss": 1.0108, "step": 41055 }, { "epoch": 36.36846767050487, "grad_norm": 0.2342505156993866, "learning_rate": 1e-05, "loss": 1.0229, "step": 41060 }, { "epoch": 36.37289636846767, "grad_norm": 0.22039338946342468, "learning_rate": 1e-05, "loss": 0.9683, "step": 41065 }, { "epoch": 36.37732506643047, "grad_norm": 0.2308778464794159, "learning_rate": 1e-05, "loss": 0.9704, "step": 41070 }, { "epoch": 36.38175376439327, "grad_norm": 0.2716803252696991, "learning_rate": 1e-05, "loss": 1.0063, "step": 41075 }, { "epoch": 36.38618246235607, "grad_norm": 0.2282794564962387, "learning_rate": 1e-05, "loss": 1.0362, "step": 41080 }, { "epoch": 36.39061116031887, "grad_norm": 0.21496593952178955, "learning_rate": 1e-05, "loss": 0.9766, "step": 41085 }, { "epoch": 36.39503985828166, "grad_norm": 0.2691848874092102, "learning_rate": 1e-05, "loss": 0.9861, "step": 41090 }, { "epoch": 36.399468556244464, "grad_norm": 0.25886887311935425, "learning_rate": 1e-05, "loss": 0.9789, "step": 41095 }, { "epoch": 36.403897254207266, "grad_norm": 0.23968742787837982, "learning_rate": 1e-05, "loss": 0.9976, "step": 41100 }, { "epoch": 36.40832595217006, "grad_norm": 0.22284342348575592, "learning_rate": 1e-05, "loss": 1.0186, "step": 41105 }, { "epoch": 36.41275465013286, "grad_norm": 0.27596932649612427, "learning_rate": 1e-05, "loss": 1.0498, "step": 41110 }, { "epoch": 36.41718334809566, "grad_norm": 0.24692389369010925, "learning_rate": 1e-05, "loss": 0.9744, "step": 41115 }, { "epoch": 36.421612046058456, "grad_norm": 0.274220734834671, "learning_rate": 1e-05, "loss": 0.9417, "step": 41120 }, { "epoch": 36.42604074402126, "grad_norm": 0.222972109913826, "learning_rate": 1e-05, "loss": 0.9793, "step": 41125 }, { "epoch": 36.43046944198406, "grad_norm": 0.20551925897598267, "learning_rate": 1e-05, "loss": 0.9611, "step": 41130 }, { "epoch": 36.43489813994685, "grad_norm": 0.266122043132782, "learning_rate": 1e-05, "loss": 0.9665, "step": 41135 }, { "epoch": 36.439326837909654, "grad_norm": 0.2282188981771469, "learning_rate": 1e-05, "loss": 0.9566, "step": 41140 }, { "epoch": 36.443755535872455, "grad_norm": 0.23219631612300873, "learning_rate": 1e-05, "loss": 0.9874, "step": 41145 }, { "epoch": 36.44818423383525, "grad_norm": 0.22225667536258698, "learning_rate": 1e-05, "loss": 0.9682, "step": 41150 }, { "epoch": 36.45261293179805, "grad_norm": 0.2564220130443573, "learning_rate": 1e-05, "loss": 0.9807, "step": 41155 }, { "epoch": 36.45704162976085, "grad_norm": 0.22233328223228455, "learning_rate": 1e-05, "loss": 1.0041, "step": 41160 }, { "epoch": 36.461470327723646, "grad_norm": 0.24494224786758423, "learning_rate": 1e-05, "loss": 1.0279, "step": 41165 }, { "epoch": 36.46589902568645, "grad_norm": 0.2292802631855011, "learning_rate": 1e-05, "loss": 1.0053, "step": 41170 }, { "epoch": 36.47032772364925, "grad_norm": 0.2072295844554901, "learning_rate": 1e-05, "loss": 0.9134, "step": 41175 }, { "epoch": 36.47475642161205, "grad_norm": 0.2157590687274933, "learning_rate": 1e-05, "loss": 0.9612, "step": 41180 }, { "epoch": 36.479185119574844, "grad_norm": 0.2594149112701416, "learning_rate": 1e-05, "loss": 0.9902, "step": 41185 }, { "epoch": 36.483613817537645, "grad_norm": 0.23352308571338654, "learning_rate": 1e-05, "loss": 0.9477, "step": 41190 }, { "epoch": 36.488042515500446, "grad_norm": 0.2546723186969757, "learning_rate": 1e-05, "loss": 0.9665, "step": 41195 }, { "epoch": 36.49247121346324, "grad_norm": 0.24370384216308594, "learning_rate": 1e-05, "loss": 0.9829, "step": 41200 }, { "epoch": 36.49689991142604, "grad_norm": 0.23751384019851685, "learning_rate": 1e-05, "loss": 0.9602, "step": 41205 }, { "epoch": 36.50132860938884, "grad_norm": 0.20981431007385254, "learning_rate": 1e-05, "loss": 1.0063, "step": 41210 }, { "epoch": 36.50575730735164, "grad_norm": 0.24499230086803436, "learning_rate": 1e-05, "loss": 0.9509, "step": 41215 }, { "epoch": 36.51018600531444, "grad_norm": 0.19730959832668304, "learning_rate": 1e-05, "loss": 0.9899, "step": 41220 }, { "epoch": 36.51461470327724, "grad_norm": 0.22785040736198425, "learning_rate": 1e-05, "loss": 0.9556, "step": 41225 }, { "epoch": 36.51904340124003, "grad_norm": 0.22181697189807892, "learning_rate": 1e-05, "loss": 0.9576, "step": 41230 }, { "epoch": 36.523472099202834, "grad_norm": 0.23306769132614136, "learning_rate": 1e-05, "loss": 0.9835, "step": 41235 }, { "epoch": 36.527900797165636, "grad_norm": 0.2977795898914337, "learning_rate": 1e-05, "loss": 0.9906, "step": 41240 }, { "epoch": 36.53232949512843, "grad_norm": 0.24598073959350586, "learning_rate": 1e-05, "loss": 0.9759, "step": 41245 }, { "epoch": 36.53675819309123, "grad_norm": 0.26347070932388306, "learning_rate": 1e-05, "loss": 0.9769, "step": 41250 }, { "epoch": 36.54118689105403, "grad_norm": 0.24939309060573578, "learning_rate": 1e-05, "loss": 0.9593, "step": 41255 }, { "epoch": 36.545615589016826, "grad_norm": 0.3335600793361664, "learning_rate": 1e-05, "loss": 0.9671, "step": 41260 }, { "epoch": 36.55004428697963, "grad_norm": 0.22563804686069489, "learning_rate": 1e-05, "loss": 0.9332, "step": 41265 }, { "epoch": 36.55447298494243, "grad_norm": 0.22194381058216095, "learning_rate": 1e-05, "loss": 0.9922, "step": 41270 }, { "epoch": 36.55890168290522, "grad_norm": 0.2519119083881378, "learning_rate": 1e-05, "loss": 0.9612, "step": 41275 }, { "epoch": 36.563330380868024, "grad_norm": 0.24392183125019073, "learning_rate": 1e-05, "loss": 0.9376, "step": 41280 }, { "epoch": 36.567759078830825, "grad_norm": 0.3094474971294403, "learning_rate": 1e-05, "loss": 0.9898, "step": 41285 }, { "epoch": 36.57218777679362, "grad_norm": 0.21822114288806915, "learning_rate": 1e-05, "loss": 0.9707, "step": 41290 }, { "epoch": 36.57661647475642, "grad_norm": 0.23856975138187408, "learning_rate": 1e-05, "loss": 0.9597, "step": 41295 }, { "epoch": 36.58104517271922, "grad_norm": 0.2759832441806793, "learning_rate": 1e-05, "loss": 0.9929, "step": 41300 }, { "epoch": 36.58547387068202, "grad_norm": 0.28036031126976013, "learning_rate": 1e-05, "loss": 0.9978, "step": 41305 }, { "epoch": 36.58990256864482, "grad_norm": 0.21053095161914825, "learning_rate": 1e-05, "loss": 1.0126, "step": 41310 }, { "epoch": 36.59433126660762, "grad_norm": 0.2949060797691345, "learning_rate": 1e-05, "loss": 0.9475, "step": 41315 }, { "epoch": 36.59875996457042, "grad_norm": 0.2263978272676468, "learning_rate": 1e-05, "loss": 1.0108, "step": 41320 }, { "epoch": 36.60318866253321, "grad_norm": 0.2795294225215912, "learning_rate": 1e-05, "loss": 0.9913, "step": 41325 }, { "epoch": 36.607617360496015, "grad_norm": 0.2060815691947937, "learning_rate": 1e-05, "loss": 0.9501, "step": 41330 }, { "epoch": 36.612046058458816, "grad_norm": 0.2050504982471466, "learning_rate": 1e-05, "loss": 0.9378, "step": 41335 }, { "epoch": 36.61647475642161, "grad_norm": 0.2511064410209656, "learning_rate": 1e-05, "loss": 0.9955, "step": 41340 }, { "epoch": 36.62090345438441, "grad_norm": 0.26368412375450134, "learning_rate": 1e-05, "loss": 0.9488, "step": 41345 }, { "epoch": 36.62533215234721, "grad_norm": 0.21881631016731262, "learning_rate": 1e-05, "loss": 0.9606, "step": 41350 }, { "epoch": 36.62976085031001, "grad_norm": 0.24021776020526886, "learning_rate": 1e-05, "loss": 0.9528, "step": 41355 }, { "epoch": 36.63418954827281, "grad_norm": 0.26761144399642944, "learning_rate": 1e-05, "loss": 1.0077, "step": 41360 }, { "epoch": 36.63861824623561, "grad_norm": 0.3264281451702118, "learning_rate": 1e-05, "loss": 0.9344, "step": 41365 }, { "epoch": 36.6430469441984, "grad_norm": 0.21894054114818573, "learning_rate": 1e-05, "loss": 1.0121, "step": 41370 }, { "epoch": 36.647475642161204, "grad_norm": 0.24946807324886322, "learning_rate": 1e-05, "loss": 0.9749, "step": 41375 }, { "epoch": 36.651904340124005, "grad_norm": 0.22466206550598145, "learning_rate": 1e-05, "loss": 0.9498, "step": 41380 }, { "epoch": 36.6563330380868, "grad_norm": 0.2501908242702484, "learning_rate": 1e-05, "loss": 1.0323, "step": 41385 }, { "epoch": 36.6607617360496, "grad_norm": 0.2907739281654358, "learning_rate": 1e-05, "loss": 0.9819, "step": 41390 }, { "epoch": 36.6651904340124, "grad_norm": 0.2530531883239746, "learning_rate": 1e-05, "loss": 1.0021, "step": 41395 }, { "epoch": 36.669619131975196, "grad_norm": 0.27774450182914734, "learning_rate": 1e-05, "loss": 1.0385, "step": 41400 }, { "epoch": 36.674047829938, "grad_norm": 0.2448064535856247, "learning_rate": 1e-05, "loss": 1.0071, "step": 41405 }, { "epoch": 36.6784765279008, "grad_norm": 0.2538725733757019, "learning_rate": 1e-05, "loss": 0.9744, "step": 41410 }, { "epoch": 36.68290522586359, "grad_norm": 0.2417149692773819, "learning_rate": 1e-05, "loss": 0.9131, "step": 41415 }, { "epoch": 36.687333923826394, "grad_norm": 0.22603629529476166, "learning_rate": 1e-05, "loss": 1.0074, "step": 41420 }, { "epoch": 36.691762621789195, "grad_norm": 0.2294352948665619, "learning_rate": 1e-05, "loss": 0.9951, "step": 41425 }, { "epoch": 36.696191319751996, "grad_norm": 0.24379414319992065, "learning_rate": 1e-05, "loss": 0.9879, "step": 41430 }, { "epoch": 36.70062001771479, "grad_norm": 0.24497553706169128, "learning_rate": 1e-05, "loss": 0.9783, "step": 41435 }, { "epoch": 36.70504871567759, "grad_norm": 0.2463923841714859, "learning_rate": 1e-05, "loss": 0.987, "step": 41440 }, { "epoch": 36.70947741364039, "grad_norm": 0.2909233868122101, "learning_rate": 1e-05, "loss": 0.9737, "step": 41445 }, { "epoch": 36.71390611160319, "grad_norm": 0.24570921063423157, "learning_rate": 1e-05, "loss": 1.0233, "step": 41450 }, { "epoch": 36.71833480956599, "grad_norm": 0.23515303432941437, "learning_rate": 1e-05, "loss": 0.9587, "step": 41455 }, { "epoch": 36.72276350752879, "grad_norm": 0.28930291533470154, "learning_rate": 1e-05, "loss": 1.0255, "step": 41460 }, { "epoch": 36.72719220549158, "grad_norm": 0.2584001421928406, "learning_rate": 1e-05, "loss": 0.9724, "step": 41465 }, { "epoch": 36.731620903454385, "grad_norm": 0.22280341386795044, "learning_rate": 1e-05, "loss": 0.9927, "step": 41470 }, { "epoch": 36.736049601417186, "grad_norm": 0.250087708234787, "learning_rate": 1e-05, "loss": 0.9554, "step": 41475 }, { "epoch": 36.74047829937998, "grad_norm": 0.255700945854187, "learning_rate": 1e-05, "loss": 1.0305, "step": 41480 }, { "epoch": 36.74490699734278, "grad_norm": 0.23839564621448517, "learning_rate": 1e-05, "loss": 0.9608, "step": 41485 }, { "epoch": 36.74933569530558, "grad_norm": 0.26836100220680237, "learning_rate": 1e-05, "loss": 0.972, "step": 41490 }, { "epoch": 36.753764393268376, "grad_norm": 0.21705758571624756, "learning_rate": 1e-05, "loss": 0.9626, "step": 41495 }, { "epoch": 36.75819309123118, "grad_norm": 0.204350084066391, "learning_rate": 1e-05, "loss": 0.9801, "step": 41500 }, { "epoch": 36.76262178919398, "grad_norm": 0.2528984546661377, "learning_rate": 1e-05, "loss": 0.9791, "step": 41505 }, { "epoch": 36.76705048715677, "grad_norm": 0.19084975123405457, "learning_rate": 1e-05, "loss": 0.9978, "step": 41510 }, { "epoch": 36.771479185119574, "grad_norm": 0.2052205353975296, "learning_rate": 1e-05, "loss": 0.9566, "step": 41515 }, { "epoch": 36.775907883082375, "grad_norm": 0.24439504742622375, "learning_rate": 1e-05, "loss": 1.039, "step": 41520 }, { "epoch": 36.78033658104517, "grad_norm": 0.25444209575653076, "learning_rate": 1e-05, "loss": 0.9225, "step": 41525 }, { "epoch": 36.78476527900797, "grad_norm": 0.23856742680072784, "learning_rate": 1e-05, "loss": 1.0034, "step": 41530 }, { "epoch": 36.78919397697077, "grad_norm": 0.2234344482421875, "learning_rate": 1e-05, "loss": 1.027, "step": 41535 }, { "epoch": 36.79362267493357, "grad_norm": 0.270016074180603, "learning_rate": 1e-05, "loss": 0.9702, "step": 41540 }, { "epoch": 36.79805137289637, "grad_norm": 0.2673543393611908, "learning_rate": 1e-05, "loss": 0.9746, "step": 41545 }, { "epoch": 36.80248007085917, "grad_norm": 0.24772733449935913, "learning_rate": 1e-05, "loss": 0.9652, "step": 41550 }, { "epoch": 36.80690876882197, "grad_norm": 0.23683005571365356, "learning_rate": 1e-05, "loss": 0.9731, "step": 41555 }, { "epoch": 36.811337466784764, "grad_norm": 0.23282134532928467, "learning_rate": 1e-05, "loss": 0.9265, "step": 41560 }, { "epoch": 36.815766164747565, "grad_norm": 0.21743695437908173, "learning_rate": 1e-05, "loss": 0.9675, "step": 41565 }, { "epoch": 36.820194862710366, "grad_norm": 0.2388668954372406, "learning_rate": 1e-05, "loss": 0.9789, "step": 41570 }, { "epoch": 36.82462356067316, "grad_norm": 0.24171118438243866, "learning_rate": 1e-05, "loss": 0.9667, "step": 41575 }, { "epoch": 36.82905225863596, "grad_norm": 0.20600350201129913, "learning_rate": 1e-05, "loss": 0.999, "step": 41580 }, { "epoch": 36.83348095659876, "grad_norm": 0.22473964095115662, "learning_rate": 1e-05, "loss": 0.9424, "step": 41585 }, { "epoch": 36.83790965456156, "grad_norm": 0.2945292592048645, "learning_rate": 1e-05, "loss": 1.0587, "step": 41590 }, { "epoch": 36.84233835252436, "grad_norm": 0.21868018805980682, "learning_rate": 1e-05, "loss": 0.9537, "step": 41595 }, { "epoch": 36.84676705048716, "grad_norm": 0.21495646238327026, "learning_rate": 1e-05, "loss": 0.9893, "step": 41600 }, { "epoch": 36.85119574844995, "grad_norm": 0.235142320394516, "learning_rate": 1e-05, "loss": 1.0093, "step": 41605 }, { "epoch": 36.855624446412754, "grad_norm": 0.2598854601383209, "learning_rate": 1e-05, "loss": 0.9751, "step": 41610 }, { "epoch": 36.860053144375556, "grad_norm": 0.2535991370677948, "learning_rate": 1e-05, "loss": 0.9656, "step": 41615 }, { "epoch": 36.86448184233835, "grad_norm": 0.22093145549297333, "learning_rate": 1e-05, "loss": 0.9738, "step": 41620 }, { "epoch": 36.86891054030115, "grad_norm": 0.22329245507717133, "learning_rate": 1e-05, "loss": 0.9494, "step": 41625 }, { "epoch": 36.87333923826395, "grad_norm": 0.24171333014965057, "learning_rate": 1e-05, "loss": 0.963, "step": 41630 }, { "epoch": 36.877767936226746, "grad_norm": 0.23357275128364563, "learning_rate": 1e-05, "loss": 1.0445, "step": 41635 }, { "epoch": 36.88219663418955, "grad_norm": 0.2335280030965805, "learning_rate": 1e-05, "loss": 0.9161, "step": 41640 }, { "epoch": 36.88662533215235, "grad_norm": 0.2635389566421509, "learning_rate": 1e-05, "loss": 0.9531, "step": 41645 }, { "epoch": 36.89105403011514, "grad_norm": 0.21385855972766876, "learning_rate": 1e-05, "loss": 0.9695, "step": 41650 }, { "epoch": 36.895482728077944, "grad_norm": 0.22912786900997162, "learning_rate": 1e-05, "loss": 0.9871, "step": 41655 }, { "epoch": 36.899911426040745, "grad_norm": 0.25736311078071594, "learning_rate": 1e-05, "loss": 0.9987, "step": 41660 }, { "epoch": 36.90434012400354, "grad_norm": 0.21302156150341034, "learning_rate": 1e-05, "loss": 0.9423, "step": 41665 }, { "epoch": 36.90876882196634, "grad_norm": 0.2374745011329651, "learning_rate": 1e-05, "loss": 0.9416, "step": 41670 }, { "epoch": 36.91319751992914, "grad_norm": 0.23203137516975403, "learning_rate": 1e-05, "loss": 0.9889, "step": 41675 }, { "epoch": 36.91762621789194, "grad_norm": 0.21209439635276794, "learning_rate": 1e-05, "loss": 0.9944, "step": 41680 }, { "epoch": 36.92205491585474, "grad_norm": 0.23146556317806244, "learning_rate": 1e-05, "loss": 0.9596, "step": 41685 }, { "epoch": 36.92648361381754, "grad_norm": 0.2478378564119339, "learning_rate": 1e-05, "loss": 0.9366, "step": 41690 }, { "epoch": 36.93091231178034, "grad_norm": 0.20333725214004517, "learning_rate": 1e-05, "loss": 0.9559, "step": 41695 }, { "epoch": 36.935341009743134, "grad_norm": 0.2504282295703888, "learning_rate": 1e-05, "loss": 0.9616, "step": 41700 }, { "epoch": 36.939769707705935, "grad_norm": 0.20910699665546417, "learning_rate": 1e-05, "loss": 0.9825, "step": 41705 }, { "epoch": 36.944198405668736, "grad_norm": 0.23719508945941925, "learning_rate": 1e-05, "loss": 1.0086, "step": 41710 }, { "epoch": 36.94862710363153, "grad_norm": 0.26588308811187744, "learning_rate": 1e-05, "loss": 0.9446, "step": 41715 }, { "epoch": 36.95305580159433, "grad_norm": 0.24046368896961212, "learning_rate": 1e-05, "loss": 1.0432, "step": 41720 }, { "epoch": 36.95748449955713, "grad_norm": 0.2910871207714081, "learning_rate": 1e-05, "loss": 1.0048, "step": 41725 }, { "epoch": 36.96191319751993, "grad_norm": 0.2554914355278015, "learning_rate": 1e-05, "loss": 0.9567, "step": 41730 }, { "epoch": 36.96634189548273, "grad_norm": 0.22151389718055725, "learning_rate": 1e-05, "loss": 0.9816, "step": 41735 }, { "epoch": 36.97077059344553, "grad_norm": 0.23031800985336304, "learning_rate": 1e-05, "loss": 1.0011, "step": 41740 }, { "epoch": 36.97519929140832, "grad_norm": 0.24828629195690155, "learning_rate": 1e-05, "loss": 0.9952, "step": 41745 }, { "epoch": 36.979627989371124, "grad_norm": 0.20596984028816223, "learning_rate": 1e-05, "loss": 0.9713, "step": 41750 }, { "epoch": 36.984056687333926, "grad_norm": 0.23722344636917114, "learning_rate": 1e-05, "loss": 0.9374, "step": 41755 }, { "epoch": 36.98848538529672, "grad_norm": 0.21555255353450775, "learning_rate": 1e-05, "loss": 0.995, "step": 41760 }, { "epoch": 36.99291408325952, "grad_norm": 0.2219674289226532, "learning_rate": 1e-05, "loss": 0.9881, "step": 41765 }, { "epoch": 36.99734278122232, "grad_norm": 0.28827813267707825, "learning_rate": 1e-05, "loss": 1.0603, "step": 41770 }, { "epoch": 37.001771479185116, "grad_norm": 0.2842288911342621, "learning_rate": 1e-05, "loss": 0.9961, "step": 41775 }, { "epoch": 37.00620017714792, "grad_norm": 0.29359903931617737, "learning_rate": 1e-05, "loss": 0.9657, "step": 41780 }, { "epoch": 37.01062887511072, "grad_norm": 0.23733164370059967, "learning_rate": 1e-05, "loss": 0.9974, "step": 41785 }, { "epoch": 37.01505757307352, "grad_norm": 0.23545072972774506, "learning_rate": 1e-05, "loss": 0.9886, "step": 41790 }, { "epoch": 37.019486271036314, "grad_norm": 0.2726118564605713, "learning_rate": 1e-05, "loss": 1.0027, "step": 41795 }, { "epoch": 37.023914968999115, "grad_norm": 0.207491934299469, "learning_rate": 1e-05, "loss": 1.0088, "step": 41800 }, { "epoch": 37.028343666961916, "grad_norm": 0.3502281606197357, "learning_rate": 1e-05, "loss": 0.936, "step": 41805 }, { "epoch": 37.03277236492471, "grad_norm": 0.21178008615970612, "learning_rate": 1e-05, "loss": 0.9651, "step": 41810 }, { "epoch": 37.03720106288751, "grad_norm": 0.2580762505531311, "learning_rate": 1e-05, "loss": 0.9795, "step": 41815 }, { "epoch": 37.04162976085031, "grad_norm": 0.23341518640518188, "learning_rate": 1e-05, "loss": 0.9682, "step": 41820 }, { "epoch": 37.04605845881311, "grad_norm": 0.2185945212841034, "learning_rate": 1e-05, "loss": 0.9944, "step": 41825 }, { "epoch": 37.05048715677591, "grad_norm": 0.25499385595321655, "learning_rate": 1e-05, "loss": 0.9509, "step": 41830 }, { "epoch": 37.05491585473871, "grad_norm": 0.23552343249320984, "learning_rate": 1e-05, "loss": 0.9808, "step": 41835 }, { "epoch": 37.0593445527015, "grad_norm": 0.22409363090991974, "learning_rate": 1e-05, "loss": 0.9344, "step": 41840 }, { "epoch": 37.063773250664305, "grad_norm": 0.27179014682769775, "learning_rate": 1e-05, "loss": 0.9817, "step": 41845 }, { "epoch": 37.068201948627106, "grad_norm": 0.23248104751110077, "learning_rate": 1e-05, "loss": 0.9784, "step": 41850 }, { "epoch": 37.0726306465899, "grad_norm": 0.227968230843544, "learning_rate": 1e-05, "loss": 0.9322, "step": 41855 }, { "epoch": 37.0770593445527, "grad_norm": 0.2531631886959076, "learning_rate": 1e-05, "loss": 0.9973, "step": 41860 }, { "epoch": 37.0814880425155, "grad_norm": 0.26105639338493347, "learning_rate": 1e-05, "loss": 1.004, "step": 41865 }, { "epoch": 37.0859167404783, "grad_norm": 0.22814108431339264, "learning_rate": 1e-05, "loss": 1.0374, "step": 41870 }, { "epoch": 37.0903454384411, "grad_norm": 0.26840445399284363, "learning_rate": 1e-05, "loss": 0.923, "step": 41875 }, { "epoch": 37.0947741364039, "grad_norm": 0.2333166003227234, "learning_rate": 1e-05, "loss": 0.9677, "step": 41880 }, { "epoch": 37.09920283436669, "grad_norm": 0.28321367502212524, "learning_rate": 1e-05, "loss": 0.9766, "step": 41885 }, { "epoch": 37.103631532329494, "grad_norm": 0.23226363956928253, "learning_rate": 1e-05, "loss": 0.9847, "step": 41890 }, { "epoch": 37.108060230292296, "grad_norm": 0.2403482049703598, "learning_rate": 1e-05, "loss": 0.9726, "step": 41895 }, { "epoch": 37.11248892825509, "grad_norm": 0.26225462555885315, "learning_rate": 1e-05, "loss": 0.9825, "step": 41900 }, { "epoch": 37.11691762621789, "grad_norm": 0.245097354054451, "learning_rate": 1e-05, "loss": 0.9675, "step": 41905 }, { "epoch": 37.12134632418069, "grad_norm": 0.23149815201759338, "learning_rate": 1e-05, "loss": 0.9742, "step": 41910 }, { "epoch": 37.12577502214349, "grad_norm": 0.25361597537994385, "learning_rate": 1e-05, "loss": 0.9861, "step": 41915 }, { "epoch": 37.13020372010629, "grad_norm": 0.27507415413856506, "learning_rate": 1e-05, "loss": 0.9688, "step": 41920 }, { "epoch": 37.13463241806909, "grad_norm": 0.23268146812915802, "learning_rate": 1e-05, "loss": 0.9779, "step": 41925 }, { "epoch": 37.13906111603189, "grad_norm": 0.22741036117076874, "learning_rate": 1e-05, "loss": 0.9861, "step": 41930 }, { "epoch": 37.143489813994684, "grad_norm": 0.2483917474746704, "learning_rate": 1e-05, "loss": 1.0125, "step": 41935 }, { "epoch": 37.147918511957485, "grad_norm": 0.2293664962053299, "learning_rate": 1e-05, "loss": 0.9441, "step": 41940 }, { "epoch": 37.152347209920286, "grad_norm": 0.24596326053142548, "learning_rate": 1e-05, "loss": 1.0282, "step": 41945 }, { "epoch": 37.15677590788308, "grad_norm": 0.25122880935668945, "learning_rate": 1e-05, "loss": 0.9767, "step": 41950 }, { "epoch": 37.16120460584588, "grad_norm": 0.25325894355773926, "learning_rate": 1e-05, "loss": 0.9551, "step": 41955 }, { "epoch": 37.16563330380868, "grad_norm": 0.22633004188537598, "learning_rate": 1e-05, "loss": 0.9861, "step": 41960 }, { "epoch": 37.17006200177148, "grad_norm": 0.25204646587371826, "learning_rate": 1e-05, "loss": 0.9756, "step": 41965 }, { "epoch": 37.17449069973428, "grad_norm": 0.20441824197769165, "learning_rate": 1e-05, "loss": 0.9281, "step": 41970 }, { "epoch": 37.17891939769708, "grad_norm": 0.2294832319021225, "learning_rate": 1e-05, "loss": 0.9661, "step": 41975 }, { "epoch": 37.18334809565987, "grad_norm": 0.231958270072937, "learning_rate": 1e-05, "loss": 1.0102, "step": 41980 }, { "epoch": 37.187776793622675, "grad_norm": 0.22556857764720917, "learning_rate": 1e-05, "loss": 0.9531, "step": 41985 }, { "epoch": 37.192205491585476, "grad_norm": 0.25151780247688293, "learning_rate": 1e-05, "loss": 0.9886, "step": 41990 }, { "epoch": 37.19663418954827, "grad_norm": 0.2676210403442383, "learning_rate": 1e-05, "loss": 0.984, "step": 41995 }, { "epoch": 37.20106288751107, "grad_norm": 0.23232288658618927, "learning_rate": 1e-05, "loss": 1.0055, "step": 42000 }, { "epoch": 37.20549158547387, "grad_norm": 0.2164548635482788, "learning_rate": 1e-05, "loss": 0.9873, "step": 42005 }, { "epoch": 37.20992028343667, "grad_norm": 0.2006070911884308, "learning_rate": 1e-05, "loss": 0.9636, "step": 42010 }, { "epoch": 37.21434898139947, "grad_norm": 0.22555896639823914, "learning_rate": 1e-05, "loss": 0.9554, "step": 42015 }, { "epoch": 37.21877767936227, "grad_norm": 0.24526165425777435, "learning_rate": 1e-05, "loss": 1.0351, "step": 42020 }, { "epoch": 37.22320637732506, "grad_norm": 0.22633777558803558, "learning_rate": 1e-05, "loss": 1.0161, "step": 42025 }, { "epoch": 37.227635075287864, "grad_norm": 0.2592720091342926, "learning_rate": 1e-05, "loss": 1.0106, "step": 42030 }, { "epoch": 37.232063773250665, "grad_norm": 0.24568088352680206, "learning_rate": 1e-05, "loss": 0.9909, "step": 42035 }, { "epoch": 37.23649247121347, "grad_norm": 0.23025763034820557, "learning_rate": 1e-05, "loss": 0.9866, "step": 42040 }, { "epoch": 37.24092116917626, "grad_norm": 0.224689781665802, "learning_rate": 1e-05, "loss": 0.9293, "step": 42045 }, { "epoch": 37.24534986713906, "grad_norm": 0.21344514191150665, "learning_rate": 1e-05, "loss": 0.9594, "step": 42050 }, { "epoch": 37.24977856510186, "grad_norm": 0.23611518740653992, "learning_rate": 1e-05, "loss": 1.0014, "step": 42055 }, { "epoch": 37.25420726306466, "grad_norm": 0.22356264293193817, "learning_rate": 1e-05, "loss": 0.9363, "step": 42060 }, { "epoch": 37.25863596102746, "grad_norm": 0.26769402623176575, "learning_rate": 1e-05, "loss": 0.9451, "step": 42065 }, { "epoch": 37.26306465899026, "grad_norm": 0.26295900344848633, "learning_rate": 1e-05, "loss": 1.0254, "step": 42070 }, { "epoch": 37.267493356953054, "grad_norm": 0.25025105476379395, "learning_rate": 1e-05, "loss": 0.983, "step": 42075 }, { "epoch": 37.271922054915855, "grad_norm": 0.2221536487340927, "learning_rate": 1e-05, "loss": 0.9725, "step": 42080 }, { "epoch": 37.276350752878656, "grad_norm": 0.25521183013916016, "learning_rate": 1e-05, "loss": 1.0205, "step": 42085 }, { "epoch": 37.28077945084145, "grad_norm": 0.24157032370567322, "learning_rate": 1e-05, "loss": 0.9606, "step": 42090 }, { "epoch": 37.28520814880425, "grad_norm": 0.25145530700683594, "learning_rate": 1e-05, "loss": 1.0328, "step": 42095 }, { "epoch": 37.28963684676705, "grad_norm": 0.23742009699344635, "learning_rate": 1e-05, "loss": 0.9801, "step": 42100 }, { "epoch": 37.29406554472985, "grad_norm": 0.2109355926513672, "learning_rate": 1e-05, "loss": 0.9587, "step": 42105 }, { "epoch": 37.29849424269265, "grad_norm": 0.22865231335163116, "learning_rate": 1e-05, "loss": 0.9818, "step": 42110 }, { "epoch": 37.30292294065545, "grad_norm": 0.23039765655994415, "learning_rate": 1e-05, "loss": 1.017, "step": 42115 }, { "epoch": 37.30735163861824, "grad_norm": 0.23206283152103424, "learning_rate": 1e-05, "loss": 0.9424, "step": 42120 }, { "epoch": 37.311780336581045, "grad_norm": 0.20329523086547852, "learning_rate": 1e-05, "loss": 1.0216, "step": 42125 }, { "epoch": 37.316209034543846, "grad_norm": 0.2435583621263504, "learning_rate": 1e-05, "loss": 1.0024, "step": 42130 }, { "epoch": 37.32063773250664, "grad_norm": 0.21034710109233856, "learning_rate": 1e-05, "loss": 0.9848, "step": 42135 }, { "epoch": 37.32506643046944, "grad_norm": 0.22206032276153564, "learning_rate": 1e-05, "loss": 0.9506, "step": 42140 }, { "epoch": 37.32949512843224, "grad_norm": 0.24671857059001923, "learning_rate": 1e-05, "loss": 0.9609, "step": 42145 }, { "epoch": 37.333923826395036, "grad_norm": 0.2695288062095642, "learning_rate": 1e-05, "loss": 0.9362, "step": 42150 }, { "epoch": 37.33835252435784, "grad_norm": 0.2319755107164383, "learning_rate": 1e-05, "loss": 0.9513, "step": 42155 }, { "epoch": 37.34278122232064, "grad_norm": 0.27511054277420044, "learning_rate": 1e-05, "loss": 1.1001, "step": 42160 }, { "epoch": 37.34720992028344, "grad_norm": 0.214742511510849, "learning_rate": 1e-05, "loss": 1.0146, "step": 42165 }, { "epoch": 37.351638618246234, "grad_norm": 0.24615897238254547, "learning_rate": 1e-05, "loss": 0.957, "step": 42170 }, { "epoch": 37.356067316209035, "grad_norm": 0.31881681084632874, "learning_rate": 1e-05, "loss": 0.9576, "step": 42175 }, { "epoch": 37.36049601417184, "grad_norm": 0.2792544364929199, "learning_rate": 1e-05, "loss": 1.0591, "step": 42180 }, { "epoch": 37.36492471213463, "grad_norm": 0.2725650370121002, "learning_rate": 1e-05, "loss": 1.0034, "step": 42185 }, { "epoch": 37.36935341009743, "grad_norm": 0.2541310489177704, "learning_rate": 1e-05, "loss": 0.9359, "step": 42190 }, { "epoch": 37.37378210806023, "grad_norm": 0.2675561010837555, "learning_rate": 1e-05, "loss": 0.9696, "step": 42195 }, { "epoch": 37.37821080602303, "grad_norm": 0.21635383367538452, "learning_rate": 1e-05, "loss": 0.9992, "step": 42200 }, { "epoch": 37.38263950398583, "grad_norm": 0.2331739217042923, "learning_rate": 1e-05, "loss": 1.0138, "step": 42205 }, { "epoch": 37.38706820194863, "grad_norm": 0.302857369184494, "learning_rate": 1e-05, "loss": 0.9439, "step": 42210 }, { "epoch": 37.391496899911424, "grad_norm": 0.2117435783147812, "learning_rate": 1e-05, "loss": 0.9858, "step": 42215 }, { "epoch": 37.395925597874225, "grad_norm": 0.23086801171302795, "learning_rate": 1e-05, "loss": 0.9502, "step": 42220 }, { "epoch": 37.400354295837026, "grad_norm": 0.23856469988822937, "learning_rate": 1e-05, "loss": 1.0029, "step": 42225 }, { "epoch": 37.40478299379982, "grad_norm": 0.2832426428794861, "learning_rate": 1e-05, "loss": 1.0441, "step": 42230 }, { "epoch": 37.40921169176262, "grad_norm": 0.22969260811805725, "learning_rate": 1e-05, "loss": 0.9612, "step": 42235 }, { "epoch": 37.41364038972542, "grad_norm": 0.20280721783638, "learning_rate": 1e-05, "loss": 0.9984, "step": 42240 }, { "epoch": 37.41806908768822, "grad_norm": 0.23133990168571472, "learning_rate": 1e-05, "loss": 0.9603, "step": 42245 }, { "epoch": 37.42249778565102, "grad_norm": 0.2690482437610626, "learning_rate": 1e-05, "loss": 1.0172, "step": 42250 }, { "epoch": 37.42692648361382, "grad_norm": 0.20983082056045532, "learning_rate": 1e-05, "loss": 0.943, "step": 42255 }, { "epoch": 37.43135518157661, "grad_norm": 0.2177984118461609, "learning_rate": 1e-05, "loss": 1.0036, "step": 42260 }, { "epoch": 37.435783879539414, "grad_norm": 0.22971859574317932, "learning_rate": 1e-05, "loss": 0.9897, "step": 42265 }, { "epoch": 37.440212577502216, "grad_norm": 0.22158464789390564, "learning_rate": 1e-05, "loss": 0.9703, "step": 42270 }, { "epoch": 37.44464127546502, "grad_norm": 0.24342280626296997, "learning_rate": 1e-05, "loss": 0.9572, "step": 42275 }, { "epoch": 37.44906997342781, "grad_norm": 0.23846440017223358, "learning_rate": 1e-05, "loss": 0.9368, "step": 42280 }, { "epoch": 37.45349867139061, "grad_norm": 0.219167560338974, "learning_rate": 1e-05, "loss": 0.9362, "step": 42285 }, { "epoch": 37.45792736935341, "grad_norm": 0.23654691874980927, "learning_rate": 1e-05, "loss": 0.9252, "step": 42290 }, { "epoch": 37.46235606731621, "grad_norm": 0.2326965034008026, "learning_rate": 1e-05, "loss": 0.9586, "step": 42295 }, { "epoch": 37.46678476527901, "grad_norm": 0.25994324684143066, "learning_rate": 1e-05, "loss": 1.0317, "step": 42300 }, { "epoch": 37.47121346324181, "grad_norm": 0.21999827027320862, "learning_rate": 1e-05, "loss": 0.9772, "step": 42305 }, { "epoch": 37.475642161204604, "grad_norm": 0.2213774174451828, "learning_rate": 1e-05, "loss": 0.971, "step": 42310 }, { "epoch": 37.480070859167405, "grad_norm": 0.24380311369895935, "learning_rate": 1e-05, "loss": 0.9499, "step": 42315 }, { "epoch": 37.484499557130206, "grad_norm": 0.24933016300201416, "learning_rate": 1e-05, "loss": 0.9882, "step": 42320 }, { "epoch": 37.488928255093, "grad_norm": 0.28443336486816406, "learning_rate": 1e-05, "loss": 1.0276, "step": 42325 }, { "epoch": 37.4933569530558, "grad_norm": 0.2330223023891449, "learning_rate": 1e-05, "loss": 0.9481, "step": 42330 }, { "epoch": 37.4977856510186, "grad_norm": 0.22605866193771362, "learning_rate": 1e-05, "loss": 0.9715, "step": 42335 }, { "epoch": 37.5022143489814, "grad_norm": 0.23135940730571747, "learning_rate": 1e-05, "loss": 0.9583, "step": 42340 }, { "epoch": 37.5066430469442, "grad_norm": 0.2751252353191376, "learning_rate": 1e-05, "loss": 1.0115, "step": 42345 }, { "epoch": 37.511071744907, "grad_norm": 0.2597109079360962, "learning_rate": 1e-05, "loss": 0.9851, "step": 42350 }, { "epoch": 37.515500442869794, "grad_norm": 0.23957784473896027, "learning_rate": 1e-05, "loss": 1.0129, "step": 42355 }, { "epoch": 37.519929140832595, "grad_norm": 0.22048811614513397, "learning_rate": 1e-05, "loss": 0.9478, "step": 42360 }, { "epoch": 37.524357838795396, "grad_norm": 0.22523728013038635, "learning_rate": 1e-05, "loss": 0.9362, "step": 42365 }, { "epoch": 37.52878653675819, "grad_norm": 0.24047182500362396, "learning_rate": 1e-05, "loss": 1.0356, "step": 42370 }, { "epoch": 37.53321523472099, "grad_norm": 0.23936474323272705, "learning_rate": 1e-05, "loss": 0.9982, "step": 42375 }, { "epoch": 37.53764393268379, "grad_norm": 0.23928560316562653, "learning_rate": 1e-05, "loss": 1.0217, "step": 42380 }, { "epoch": 37.54207263064659, "grad_norm": 0.3341608941555023, "learning_rate": 1e-05, "loss": 1.016, "step": 42385 }, { "epoch": 37.54650132860939, "grad_norm": 0.24284934997558594, "learning_rate": 1e-05, "loss": 0.9654, "step": 42390 }, { "epoch": 37.55093002657219, "grad_norm": 0.26976075768470764, "learning_rate": 1e-05, "loss": 0.9734, "step": 42395 }, { "epoch": 37.55535872453498, "grad_norm": 0.2603054642677307, "learning_rate": 1e-05, "loss": 1.0147, "step": 42400 }, { "epoch": 37.559787422497784, "grad_norm": 0.2568533718585968, "learning_rate": 1e-05, "loss": 0.9577, "step": 42405 }, { "epoch": 37.564216120460586, "grad_norm": 0.26029160618782043, "learning_rate": 1e-05, "loss": 1.005, "step": 42410 }, { "epoch": 37.56864481842339, "grad_norm": 0.24650785326957703, "learning_rate": 1e-05, "loss": 1.0075, "step": 42415 }, { "epoch": 37.57307351638618, "grad_norm": 0.2329334318637848, "learning_rate": 1e-05, "loss": 0.937, "step": 42420 }, { "epoch": 37.57750221434898, "grad_norm": 0.26726096868515015, "learning_rate": 1e-05, "loss": 0.9768, "step": 42425 }, { "epoch": 37.58193091231178, "grad_norm": 0.24087293446063995, "learning_rate": 1e-05, "loss": 1.0013, "step": 42430 }, { "epoch": 37.58635961027458, "grad_norm": 0.30094900727272034, "learning_rate": 1e-05, "loss": 0.9959, "step": 42435 }, { "epoch": 37.59078830823738, "grad_norm": 0.2575588524341583, "learning_rate": 1e-05, "loss": 0.9556, "step": 42440 }, { "epoch": 37.59521700620018, "grad_norm": 0.20232011377811432, "learning_rate": 1e-05, "loss": 0.9031, "step": 42445 }, { "epoch": 37.599645704162974, "grad_norm": 0.21574756503105164, "learning_rate": 1e-05, "loss": 0.9831, "step": 42450 }, { "epoch": 37.604074402125775, "grad_norm": 0.2227298468351364, "learning_rate": 1e-05, "loss": 0.9339, "step": 42455 }, { "epoch": 37.608503100088576, "grad_norm": 0.2319817841053009, "learning_rate": 1e-05, "loss": 1.0514, "step": 42460 }, { "epoch": 37.61293179805137, "grad_norm": 0.22359859943389893, "learning_rate": 1e-05, "loss": 1.0336, "step": 42465 }, { "epoch": 37.61736049601417, "grad_norm": 0.24118474125862122, "learning_rate": 1e-05, "loss": 1.0052, "step": 42470 }, { "epoch": 37.62178919397697, "grad_norm": 0.2634837329387665, "learning_rate": 1e-05, "loss": 0.9938, "step": 42475 }, { "epoch": 37.62621789193977, "grad_norm": 0.23753149807453156, "learning_rate": 1e-05, "loss": 0.9612, "step": 42480 }, { "epoch": 37.63064658990257, "grad_norm": 0.22305628657341003, "learning_rate": 1e-05, "loss": 0.9743, "step": 42485 }, { "epoch": 37.63507528786537, "grad_norm": 0.21795621514320374, "learning_rate": 1e-05, "loss": 0.9892, "step": 42490 }, { "epoch": 37.63950398582816, "grad_norm": 0.22745104134082794, "learning_rate": 1e-05, "loss": 1.0455, "step": 42495 }, { "epoch": 37.643932683790965, "grad_norm": 0.2625316083431244, "learning_rate": 1e-05, "loss": 1.0352, "step": 42500 }, { "epoch": 37.648361381753766, "grad_norm": 0.26233750581741333, "learning_rate": 1e-05, "loss": 1.01, "step": 42505 }, { "epoch": 37.65279007971656, "grad_norm": 0.2161959856748581, "learning_rate": 1e-05, "loss": 0.9353, "step": 42510 }, { "epoch": 37.65721877767936, "grad_norm": 0.19854971766471863, "learning_rate": 1e-05, "loss": 0.9562, "step": 42515 }, { "epoch": 37.66164747564216, "grad_norm": 0.21185769140720367, "learning_rate": 1e-05, "loss": 0.9631, "step": 42520 }, { "epoch": 37.666076173604964, "grad_norm": 0.23087051510810852, "learning_rate": 1e-05, "loss": 1.0131, "step": 42525 }, { "epoch": 37.67050487156776, "grad_norm": 0.223099946975708, "learning_rate": 1e-05, "loss": 1.0289, "step": 42530 }, { "epoch": 37.67493356953056, "grad_norm": 0.23845601081848145, "learning_rate": 1e-05, "loss": 0.9943, "step": 42535 }, { "epoch": 37.67936226749336, "grad_norm": 0.23099152743816376, "learning_rate": 1e-05, "loss": 0.9433, "step": 42540 }, { "epoch": 37.683790965456154, "grad_norm": 0.23859800398349762, "learning_rate": 1e-05, "loss": 1.0066, "step": 42545 }, { "epoch": 37.688219663418955, "grad_norm": 0.23860591650009155, "learning_rate": 1e-05, "loss": 1.0156, "step": 42550 }, { "epoch": 37.69264836138176, "grad_norm": 0.23042136430740356, "learning_rate": 1e-05, "loss": 0.9997, "step": 42555 }, { "epoch": 37.69707705934455, "grad_norm": 0.23309612274169922, "learning_rate": 1e-05, "loss": 0.9673, "step": 42560 }, { "epoch": 37.70150575730735, "grad_norm": 0.2884846031665802, "learning_rate": 1e-05, "loss": 0.9422, "step": 42565 }, { "epoch": 37.70593445527015, "grad_norm": 0.2549743354320526, "learning_rate": 1e-05, "loss": 0.9516, "step": 42570 }, { "epoch": 37.71036315323295, "grad_norm": 0.26618364453315735, "learning_rate": 1e-05, "loss": 1.0189, "step": 42575 }, { "epoch": 37.71479185119575, "grad_norm": 0.2884793281555176, "learning_rate": 1e-05, "loss": 0.9756, "step": 42580 }, { "epoch": 37.71922054915855, "grad_norm": 0.2261142134666443, "learning_rate": 1e-05, "loss": 0.9551, "step": 42585 }, { "epoch": 37.723649247121344, "grad_norm": 0.2919895350933075, "learning_rate": 1e-05, "loss": 0.9869, "step": 42590 }, { "epoch": 37.728077945084145, "grad_norm": 0.24840529263019562, "learning_rate": 1e-05, "loss": 1.0128, "step": 42595 }, { "epoch": 37.732506643046946, "grad_norm": 0.25027358531951904, "learning_rate": 1e-05, "loss": 0.9721, "step": 42600 }, { "epoch": 37.73693534100974, "grad_norm": 0.21553629636764526, "learning_rate": 1e-05, "loss": 1.0068, "step": 42605 }, { "epoch": 37.74136403897254, "grad_norm": 0.2082803100347519, "learning_rate": 1e-05, "loss": 0.9812, "step": 42610 }, { "epoch": 37.74579273693534, "grad_norm": 0.23363971710205078, "learning_rate": 1e-05, "loss": 0.9597, "step": 42615 }, { "epoch": 37.75022143489814, "grad_norm": 0.23775963485240936, "learning_rate": 1e-05, "loss": 0.9532, "step": 42620 }, { "epoch": 37.75465013286094, "grad_norm": 0.23447728157043457, "learning_rate": 1e-05, "loss": 0.9724, "step": 42625 }, { "epoch": 37.75907883082374, "grad_norm": 0.2571112811565399, "learning_rate": 1e-05, "loss": 0.9974, "step": 42630 }, { "epoch": 37.76350752878653, "grad_norm": 0.26900970935821533, "learning_rate": 1e-05, "loss": 0.9756, "step": 42635 }, { "epoch": 37.767936226749335, "grad_norm": 0.2583164870738983, "learning_rate": 1e-05, "loss": 0.9837, "step": 42640 }, { "epoch": 37.772364924712136, "grad_norm": 0.22950932383537292, "learning_rate": 1e-05, "loss": 0.9663, "step": 42645 }, { "epoch": 37.77679362267494, "grad_norm": 0.29403239488601685, "learning_rate": 1e-05, "loss": 0.9893, "step": 42650 }, { "epoch": 37.78122232063773, "grad_norm": 0.2755625247955322, "learning_rate": 1e-05, "loss": 1.0198, "step": 42655 }, { "epoch": 37.78565101860053, "grad_norm": 0.2673417329788208, "learning_rate": 1e-05, "loss": 0.9604, "step": 42660 }, { "epoch": 37.79007971656333, "grad_norm": 0.23710231482982635, "learning_rate": 1e-05, "loss": 0.9831, "step": 42665 }, { "epoch": 37.79450841452613, "grad_norm": 0.2526950538158417, "learning_rate": 1e-05, "loss": 0.9737, "step": 42670 }, { "epoch": 37.79893711248893, "grad_norm": 0.23061133921146393, "learning_rate": 1e-05, "loss": 0.9919, "step": 42675 }, { "epoch": 37.80336581045173, "grad_norm": 0.25248298048973083, "learning_rate": 1e-05, "loss": 0.9889, "step": 42680 }, { "epoch": 37.807794508414524, "grad_norm": 0.2378055304288864, "learning_rate": 1e-05, "loss": 1.0148, "step": 42685 }, { "epoch": 37.812223206377325, "grad_norm": 0.23923656344413757, "learning_rate": 1e-05, "loss": 0.9874, "step": 42690 }, { "epoch": 37.81665190434013, "grad_norm": 0.22410640120506287, "learning_rate": 1e-05, "loss": 0.9473, "step": 42695 }, { "epoch": 37.82108060230292, "grad_norm": 0.2792249917984009, "learning_rate": 1e-05, "loss": 0.9627, "step": 42700 }, { "epoch": 37.82550930026572, "grad_norm": 0.2228323072195053, "learning_rate": 1e-05, "loss": 0.9599, "step": 42705 }, { "epoch": 37.82993799822852, "grad_norm": 0.24203184247016907, "learning_rate": 1e-05, "loss": 0.9697, "step": 42710 }, { "epoch": 37.83436669619132, "grad_norm": 0.25683319568634033, "learning_rate": 1e-05, "loss": 0.9792, "step": 42715 }, { "epoch": 37.83879539415412, "grad_norm": 0.257548451423645, "learning_rate": 1e-05, "loss": 0.9738, "step": 42720 }, { "epoch": 37.84322409211692, "grad_norm": 0.2615880072116852, "learning_rate": 1e-05, "loss": 0.9588, "step": 42725 }, { "epoch": 37.847652790079714, "grad_norm": 0.2446560561656952, "learning_rate": 1e-05, "loss": 1.0293, "step": 42730 }, { "epoch": 37.852081488042515, "grad_norm": 0.28907597064971924, "learning_rate": 1e-05, "loss": 1.0055, "step": 42735 }, { "epoch": 37.856510186005316, "grad_norm": 0.25594547390937805, "learning_rate": 1e-05, "loss": 0.9987, "step": 42740 }, { "epoch": 37.86093888396811, "grad_norm": 0.23299019038677216, "learning_rate": 1e-05, "loss": 1.0016, "step": 42745 }, { "epoch": 37.86536758193091, "grad_norm": 0.2175140678882599, "learning_rate": 1e-05, "loss": 1.0, "step": 42750 }, { "epoch": 37.86979627989371, "grad_norm": 0.2387598156929016, "learning_rate": 1e-05, "loss": 0.9668, "step": 42755 }, { "epoch": 37.87422497785651, "grad_norm": 0.21592430770397186, "learning_rate": 1e-05, "loss": 0.948, "step": 42760 }, { "epoch": 37.87865367581931, "grad_norm": 0.21279841661453247, "learning_rate": 1e-05, "loss": 1.0434, "step": 42765 }, { "epoch": 37.88308237378211, "grad_norm": 0.29665660858154297, "learning_rate": 1e-05, "loss": 1.0192, "step": 42770 }, { "epoch": 37.88751107174491, "grad_norm": 0.2409961223602295, "learning_rate": 1e-05, "loss": 0.9858, "step": 42775 }, { "epoch": 37.891939769707704, "grad_norm": 0.23460136353969574, "learning_rate": 1e-05, "loss": 0.9727, "step": 42780 }, { "epoch": 37.896368467670506, "grad_norm": 0.25908422470092773, "learning_rate": 1e-05, "loss": 1.0003, "step": 42785 }, { "epoch": 37.90079716563331, "grad_norm": 0.26455163955688477, "learning_rate": 1e-05, "loss": 1.0008, "step": 42790 }, { "epoch": 37.9052258635961, "grad_norm": 0.26139652729034424, "learning_rate": 1e-05, "loss": 0.9528, "step": 42795 }, { "epoch": 37.9096545615589, "grad_norm": 0.26581835746765137, "learning_rate": 1e-05, "loss": 1.0417, "step": 42800 }, { "epoch": 37.9140832595217, "grad_norm": 0.29815199971199036, "learning_rate": 1e-05, "loss": 0.9624, "step": 42805 }, { "epoch": 37.9185119574845, "grad_norm": 0.27939558029174805, "learning_rate": 1e-05, "loss": 1.0046, "step": 42810 }, { "epoch": 37.9229406554473, "grad_norm": 0.21969373524188995, "learning_rate": 1e-05, "loss": 0.9527, "step": 42815 }, { "epoch": 37.9273693534101, "grad_norm": 0.25108328461647034, "learning_rate": 1e-05, "loss": 0.9753, "step": 42820 }, { "epoch": 37.931798051372894, "grad_norm": 0.23512962460517883, "learning_rate": 1e-05, "loss": 0.9435, "step": 42825 }, { "epoch": 37.936226749335695, "grad_norm": 0.24433231353759766, "learning_rate": 1e-05, "loss": 0.9271, "step": 42830 }, { "epoch": 37.9406554472985, "grad_norm": 0.22840668261051178, "learning_rate": 1e-05, "loss": 0.9831, "step": 42835 }, { "epoch": 37.94508414526129, "grad_norm": 0.2117973417043686, "learning_rate": 1e-05, "loss": 0.9643, "step": 42840 }, { "epoch": 37.94951284322409, "grad_norm": 0.2078549861907959, "learning_rate": 1e-05, "loss": 1.026, "step": 42845 }, { "epoch": 37.95394154118689, "grad_norm": 0.2514343559741974, "learning_rate": 1e-05, "loss": 0.9886, "step": 42850 }, { "epoch": 37.95837023914969, "grad_norm": 0.262573778629303, "learning_rate": 1e-05, "loss": 0.949, "step": 42855 }, { "epoch": 37.96279893711249, "grad_norm": 0.22597801685333252, "learning_rate": 1e-05, "loss": 0.9788, "step": 42860 }, { "epoch": 37.96722763507529, "grad_norm": 0.23276439309120178, "learning_rate": 1e-05, "loss": 0.9429, "step": 42865 }, { "epoch": 37.971656333038084, "grad_norm": 0.25960466265678406, "learning_rate": 1e-05, "loss": 0.9754, "step": 42870 }, { "epoch": 37.976085031000885, "grad_norm": 0.25331589579582214, "learning_rate": 1e-05, "loss": 1.0139, "step": 42875 }, { "epoch": 37.980513728963686, "grad_norm": 0.2831728160381317, "learning_rate": 1e-05, "loss": 0.9507, "step": 42880 }, { "epoch": 37.98494242692648, "grad_norm": 0.24022911489009857, "learning_rate": 1e-05, "loss": 0.9902, "step": 42885 }, { "epoch": 37.98937112488928, "grad_norm": 0.2533038854598999, "learning_rate": 1e-05, "loss": 1.0139, "step": 42890 }, { "epoch": 37.99379982285208, "grad_norm": 0.20172680914402008, "learning_rate": 1e-05, "loss": 0.9885, "step": 42895 }, { "epoch": 37.998228520814884, "grad_norm": 0.2641580104827881, "learning_rate": 1e-05, "loss": 0.9216, "step": 42900 }, { "epoch": 38.00265721877768, "grad_norm": 0.2513423562049866, "learning_rate": 1e-05, "loss": 0.9747, "step": 42905 }, { "epoch": 38.00708591674048, "grad_norm": 0.24848389625549316, "learning_rate": 1e-05, "loss": 0.9471, "step": 42910 }, { "epoch": 38.01151461470328, "grad_norm": 0.2690280079841614, "learning_rate": 1e-05, "loss": 1.0302, "step": 42915 }, { "epoch": 38.015943312666074, "grad_norm": 0.2773149609565735, "learning_rate": 1e-05, "loss": 0.9905, "step": 42920 }, { "epoch": 38.020372010628876, "grad_norm": 0.2997073531150818, "learning_rate": 1e-05, "loss": 0.9831, "step": 42925 }, { "epoch": 38.02480070859168, "grad_norm": 0.259006142616272, "learning_rate": 1e-05, "loss": 0.981, "step": 42930 }, { "epoch": 38.02922940655447, "grad_norm": 0.27275604009628296, "learning_rate": 1e-05, "loss": 1.0243, "step": 42935 }, { "epoch": 38.03365810451727, "grad_norm": 0.27408191561698914, "learning_rate": 1e-05, "loss": 0.9651, "step": 42940 }, { "epoch": 38.03808680248007, "grad_norm": 0.26808083057403564, "learning_rate": 1e-05, "loss": 1.0501, "step": 42945 }, { "epoch": 38.04251550044287, "grad_norm": 0.28611332178115845, "learning_rate": 1e-05, "loss": 0.9734, "step": 42950 }, { "epoch": 38.04694419840567, "grad_norm": 0.25341981649398804, "learning_rate": 1e-05, "loss": 0.947, "step": 42955 }, { "epoch": 38.05137289636847, "grad_norm": 0.26704496145248413, "learning_rate": 1e-05, "loss": 0.9849, "step": 42960 }, { "epoch": 38.055801594331264, "grad_norm": 0.25065797567367554, "learning_rate": 1e-05, "loss": 1.0108, "step": 42965 }, { "epoch": 38.060230292294065, "grad_norm": 0.2529211938381195, "learning_rate": 1e-05, "loss": 0.9888, "step": 42970 }, { "epoch": 38.064658990256866, "grad_norm": 0.22175614535808563, "learning_rate": 1e-05, "loss": 0.9547, "step": 42975 }, { "epoch": 38.06908768821966, "grad_norm": 0.23338641226291656, "learning_rate": 1e-05, "loss": 1.0403, "step": 42980 }, { "epoch": 38.07351638618246, "grad_norm": 0.24338971078395844, "learning_rate": 1e-05, "loss": 0.9665, "step": 42985 }, { "epoch": 38.07794508414526, "grad_norm": 0.2220805585384369, "learning_rate": 1e-05, "loss": 0.9874, "step": 42990 }, { "epoch": 38.08237378210806, "grad_norm": 0.24641622602939606, "learning_rate": 1e-05, "loss": 0.9987, "step": 42995 }, { "epoch": 38.08680248007086, "grad_norm": 0.26084789633750916, "learning_rate": 1e-05, "loss": 0.9897, "step": 43000 }, { "epoch": 38.09123117803366, "grad_norm": 0.25485512614250183, "learning_rate": 1e-05, "loss": 0.9689, "step": 43005 }, { "epoch": 38.09565987599645, "grad_norm": 0.254149854183197, "learning_rate": 1e-05, "loss": 0.9362, "step": 43010 }, { "epoch": 38.100088573959255, "grad_norm": 0.2314128428697586, "learning_rate": 1e-05, "loss": 0.9793, "step": 43015 }, { "epoch": 38.104517271922056, "grad_norm": 0.25796201825141907, "learning_rate": 1e-05, "loss": 0.9191, "step": 43020 }, { "epoch": 38.10894596988486, "grad_norm": 0.22887766361236572, "learning_rate": 1e-05, "loss": 0.9426, "step": 43025 }, { "epoch": 38.11337466784765, "grad_norm": 0.2184365689754486, "learning_rate": 1e-05, "loss": 0.9887, "step": 43030 }, { "epoch": 38.11780336581045, "grad_norm": 0.23559419810771942, "learning_rate": 1e-05, "loss": 0.9962, "step": 43035 }, { "epoch": 38.122232063773254, "grad_norm": 0.22974500060081482, "learning_rate": 1e-05, "loss": 0.9842, "step": 43040 }, { "epoch": 38.12666076173605, "grad_norm": 0.2795167863368988, "learning_rate": 1e-05, "loss": 0.9578, "step": 43045 }, { "epoch": 38.13108945969885, "grad_norm": 0.28331977128982544, "learning_rate": 1e-05, "loss": 1.0143, "step": 43050 }, { "epoch": 38.13551815766165, "grad_norm": 0.2635152339935303, "learning_rate": 1e-05, "loss": 1.0075, "step": 43055 }, { "epoch": 38.139946855624444, "grad_norm": 0.23196662962436676, "learning_rate": 1e-05, "loss": 1.0604, "step": 43060 }, { "epoch": 38.144375553587246, "grad_norm": 0.24418827891349792, "learning_rate": 1e-05, "loss": 0.9973, "step": 43065 }, { "epoch": 38.14880425155005, "grad_norm": 0.24671323597431183, "learning_rate": 1e-05, "loss": 0.9188, "step": 43070 }, { "epoch": 38.15323294951284, "grad_norm": 0.23762334883213043, "learning_rate": 1e-05, "loss": 0.9359, "step": 43075 }, { "epoch": 38.15766164747564, "grad_norm": 0.224981427192688, "learning_rate": 1e-05, "loss": 0.9872, "step": 43080 }, { "epoch": 38.16209034543844, "grad_norm": 0.21803024411201477, "learning_rate": 1e-05, "loss": 0.9205, "step": 43085 }, { "epoch": 38.16651904340124, "grad_norm": 0.22328072786331177, "learning_rate": 1e-05, "loss": 0.932, "step": 43090 }, { "epoch": 38.17094774136404, "grad_norm": 0.25803542137145996, "learning_rate": 1e-05, "loss": 1.0105, "step": 43095 }, { "epoch": 38.17537643932684, "grad_norm": 0.25329846143722534, "learning_rate": 1e-05, "loss": 0.9835, "step": 43100 }, { "epoch": 38.179805137289634, "grad_norm": 0.25826695561408997, "learning_rate": 1e-05, "loss": 1.0106, "step": 43105 }, { "epoch": 38.184233835252435, "grad_norm": 0.2522648572921753, "learning_rate": 1e-05, "loss": 0.9894, "step": 43110 }, { "epoch": 38.188662533215236, "grad_norm": 0.26407769322395325, "learning_rate": 1e-05, "loss": 0.9818, "step": 43115 }, { "epoch": 38.19309123117803, "grad_norm": 0.2603117525577545, "learning_rate": 1e-05, "loss": 0.9574, "step": 43120 }, { "epoch": 38.19751992914083, "grad_norm": 0.21637728810310364, "learning_rate": 1e-05, "loss": 0.9524, "step": 43125 }, { "epoch": 38.20194862710363, "grad_norm": 0.26354262232780457, "learning_rate": 1e-05, "loss": 0.954, "step": 43130 }, { "epoch": 38.20637732506643, "grad_norm": 0.24848845601081848, "learning_rate": 1e-05, "loss": 0.9426, "step": 43135 }, { "epoch": 38.21080602302923, "grad_norm": 0.24224057793617249, "learning_rate": 1e-05, "loss": 0.97, "step": 43140 }, { "epoch": 38.21523472099203, "grad_norm": 0.22280791401863098, "learning_rate": 1e-05, "loss": 0.998, "step": 43145 }, { "epoch": 38.21966341895483, "grad_norm": 0.21780434250831604, "learning_rate": 1e-05, "loss": 0.941, "step": 43150 }, { "epoch": 38.224092116917625, "grad_norm": 0.2702253460884094, "learning_rate": 1e-05, "loss": 0.9834, "step": 43155 }, { "epoch": 38.228520814880426, "grad_norm": 0.2269081175327301, "learning_rate": 1e-05, "loss": 0.965, "step": 43160 }, { "epoch": 38.23294951284323, "grad_norm": 0.183292418718338, "learning_rate": 1e-05, "loss": 0.9647, "step": 43165 }, { "epoch": 38.23737821080602, "grad_norm": 0.22489820420742035, "learning_rate": 1e-05, "loss": 0.9937, "step": 43170 }, { "epoch": 38.24180690876882, "grad_norm": 0.29860106110572815, "learning_rate": 1e-05, "loss": 0.915, "step": 43175 }, { "epoch": 38.246235606731624, "grad_norm": 0.2841985523700714, "learning_rate": 1e-05, "loss": 1.0521, "step": 43180 }, { "epoch": 38.25066430469442, "grad_norm": 0.2412446141242981, "learning_rate": 1e-05, "loss": 1.047, "step": 43185 }, { "epoch": 38.25509300265722, "grad_norm": 0.23004049062728882, "learning_rate": 1e-05, "loss": 0.9619, "step": 43190 }, { "epoch": 38.25952170062002, "grad_norm": 0.19633902609348297, "learning_rate": 1e-05, "loss": 1.0324, "step": 43195 }, { "epoch": 38.263950398582814, "grad_norm": 0.2862204909324646, "learning_rate": 1e-05, "loss": 0.9827, "step": 43200 }, { "epoch": 38.268379096545615, "grad_norm": 0.23032046854496002, "learning_rate": 1e-05, "loss": 0.9608, "step": 43205 }, { "epoch": 38.27280779450842, "grad_norm": 0.28612473607063293, "learning_rate": 1e-05, "loss": 0.9931, "step": 43210 }, { "epoch": 38.27723649247121, "grad_norm": 0.2367434948682785, "learning_rate": 1e-05, "loss": 0.9346, "step": 43215 }, { "epoch": 38.28166519043401, "grad_norm": 0.24974460899829865, "learning_rate": 1e-05, "loss": 1.0288, "step": 43220 }, { "epoch": 38.28609388839681, "grad_norm": 0.22161123156547546, "learning_rate": 1e-05, "loss": 0.9371, "step": 43225 }, { "epoch": 38.29052258635961, "grad_norm": 0.2765234410762787, "learning_rate": 1e-05, "loss": 0.9791, "step": 43230 }, { "epoch": 38.29495128432241, "grad_norm": 0.23298610746860504, "learning_rate": 1e-05, "loss": 0.9944, "step": 43235 }, { "epoch": 38.29937998228521, "grad_norm": 0.20875632762908936, "learning_rate": 1e-05, "loss": 0.9633, "step": 43240 }, { "epoch": 38.303808680248004, "grad_norm": 0.24329780042171478, "learning_rate": 1e-05, "loss": 0.9584, "step": 43245 }, { "epoch": 38.308237378210805, "grad_norm": 0.21711863577365875, "learning_rate": 1e-05, "loss": 0.9709, "step": 43250 }, { "epoch": 38.312666076173606, "grad_norm": 0.24762588739395142, "learning_rate": 1e-05, "loss": 1.0033, "step": 43255 }, { "epoch": 38.31709477413641, "grad_norm": 0.23348502814769745, "learning_rate": 1e-05, "loss": 1.0074, "step": 43260 }, { "epoch": 38.3215234720992, "grad_norm": 0.2355189323425293, "learning_rate": 1e-05, "loss": 0.9709, "step": 43265 }, { "epoch": 38.325952170062, "grad_norm": 0.20766477286815643, "learning_rate": 1e-05, "loss": 0.9624, "step": 43270 }, { "epoch": 38.330380868024804, "grad_norm": 0.24386738240718842, "learning_rate": 1e-05, "loss": 1.0072, "step": 43275 }, { "epoch": 38.3348095659876, "grad_norm": 0.23066936433315277, "learning_rate": 1e-05, "loss": 0.9872, "step": 43280 }, { "epoch": 38.3392382639504, "grad_norm": 0.22878050804138184, "learning_rate": 1e-05, "loss": 0.935, "step": 43285 }, { "epoch": 38.3436669619132, "grad_norm": 0.21676266193389893, "learning_rate": 1e-05, "loss": 0.9876, "step": 43290 }, { "epoch": 38.348095659875995, "grad_norm": 0.22341535985469818, "learning_rate": 1e-05, "loss": 0.9676, "step": 43295 }, { "epoch": 38.352524357838796, "grad_norm": 0.2633419930934906, "learning_rate": 1e-05, "loss": 0.9855, "step": 43300 }, { "epoch": 38.3569530558016, "grad_norm": 0.27283814549446106, "learning_rate": 1e-05, "loss": 0.997, "step": 43305 }, { "epoch": 38.36138175376439, "grad_norm": 0.2556975483894348, "learning_rate": 1e-05, "loss": 1.0088, "step": 43310 }, { "epoch": 38.36581045172719, "grad_norm": 0.21136456727981567, "learning_rate": 1e-05, "loss": 1.0326, "step": 43315 }, { "epoch": 38.37023914968999, "grad_norm": 0.2684818208217621, "learning_rate": 1e-05, "loss": 0.8982, "step": 43320 }, { "epoch": 38.37466784765279, "grad_norm": 0.23107388615608215, "learning_rate": 1e-05, "loss": 1.0092, "step": 43325 }, { "epoch": 38.37909654561559, "grad_norm": 0.2445419728755951, "learning_rate": 1e-05, "loss": 0.9835, "step": 43330 }, { "epoch": 38.38352524357839, "grad_norm": 0.2546462416648865, "learning_rate": 1e-05, "loss": 0.9481, "step": 43335 }, { "epoch": 38.387953941541184, "grad_norm": 0.23402030766010284, "learning_rate": 1e-05, "loss": 0.9754, "step": 43340 }, { "epoch": 38.392382639503985, "grad_norm": 0.22267848253250122, "learning_rate": 1e-05, "loss": 0.9501, "step": 43345 }, { "epoch": 38.39681133746679, "grad_norm": 0.27227842807769775, "learning_rate": 1e-05, "loss": 0.9723, "step": 43350 }, { "epoch": 38.40124003542958, "grad_norm": 0.24881525337696075, "learning_rate": 1e-05, "loss": 0.9448, "step": 43355 }, { "epoch": 38.40566873339238, "grad_norm": 0.21689139306545258, "learning_rate": 1e-05, "loss": 0.9587, "step": 43360 }, { "epoch": 38.41009743135518, "grad_norm": 0.2058870643377304, "learning_rate": 1e-05, "loss": 0.9412, "step": 43365 }, { "epoch": 38.41452612931798, "grad_norm": 0.20513705909252167, "learning_rate": 1e-05, "loss": 0.9899, "step": 43370 }, { "epoch": 38.41895482728078, "grad_norm": 0.20016486942768097, "learning_rate": 1e-05, "loss": 1.0036, "step": 43375 }, { "epoch": 38.42338352524358, "grad_norm": 0.22196191549301147, "learning_rate": 1e-05, "loss": 0.9755, "step": 43380 }, { "epoch": 38.42781222320638, "grad_norm": 0.2295418083667755, "learning_rate": 1e-05, "loss": 1.0095, "step": 43385 }, { "epoch": 38.432240921169175, "grad_norm": 0.2352699637413025, "learning_rate": 1e-05, "loss": 1.0047, "step": 43390 }, { "epoch": 38.436669619131976, "grad_norm": 0.31505686044692993, "learning_rate": 1e-05, "loss": 0.9373, "step": 43395 }, { "epoch": 38.44109831709478, "grad_norm": 0.22873611748218536, "learning_rate": 1e-05, "loss": 0.9878, "step": 43400 }, { "epoch": 38.44552701505757, "grad_norm": 0.21728169918060303, "learning_rate": 1e-05, "loss": 0.9843, "step": 43405 }, { "epoch": 38.44995571302037, "grad_norm": 0.2645941972732544, "learning_rate": 1e-05, "loss": 1.0007, "step": 43410 }, { "epoch": 38.454384410983174, "grad_norm": 0.22846366465091705, "learning_rate": 1e-05, "loss": 0.9919, "step": 43415 }, { "epoch": 38.45881310894597, "grad_norm": 0.22175458073616028, "learning_rate": 1e-05, "loss": 0.9857, "step": 43420 }, { "epoch": 38.46324180690877, "grad_norm": 0.26965638995170593, "learning_rate": 1e-05, "loss": 1.0016, "step": 43425 }, { "epoch": 38.46767050487157, "grad_norm": 0.22983549535274506, "learning_rate": 1e-05, "loss": 0.9794, "step": 43430 }, { "epoch": 38.472099202834364, "grad_norm": 0.24273781478405, "learning_rate": 1e-05, "loss": 0.9814, "step": 43435 }, { "epoch": 38.476527900797166, "grad_norm": 0.20609554648399353, "learning_rate": 1e-05, "loss": 0.9922, "step": 43440 }, { "epoch": 38.48095659875997, "grad_norm": 0.22920441627502441, "learning_rate": 1e-05, "loss": 0.9086, "step": 43445 }, { "epoch": 38.48538529672276, "grad_norm": 0.24241000413894653, "learning_rate": 1e-05, "loss": 0.9157, "step": 43450 }, { "epoch": 38.48981399468556, "grad_norm": 0.29323849081993103, "learning_rate": 1e-05, "loss": 0.9327, "step": 43455 }, { "epoch": 38.49424269264836, "grad_norm": 0.20561303198337555, "learning_rate": 1e-05, "loss": 1.002, "step": 43460 }, { "epoch": 38.49867139061116, "grad_norm": 0.22029079496860504, "learning_rate": 1e-05, "loss": 1.0018, "step": 43465 }, { "epoch": 38.50310008857396, "grad_norm": 0.26725682616233826, "learning_rate": 1e-05, "loss": 0.9894, "step": 43470 }, { "epoch": 38.50752878653676, "grad_norm": 0.2451079785823822, "learning_rate": 1e-05, "loss": 0.9421, "step": 43475 }, { "epoch": 38.511957484499554, "grad_norm": 0.20564872026443481, "learning_rate": 1e-05, "loss": 1.0132, "step": 43480 }, { "epoch": 38.516386182462355, "grad_norm": 0.2674044072628021, "learning_rate": 1e-05, "loss": 1.0024, "step": 43485 }, { "epoch": 38.520814880425156, "grad_norm": 0.26122304797172546, "learning_rate": 1e-05, "loss": 0.9169, "step": 43490 }, { "epoch": 38.52524357838795, "grad_norm": 0.2647591233253479, "learning_rate": 1e-05, "loss": 0.9765, "step": 43495 }, { "epoch": 38.52967227635075, "grad_norm": 0.2281855046749115, "learning_rate": 1e-05, "loss": 0.9366, "step": 43500 }, { "epoch": 38.53410097431355, "grad_norm": 0.2355351597070694, "learning_rate": 1e-05, "loss": 0.9469, "step": 43505 }, { "epoch": 38.538529672276354, "grad_norm": 0.25589779019355774, "learning_rate": 1e-05, "loss": 1.0137, "step": 43510 }, { "epoch": 38.54295837023915, "grad_norm": 0.211058109998703, "learning_rate": 1e-05, "loss": 0.9536, "step": 43515 }, { "epoch": 38.54738706820195, "grad_norm": 0.2609139382839203, "learning_rate": 1e-05, "loss": 1.0271, "step": 43520 }, { "epoch": 38.55181576616475, "grad_norm": 0.2622933089733124, "learning_rate": 1e-05, "loss": 0.9851, "step": 43525 }, { "epoch": 38.556244464127545, "grad_norm": 0.20722365379333496, "learning_rate": 1e-05, "loss": 1.0252, "step": 43530 }, { "epoch": 38.560673162090346, "grad_norm": 0.21178418397903442, "learning_rate": 1e-05, "loss": 0.9702, "step": 43535 }, { "epoch": 38.56510186005315, "grad_norm": 0.21725904941558838, "learning_rate": 1e-05, "loss": 0.9318, "step": 43540 }, { "epoch": 38.56953055801594, "grad_norm": 0.2667466700077057, "learning_rate": 1e-05, "loss": 0.9536, "step": 43545 }, { "epoch": 38.57395925597874, "grad_norm": 0.21910931169986725, "learning_rate": 1e-05, "loss": 0.983, "step": 43550 }, { "epoch": 38.578387953941544, "grad_norm": 0.2225453406572342, "learning_rate": 1e-05, "loss": 0.9482, "step": 43555 }, { "epoch": 38.58281665190434, "grad_norm": 0.23588016629219055, "learning_rate": 1e-05, "loss": 1.0271, "step": 43560 }, { "epoch": 38.58724534986714, "grad_norm": 0.23776131868362427, "learning_rate": 1e-05, "loss": 0.964, "step": 43565 }, { "epoch": 38.59167404782994, "grad_norm": 0.26252466440200806, "learning_rate": 1e-05, "loss": 1.0281, "step": 43570 }, { "epoch": 38.596102745792734, "grad_norm": 0.24215373396873474, "learning_rate": 1e-05, "loss": 0.9972, "step": 43575 }, { "epoch": 38.600531443755536, "grad_norm": 0.23254767060279846, "learning_rate": 1e-05, "loss": 0.9836, "step": 43580 }, { "epoch": 38.60496014171834, "grad_norm": 0.2724493443965912, "learning_rate": 1e-05, "loss": 0.9632, "step": 43585 }, { "epoch": 38.60938883968113, "grad_norm": 0.27124494314193726, "learning_rate": 1e-05, "loss": 0.9855, "step": 43590 }, { "epoch": 38.61381753764393, "grad_norm": 0.23203571140766144, "learning_rate": 1e-05, "loss": 0.9527, "step": 43595 }, { "epoch": 38.61824623560673, "grad_norm": 0.21647340059280396, "learning_rate": 1e-05, "loss": 1.0047, "step": 43600 }, { "epoch": 38.62267493356953, "grad_norm": 0.24302645027637482, "learning_rate": 1e-05, "loss": 1.0629, "step": 43605 }, { "epoch": 38.62710363153233, "grad_norm": 0.2427358478307724, "learning_rate": 1e-05, "loss": 0.9777, "step": 43610 }, { "epoch": 38.63153232949513, "grad_norm": 0.2331506907939911, "learning_rate": 1e-05, "loss": 1.0363, "step": 43615 }, { "epoch": 38.635961027457924, "grad_norm": 0.2289654016494751, "learning_rate": 1e-05, "loss": 0.9121, "step": 43620 }, { "epoch": 38.640389725420725, "grad_norm": 0.21937672793865204, "learning_rate": 1e-05, "loss": 1.0151, "step": 43625 }, { "epoch": 38.644818423383526, "grad_norm": 0.226113960146904, "learning_rate": 1e-05, "loss": 0.9869, "step": 43630 }, { "epoch": 38.64924712134633, "grad_norm": 0.239945188164711, "learning_rate": 1e-05, "loss": 0.9966, "step": 43635 }, { "epoch": 38.65367581930912, "grad_norm": 0.21665291488170624, "learning_rate": 1e-05, "loss": 0.9894, "step": 43640 }, { "epoch": 38.65810451727192, "grad_norm": 0.2411862015724182, "learning_rate": 1e-05, "loss": 0.9519, "step": 43645 }, { "epoch": 38.662533215234724, "grad_norm": 0.2256542295217514, "learning_rate": 1e-05, "loss": 0.9697, "step": 43650 }, { "epoch": 38.66696191319752, "grad_norm": 0.24472227692604065, "learning_rate": 1e-05, "loss": 0.9129, "step": 43655 }, { "epoch": 38.67139061116032, "grad_norm": 0.24234147369861603, "learning_rate": 1e-05, "loss": 1.0055, "step": 43660 }, { "epoch": 38.67581930912312, "grad_norm": 0.2653104364871979, "learning_rate": 1e-05, "loss": 1.025, "step": 43665 }, { "epoch": 38.680248007085915, "grad_norm": 0.18867559731006622, "learning_rate": 1e-05, "loss": 0.9619, "step": 43670 }, { "epoch": 38.684676705048716, "grad_norm": 0.24823516607284546, "learning_rate": 1e-05, "loss": 0.9793, "step": 43675 }, { "epoch": 38.68910540301152, "grad_norm": 0.23131290078163147, "learning_rate": 1e-05, "loss": 0.981, "step": 43680 }, { "epoch": 38.69353410097431, "grad_norm": 0.21727074682712555, "learning_rate": 1e-05, "loss": 0.969, "step": 43685 }, { "epoch": 38.69796279893711, "grad_norm": 0.26791298389434814, "learning_rate": 1e-05, "loss": 0.9508, "step": 43690 }, { "epoch": 38.702391496899914, "grad_norm": 0.23814387619495392, "learning_rate": 1e-05, "loss": 0.966, "step": 43695 }, { "epoch": 38.70682019486271, "grad_norm": 0.23660992085933685, "learning_rate": 1e-05, "loss": 1.0289, "step": 43700 }, { "epoch": 38.71124889282551, "grad_norm": 0.24554641544818878, "learning_rate": 1e-05, "loss": 0.9537, "step": 43705 }, { "epoch": 38.71567759078831, "grad_norm": 0.246957466006279, "learning_rate": 1e-05, "loss": 0.9745, "step": 43710 }, { "epoch": 38.720106288751104, "grad_norm": 0.22561015188694, "learning_rate": 1e-05, "loss": 1.0062, "step": 43715 }, { "epoch": 38.724534986713905, "grad_norm": 0.25264501571655273, "learning_rate": 1e-05, "loss": 0.972, "step": 43720 }, { "epoch": 38.72896368467671, "grad_norm": 0.21695616841316223, "learning_rate": 1e-05, "loss": 0.9587, "step": 43725 }, { "epoch": 38.7333923826395, "grad_norm": 0.24012257158756256, "learning_rate": 1e-05, "loss": 0.9504, "step": 43730 }, { "epoch": 38.7378210806023, "grad_norm": 0.23363974690437317, "learning_rate": 1e-05, "loss": 0.9876, "step": 43735 }, { "epoch": 38.7422497785651, "grad_norm": 0.2782191038131714, "learning_rate": 1e-05, "loss": 0.8982, "step": 43740 }, { "epoch": 38.7466784765279, "grad_norm": 0.2390768826007843, "learning_rate": 1e-05, "loss": 1.0151, "step": 43745 }, { "epoch": 38.7511071744907, "grad_norm": 0.2396044135093689, "learning_rate": 1e-05, "loss": 1.0264, "step": 43750 }, { "epoch": 38.7555358724535, "grad_norm": 0.23891164362430573, "learning_rate": 1e-05, "loss": 0.974, "step": 43755 }, { "epoch": 38.7599645704163, "grad_norm": 0.21045605838298798, "learning_rate": 1e-05, "loss": 0.8854, "step": 43760 }, { "epoch": 38.764393268379095, "grad_norm": 0.24695606529712677, "learning_rate": 1e-05, "loss": 1.0153, "step": 43765 }, { "epoch": 38.768821966341896, "grad_norm": 0.2641974985599518, "learning_rate": 1e-05, "loss": 0.9654, "step": 43770 }, { "epoch": 38.7732506643047, "grad_norm": 0.25685933232307434, "learning_rate": 1e-05, "loss": 1.0127, "step": 43775 }, { "epoch": 38.77767936226749, "grad_norm": 0.2249232828617096, "learning_rate": 1e-05, "loss": 0.9561, "step": 43780 }, { "epoch": 38.78210806023029, "grad_norm": 0.21370990574359894, "learning_rate": 1e-05, "loss": 0.9965, "step": 43785 }, { "epoch": 38.786536758193094, "grad_norm": 0.24395334720611572, "learning_rate": 1e-05, "loss": 0.9721, "step": 43790 }, { "epoch": 38.79096545615589, "grad_norm": 0.23896770179271698, "learning_rate": 1e-05, "loss": 0.9148, "step": 43795 }, { "epoch": 38.79539415411869, "grad_norm": 0.2118944376707077, "learning_rate": 1e-05, "loss": 0.9977, "step": 43800 }, { "epoch": 38.79982285208149, "grad_norm": 0.21172665059566498, "learning_rate": 1e-05, "loss": 0.9675, "step": 43805 }, { "epoch": 38.804251550044285, "grad_norm": 0.2833765149116516, "learning_rate": 1e-05, "loss": 1.0229, "step": 43810 }, { "epoch": 38.808680248007086, "grad_norm": 0.23825910687446594, "learning_rate": 1e-05, "loss": 1.0199, "step": 43815 }, { "epoch": 38.81310894596989, "grad_norm": 0.1943647712469101, "learning_rate": 1e-05, "loss": 0.9698, "step": 43820 }, { "epoch": 38.81753764393268, "grad_norm": 0.27112287282943726, "learning_rate": 1e-05, "loss": 0.9976, "step": 43825 }, { "epoch": 38.82196634189548, "grad_norm": 0.3868490755558014, "learning_rate": 1e-05, "loss": 0.9846, "step": 43830 }, { "epoch": 38.826395039858284, "grad_norm": 0.31806594133377075, "learning_rate": 1e-05, "loss": 0.9645, "step": 43835 }, { "epoch": 38.83082373782108, "grad_norm": 0.25791141390800476, "learning_rate": 1e-05, "loss": 0.9421, "step": 43840 }, { "epoch": 38.83525243578388, "grad_norm": 0.3068712055683136, "learning_rate": 1e-05, "loss": 0.9724, "step": 43845 }, { "epoch": 38.83968113374668, "grad_norm": 0.26004689931869507, "learning_rate": 1e-05, "loss": 0.9684, "step": 43850 }, { "epoch": 38.844109831709474, "grad_norm": 0.2442476749420166, "learning_rate": 1e-05, "loss": 0.955, "step": 43855 }, { "epoch": 38.848538529672275, "grad_norm": 0.2509099841117859, "learning_rate": 1e-05, "loss": 0.9754, "step": 43860 }, { "epoch": 38.85296722763508, "grad_norm": 0.22360014915466309, "learning_rate": 1e-05, "loss": 1.0124, "step": 43865 }, { "epoch": 38.85739592559787, "grad_norm": 0.25066977739334106, "learning_rate": 1e-05, "loss": 0.9833, "step": 43870 }, { "epoch": 38.86182462356067, "grad_norm": 0.2108364701271057, "learning_rate": 1e-05, "loss": 0.9693, "step": 43875 }, { "epoch": 38.86625332152347, "grad_norm": 0.23675523698329926, "learning_rate": 1e-05, "loss": 0.9895, "step": 43880 }, { "epoch": 38.870682019486274, "grad_norm": 0.22983263432979584, "learning_rate": 1e-05, "loss": 0.9619, "step": 43885 }, { "epoch": 38.87511071744907, "grad_norm": 0.21928900480270386, "learning_rate": 1e-05, "loss": 0.9458, "step": 43890 }, { "epoch": 38.87953941541187, "grad_norm": 0.23863732814788818, "learning_rate": 1e-05, "loss": 0.9534, "step": 43895 }, { "epoch": 38.88396811337467, "grad_norm": 0.22324906289577484, "learning_rate": 1e-05, "loss": 0.9832, "step": 43900 }, { "epoch": 38.888396811337465, "grad_norm": 0.26141077280044556, "learning_rate": 1e-05, "loss": 1.0267, "step": 43905 }, { "epoch": 38.892825509300266, "grad_norm": 0.2276572585105896, "learning_rate": 1e-05, "loss": 0.9473, "step": 43910 }, { "epoch": 38.89725420726307, "grad_norm": 0.21929779648780823, "learning_rate": 1e-05, "loss": 0.978, "step": 43915 }, { "epoch": 38.90168290522586, "grad_norm": 0.25231146812438965, "learning_rate": 1e-05, "loss": 1.0103, "step": 43920 }, { "epoch": 38.90611160318866, "grad_norm": 0.28124314546585083, "learning_rate": 1e-05, "loss": 0.9802, "step": 43925 }, { "epoch": 38.910540301151464, "grad_norm": 0.2180287390947342, "learning_rate": 1e-05, "loss": 0.9683, "step": 43930 }, { "epoch": 38.91496899911426, "grad_norm": 0.22384630143642426, "learning_rate": 1e-05, "loss": 0.9713, "step": 43935 }, { "epoch": 38.91939769707706, "grad_norm": 0.20836429297924042, "learning_rate": 1e-05, "loss": 1.0289, "step": 43940 }, { "epoch": 38.92382639503986, "grad_norm": 0.21534837782382965, "learning_rate": 1e-05, "loss": 0.9574, "step": 43945 }, { "epoch": 38.928255093002655, "grad_norm": 0.22883909940719604, "learning_rate": 1e-05, "loss": 0.9448, "step": 43950 }, { "epoch": 38.932683790965456, "grad_norm": 0.2744563817977905, "learning_rate": 1e-05, "loss": 0.9973, "step": 43955 }, { "epoch": 38.93711248892826, "grad_norm": 0.26108402013778687, "learning_rate": 1e-05, "loss": 0.9955, "step": 43960 }, { "epoch": 38.94154118689105, "grad_norm": 0.20949621498584747, "learning_rate": 1e-05, "loss": 0.9534, "step": 43965 }, { "epoch": 38.94596988485385, "grad_norm": 0.22525851428508759, "learning_rate": 1e-05, "loss": 0.9949, "step": 43970 }, { "epoch": 38.95039858281665, "grad_norm": 0.288987398147583, "learning_rate": 1e-05, "loss": 0.9567, "step": 43975 }, { "epoch": 38.95482728077945, "grad_norm": 0.2601258158683777, "learning_rate": 1e-05, "loss": 0.962, "step": 43980 }, { "epoch": 38.95925597874225, "grad_norm": 0.2004460245370865, "learning_rate": 1e-05, "loss": 0.9971, "step": 43985 }, { "epoch": 38.96368467670505, "grad_norm": 0.26141634583473206, "learning_rate": 1e-05, "loss": 1.0159, "step": 43990 }, { "epoch": 38.96811337466785, "grad_norm": 0.26430144906044006, "learning_rate": 1e-05, "loss": 0.9596, "step": 43995 }, { "epoch": 38.972542072630645, "grad_norm": 0.29508286714553833, "learning_rate": 1e-05, "loss": 1.0091, "step": 44000 }, { "epoch": 38.97697077059345, "grad_norm": 0.23690460622310638, "learning_rate": 1e-05, "loss": 0.9663, "step": 44005 }, { "epoch": 38.98139946855625, "grad_norm": 0.2896316647529602, "learning_rate": 1e-05, "loss": 0.9991, "step": 44010 }, { "epoch": 38.98582816651904, "grad_norm": 0.2283962517976761, "learning_rate": 1e-05, "loss": 0.9806, "step": 44015 }, { "epoch": 38.99025686448184, "grad_norm": 0.24760879576206207, "learning_rate": 1e-05, "loss": 0.9368, "step": 44020 }, { "epoch": 38.994685562444644, "grad_norm": 0.2084726095199585, "learning_rate": 1e-05, "loss": 0.8882, "step": 44025 }, { "epoch": 38.99911426040744, "grad_norm": 0.21469901502132416, "learning_rate": 1e-05, "loss": 0.9923, "step": 44030 }, { "epoch": 39.00354295837024, "grad_norm": 0.207458958029747, "learning_rate": 1e-05, "loss": 0.9782, "step": 44035 }, { "epoch": 39.00797165633304, "grad_norm": 0.23130010068416595, "learning_rate": 1e-05, "loss": 1.01, "step": 44040 }, { "epoch": 39.012400354295835, "grad_norm": 0.2177809774875641, "learning_rate": 1e-05, "loss": 0.9973, "step": 44045 }, { "epoch": 39.016829052258636, "grad_norm": 0.25430431962013245, "learning_rate": 1e-05, "loss": 0.9961, "step": 44050 }, { "epoch": 39.02125775022144, "grad_norm": 0.2396925538778305, "learning_rate": 1e-05, "loss": 1.0184, "step": 44055 }, { "epoch": 39.02568644818423, "grad_norm": 0.2683342397212982, "learning_rate": 1e-05, "loss": 0.9802, "step": 44060 }, { "epoch": 39.03011514614703, "grad_norm": 0.24209195375442505, "learning_rate": 1e-05, "loss": 0.9908, "step": 44065 }, { "epoch": 39.034543844109834, "grad_norm": 0.26828837394714355, "learning_rate": 1e-05, "loss": 0.9635, "step": 44070 }, { "epoch": 39.03897254207263, "grad_norm": 0.23773469030857086, "learning_rate": 1e-05, "loss": 0.9623, "step": 44075 }, { "epoch": 39.04340124003543, "grad_norm": 0.26834332942962646, "learning_rate": 1e-05, "loss": 0.999, "step": 44080 }, { "epoch": 39.04782993799823, "grad_norm": 0.2255697250366211, "learning_rate": 1e-05, "loss": 0.9245, "step": 44085 }, { "epoch": 39.052258635961024, "grad_norm": 0.2594926357269287, "learning_rate": 1e-05, "loss": 0.9737, "step": 44090 }, { "epoch": 39.056687333923826, "grad_norm": 0.29831230640411377, "learning_rate": 1e-05, "loss": 0.9617, "step": 44095 }, { "epoch": 39.06111603188663, "grad_norm": 0.22961190342903137, "learning_rate": 1e-05, "loss": 1.0086, "step": 44100 }, { "epoch": 39.06554472984942, "grad_norm": 0.25696319341659546, "learning_rate": 1e-05, "loss": 1.023, "step": 44105 }, { "epoch": 39.06997342781222, "grad_norm": 0.2482203096151352, "learning_rate": 1e-05, "loss": 0.9561, "step": 44110 }, { "epoch": 39.07440212577502, "grad_norm": 0.2834431231021881, "learning_rate": 1e-05, "loss": 0.951, "step": 44115 }, { "epoch": 39.078830823737825, "grad_norm": 0.24590784311294556, "learning_rate": 1e-05, "loss": 0.9654, "step": 44120 }, { "epoch": 39.08325952170062, "grad_norm": 0.2518172264099121, "learning_rate": 1e-05, "loss": 0.998, "step": 44125 }, { "epoch": 39.08768821966342, "grad_norm": 0.25441253185272217, "learning_rate": 1e-05, "loss": 0.9472, "step": 44130 }, { "epoch": 39.09211691762622, "grad_norm": 0.35825350880622864, "learning_rate": 1e-05, "loss": 0.9728, "step": 44135 }, { "epoch": 39.096545615589015, "grad_norm": 0.2582775950431824, "learning_rate": 1e-05, "loss": 1.0231, "step": 44140 }, { "epoch": 39.100974313551816, "grad_norm": 0.29992473125457764, "learning_rate": 1e-05, "loss": 1.0007, "step": 44145 }, { "epoch": 39.10540301151462, "grad_norm": 0.2526478171348572, "learning_rate": 1e-05, "loss": 1.0135, "step": 44150 }, { "epoch": 39.10983170947741, "grad_norm": 0.2766793966293335, "learning_rate": 1e-05, "loss": 0.9422, "step": 44155 }, { "epoch": 39.11426040744021, "grad_norm": 0.2710573971271515, "learning_rate": 1e-05, "loss": 1.0156, "step": 44160 }, { "epoch": 39.118689105403014, "grad_norm": 0.2608216106891632, "learning_rate": 1e-05, "loss": 1.0402, "step": 44165 }, { "epoch": 39.12311780336581, "grad_norm": 0.23495997488498688, "learning_rate": 1e-05, "loss": 1.0099, "step": 44170 }, { "epoch": 39.12754650132861, "grad_norm": 0.27490705251693726, "learning_rate": 1e-05, "loss": 0.9589, "step": 44175 }, { "epoch": 39.13197519929141, "grad_norm": 0.2554977536201477, "learning_rate": 1e-05, "loss": 0.8788, "step": 44180 }, { "epoch": 39.136403897254205, "grad_norm": 0.26549553871154785, "learning_rate": 1e-05, "loss": 0.9817, "step": 44185 }, { "epoch": 39.140832595217006, "grad_norm": 0.24583964049816132, "learning_rate": 1e-05, "loss": 0.9974, "step": 44190 }, { "epoch": 39.14526129317981, "grad_norm": 0.20966410636901855, "learning_rate": 1e-05, "loss": 1.0439, "step": 44195 }, { "epoch": 39.1496899911426, "grad_norm": 0.23834633827209473, "learning_rate": 1e-05, "loss": 1.0099, "step": 44200 }, { "epoch": 39.1541186891054, "grad_norm": 0.19447845220565796, "learning_rate": 1e-05, "loss": 0.9367, "step": 44205 }, { "epoch": 39.158547387068204, "grad_norm": 0.3450884222984314, "learning_rate": 1e-05, "loss": 1.0088, "step": 44210 }, { "epoch": 39.162976085031, "grad_norm": 0.22826288640499115, "learning_rate": 1e-05, "loss": 1.028, "step": 44215 }, { "epoch": 39.1674047829938, "grad_norm": 0.2646857798099518, "learning_rate": 1e-05, "loss": 1.0134, "step": 44220 }, { "epoch": 39.1718334809566, "grad_norm": 0.2580079436302185, "learning_rate": 1e-05, "loss": 0.9696, "step": 44225 }, { "epoch": 39.176262178919394, "grad_norm": 0.26738640666007996, "learning_rate": 1e-05, "loss": 1.011, "step": 44230 }, { "epoch": 39.180690876882196, "grad_norm": 0.26614123582839966, "learning_rate": 1e-05, "loss": 0.9714, "step": 44235 }, { "epoch": 39.185119574845, "grad_norm": 0.21896515786647797, "learning_rate": 1e-05, "loss": 0.9505, "step": 44240 }, { "epoch": 39.1895482728078, "grad_norm": 0.20820488035678864, "learning_rate": 1e-05, "loss": 0.9806, "step": 44245 }, { "epoch": 39.19397697077059, "grad_norm": 0.23873235285282135, "learning_rate": 1e-05, "loss": 1.001, "step": 44250 }, { "epoch": 39.19840566873339, "grad_norm": 0.22646278142929077, "learning_rate": 1e-05, "loss": 0.9205, "step": 44255 }, { "epoch": 39.202834366696194, "grad_norm": 0.23012956976890564, "learning_rate": 1e-05, "loss": 1.0508, "step": 44260 }, { "epoch": 39.20726306465899, "grad_norm": 0.23358553647994995, "learning_rate": 1e-05, "loss": 0.9401, "step": 44265 }, { "epoch": 39.21169176262179, "grad_norm": 0.22419176995754242, "learning_rate": 1e-05, "loss": 1.0257, "step": 44270 }, { "epoch": 39.21612046058459, "grad_norm": 0.29865843057632446, "learning_rate": 1e-05, "loss": 1.0029, "step": 44275 }, { "epoch": 39.220549158547385, "grad_norm": 0.23881004750728607, "learning_rate": 1e-05, "loss": 0.9748, "step": 44280 }, { "epoch": 39.224977856510186, "grad_norm": 0.2720155417919159, "learning_rate": 1e-05, "loss": 0.9877, "step": 44285 }, { "epoch": 39.22940655447299, "grad_norm": 0.2511255741119385, "learning_rate": 1e-05, "loss": 0.9799, "step": 44290 }, { "epoch": 39.23383525243578, "grad_norm": 0.24253632128238678, "learning_rate": 1e-05, "loss": 0.956, "step": 44295 }, { "epoch": 39.23826395039858, "grad_norm": 0.2567937970161438, "learning_rate": 1e-05, "loss": 0.9657, "step": 44300 }, { "epoch": 39.242692648361384, "grad_norm": 0.2816894054412842, "learning_rate": 1e-05, "loss": 0.9373, "step": 44305 }, { "epoch": 39.24712134632418, "grad_norm": 0.2455597072839737, "learning_rate": 1e-05, "loss": 0.9809, "step": 44310 }, { "epoch": 39.25155004428698, "grad_norm": 0.22847037017345428, "learning_rate": 1e-05, "loss": 0.9445, "step": 44315 }, { "epoch": 39.25597874224978, "grad_norm": 0.24603794515132904, "learning_rate": 1e-05, "loss": 0.9691, "step": 44320 }, { "epoch": 39.260407440212575, "grad_norm": 0.25504645705223083, "learning_rate": 1e-05, "loss": 0.9277, "step": 44325 }, { "epoch": 39.264836138175376, "grad_norm": 0.24797974526882172, "learning_rate": 1e-05, "loss": 1.0007, "step": 44330 }, { "epoch": 39.26926483613818, "grad_norm": 0.2609892785549164, "learning_rate": 1e-05, "loss": 1.0114, "step": 44335 }, { "epoch": 39.27369353410097, "grad_norm": 0.24699504673480988, "learning_rate": 1e-05, "loss": 0.9766, "step": 44340 }, { "epoch": 39.27812223206377, "grad_norm": 0.17874492704868317, "learning_rate": 1e-05, "loss": 0.9612, "step": 44345 }, { "epoch": 39.282550930026574, "grad_norm": 0.25838321447372437, "learning_rate": 1e-05, "loss": 0.9554, "step": 44350 }, { "epoch": 39.28697962798937, "grad_norm": 0.2813611328601837, "learning_rate": 1e-05, "loss": 0.9627, "step": 44355 }, { "epoch": 39.29140832595217, "grad_norm": 0.24154804646968842, "learning_rate": 1e-05, "loss": 0.9718, "step": 44360 }, { "epoch": 39.29583702391497, "grad_norm": 0.2507898211479187, "learning_rate": 1e-05, "loss": 1.0197, "step": 44365 }, { "epoch": 39.30026572187777, "grad_norm": 0.24096839129924774, "learning_rate": 1e-05, "loss": 0.9593, "step": 44370 }, { "epoch": 39.304694419840565, "grad_norm": 0.18299080431461334, "learning_rate": 1e-05, "loss": 1.0084, "step": 44375 }, { "epoch": 39.30912311780337, "grad_norm": 0.2190800905227661, "learning_rate": 1e-05, "loss": 0.9904, "step": 44380 }, { "epoch": 39.31355181576617, "grad_norm": 0.23172251880168915, "learning_rate": 1e-05, "loss": 0.9552, "step": 44385 }, { "epoch": 39.31798051372896, "grad_norm": 0.21666519343852997, "learning_rate": 1e-05, "loss": 0.9836, "step": 44390 }, { "epoch": 39.32240921169176, "grad_norm": 0.22334641218185425, "learning_rate": 1e-05, "loss": 0.9983, "step": 44395 }, { "epoch": 39.326837909654564, "grad_norm": 0.2638227045536041, "learning_rate": 1e-05, "loss": 0.972, "step": 44400 }, { "epoch": 39.33126660761736, "grad_norm": 0.23349282145500183, "learning_rate": 1e-05, "loss": 0.9822, "step": 44405 }, { "epoch": 39.33569530558016, "grad_norm": 0.24016672372817993, "learning_rate": 1e-05, "loss": 0.9785, "step": 44410 }, { "epoch": 39.34012400354296, "grad_norm": 0.23537923395633698, "learning_rate": 1e-05, "loss": 0.9533, "step": 44415 }, { "epoch": 39.344552701505755, "grad_norm": 0.3087572157382965, "learning_rate": 1e-05, "loss": 1.0216, "step": 44420 }, { "epoch": 39.348981399468556, "grad_norm": 0.28580746054649353, "learning_rate": 1e-05, "loss": 0.9798, "step": 44425 }, { "epoch": 39.35341009743136, "grad_norm": 0.22272680699825287, "learning_rate": 1e-05, "loss": 1.0028, "step": 44430 }, { "epoch": 39.35783879539415, "grad_norm": 0.2559676170349121, "learning_rate": 1e-05, "loss": 0.9856, "step": 44435 }, { "epoch": 39.36226749335695, "grad_norm": 0.23016200959682465, "learning_rate": 1e-05, "loss": 0.9639, "step": 44440 }, { "epoch": 39.366696191319754, "grad_norm": 0.25372010469436646, "learning_rate": 1e-05, "loss": 1.0639, "step": 44445 }, { "epoch": 39.37112488928255, "grad_norm": 0.25866615772247314, "learning_rate": 1e-05, "loss": 0.9657, "step": 44450 }, { "epoch": 39.37555358724535, "grad_norm": 0.21047712862491608, "learning_rate": 1e-05, "loss": 0.9781, "step": 44455 }, { "epoch": 39.37998228520815, "grad_norm": 0.2168559730052948, "learning_rate": 1e-05, "loss": 1.0129, "step": 44460 }, { "epoch": 39.384410983170945, "grad_norm": 0.21960103511810303, "learning_rate": 1e-05, "loss": 0.9376, "step": 44465 }, { "epoch": 39.388839681133746, "grad_norm": 0.2648416757583618, "learning_rate": 1e-05, "loss": 0.9795, "step": 44470 }, { "epoch": 39.39326837909655, "grad_norm": 0.24154886603355408, "learning_rate": 1e-05, "loss": 0.9725, "step": 44475 }, { "epoch": 39.39769707705934, "grad_norm": 0.22506415843963623, "learning_rate": 1e-05, "loss": 0.9077, "step": 44480 }, { "epoch": 39.40212577502214, "grad_norm": 0.21510253846645355, "learning_rate": 1e-05, "loss": 0.9802, "step": 44485 }, { "epoch": 39.40655447298494, "grad_norm": 0.24056263267993927, "learning_rate": 1e-05, "loss": 0.9704, "step": 44490 }, { "epoch": 39.410983170947745, "grad_norm": 0.2314099222421646, "learning_rate": 1e-05, "loss": 0.9827, "step": 44495 }, { "epoch": 39.41541186891054, "grad_norm": 0.2339179664850235, "learning_rate": 1e-05, "loss": 1.007, "step": 44500 }, { "epoch": 39.41984056687334, "grad_norm": 0.21904712915420532, "learning_rate": 1e-05, "loss": 0.9786, "step": 44505 }, { "epoch": 39.42426926483614, "grad_norm": 0.24298132956027985, "learning_rate": 1e-05, "loss": 1.016, "step": 44510 }, { "epoch": 39.428697962798935, "grad_norm": 0.2730855643749237, "learning_rate": 1e-05, "loss": 0.9692, "step": 44515 }, { "epoch": 39.43312666076174, "grad_norm": 0.276470422744751, "learning_rate": 1e-05, "loss": 0.9864, "step": 44520 }, { "epoch": 39.43755535872454, "grad_norm": 0.2324453443288803, "learning_rate": 1e-05, "loss": 0.9905, "step": 44525 }, { "epoch": 39.44198405668733, "grad_norm": 0.27155014872550964, "learning_rate": 1e-05, "loss": 0.9481, "step": 44530 }, { "epoch": 39.44641275465013, "grad_norm": 0.20579677820205688, "learning_rate": 1e-05, "loss": 0.9673, "step": 44535 }, { "epoch": 39.450841452612934, "grad_norm": 0.27589836716651917, "learning_rate": 1e-05, "loss": 0.9787, "step": 44540 }, { "epoch": 39.45527015057573, "grad_norm": 0.25818344950675964, "learning_rate": 1e-05, "loss": 0.9899, "step": 44545 }, { "epoch": 39.45969884853853, "grad_norm": 0.2616889774799347, "learning_rate": 1e-05, "loss": 0.9533, "step": 44550 }, { "epoch": 39.46412754650133, "grad_norm": 0.261217325925827, "learning_rate": 1e-05, "loss": 0.965, "step": 44555 }, { "epoch": 39.468556244464125, "grad_norm": 0.2635051906108856, "learning_rate": 1e-05, "loss": 0.9426, "step": 44560 }, { "epoch": 39.472984942426926, "grad_norm": 0.24376420676708221, "learning_rate": 1e-05, "loss": 0.9493, "step": 44565 }, { "epoch": 39.47741364038973, "grad_norm": 0.26818716526031494, "learning_rate": 1e-05, "loss": 0.9939, "step": 44570 }, { "epoch": 39.48184233835252, "grad_norm": 0.23902402818202972, "learning_rate": 1e-05, "loss": 0.9354, "step": 44575 }, { "epoch": 39.48627103631532, "grad_norm": 0.26013222336769104, "learning_rate": 1e-05, "loss": 1.0236, "step": 44580 }, { "epoch": 39.490699734278124, "grad_norm": 0.2219470739364624, "learning_rate": 1e-05, "loss": 0.9691, "step": 44585 }, { "epoch": 39.49512843224092, "grad_norm": 0.22296161949634552, "learning_rate": 1e-05, "loss": 0.9713, "step": 44590 }, { "epoch": 39.49955713020372, "grad_norm": 0.28529879450798035, "learning_rate": 1e-05, "loss": 0.9405, "step": 44595 }, { "epoch": 39.50398582816652, "grad_norm": 0.314669132232666, "learning_rate": 1e-05, "loss": 1.0405, "step": 44600 }, { "epoch": 39.508414526129314, "grad_norm": 0.2913729250431061, "learning_rate": 1e-05, "loss": 0.967, "step": 44605 }, { "epoch": 39.512843224092116, "grad_norm": 0.27496740221977234, "learning_rate": 1e-05, "loss": 0.9601, "step": 44610 }, { "epoch": 39.51727192205492, "grad_norm": 0.269618421792984, "learning_rate": 1e-05, "loss": 0.9676, "step": 44615 }, { "epoch": 39.52170062001772, "grad_norm": 0.2604779601097107, "learning_rate": 1e-05, "loss": 1.0021, "step": 44620 }, { "epoch": 39.52612931798051, "grad_norm": 0.23224522173404694, "learning_rate": 1e-05, "loss": 0.9441, "step": 44625 }, { "epoch": 39.53055801594331, "grad_norm": 0.2347531020641327, "learning_rate": 1e-05, "loss": 0.987, "step": 44630 }, { "epoch": 39.534986713906115, "grad_norm": 0.21391569077968597, "learning_rate": 1e-05, "loss": 1.0051, "step": 44635 }, { "epoch": 39.53941541186891, "grad_norm": 0.26430365443229675, "learning_rate": 1e-05, "loss": 0.976, "step": 44640 }, { "epoch": 39.54384410983171, "grad_norm": 0.250061571598053, "learning_rate": 1e-05, "loss": 1.0185, "step": 44645 }, { "epoch": 39.54827280779451, "grad_norm": 0.24326933920383453, "learning_rate": 1e-05, "loss": 0.9757, "step": 44650 }, { "epoch": 39.552701505757305, "grad_norm": 0.23102015256881714, "learning_rate": 1e-05, "loss": 0.9739, "step": 44655 }, { "epoch": 39.55713020372011, "grad_norm": 0.24936458468437195, "learning_rate": 1e-05, "loss": 0.9586, "step": 44660 }, { "epoch": 39.56155890168291, "grad_norm": 0.22950631380081177, "learning_rate": 1e-05, "loss": 0.9755, "step": 44665 }, { "epoch": 39.5659875996457, "grad_norm": 0.25710630416870117, "learning_rate": 1e-05, "loss": 0.9859, "step": 44670 }, { "epoch": 39.5704162976085, "grad_norm": 0.28747037053108215, "learning_rate": 1e-05, "loss": 0.9648, "step": 44675 }, { "epoch": 39.574844995571304, "grad_norm": 0.2632983922958374, "learning_rate": 1e-05, "loss": 0.9656, "step": 44680 }, { "epoch": 39.5792736935341, "grad_norm": 0.2290157526731491, "learning_rate": 1e-05, "loss": 0.974, "step": 44685 }, { "epoch": 39.5837023914969, "grad_norm": 0.24512092769145966, "learning_rate": 1e-05, "loss": 0.9594, "step": 44690 }, { "epoch": 39.5881310894597, "grad_norm": 0.2741369903087616, "learning_rate": 1e-05, "loss": 0.9661, "step": 44695 }, { "epoch": 39.592559787422495, "grad_norm": 0.24561484158039093, "learning_rate": 1e-05, "loss": 0.9458, "step": 44700 }, { "epoch": 39.596988485385296, "grad_norm": 0.24511860311031342, "learning_rate": 1e-05, "loss": 0.9692, "step": 44705 }, { "epoch": 39.6014171833481, "grad_norm": 0.2898678183555603, "learning_rate": 1e-05, "loss": 0.9643, "step": 44710 }, { "epoch": 39.60584588131089, "grad_norm": 0.22866323590278625, "learning_rate": 1e-05, "loss": 0.9789, "step": 44715 }, { "epoch": 39.61027457927369, "grad_norm": 0.2523864209651947, "learning_rate": 1e-05, "loss": 0.9886, "step": 44720 }, { "epoch": 39.614703277236494, "grad_norm": 0.25530171394348145, "learning_rate": 1e-05, "loss": 0.9893, "step": 44725 }, { "epoch": 39.619131975199295, "grad_norm": 0.2613741159439087, "learning_rate": 1e-05, "loss": 0.942, "step": 44730 }, { "epoch": 39.62356067316209, "grad_norm": 0.23648029565811157, "learning_rate": 1e-05, "loss": 0.9269, "step": 44735 }, { "epoch": 39.62798937112489, "grad_norm": 0.22205957770347595, "learning_rate": 1e-05, "loss": 0.903, "step": 44740 }, { "epoch": 39.63241806908769, "grad_norm": 0.228758305311203, "learning_rate": 1e-05, "loss": 0.9846, "step": 44745 }, { "epoch": 39.636846767050486, "grad_norm": 0.22042939066886902, "learning_rate": 1e-05, "loss": 1.0236, "step": 44750 }, { "epoch": 39.64127546501329, "grad_norm": 0.2286497950553894, "learning_rate": 1e-05, "loss": 0.9668, "step": 44755 }, { "epoch": 39.64570416297609, "grad_norm": 0.24070866405963898, "learning_rate": 1e-05, "loss": 0.9291, "step": 44760 }, { "epoch": 39.65013286093888, "grad_norm": 0.26237747073173523, "learning_rate": 1e-05, "loss": 0.9609, "step": 44765 }, { "epoch": 39.65456155890168, "grad_norm": 0.22717924416065216, "learning_rate": 1e-05, "loss": 0.9569, "step": 44770 }, { "epoch": 39.658990256864485, "grad_norm": 0.2244872748851776, "learning_rate": 1e-05, "loss": 0.9957, "step": 44775 }, { "epoch": 39.66341895482728, "grad_norm": 0.2297087013721466, "learning_rate": 1e-05, "loss": 0.9899, "step": 44780 }, { "epoch": 39.66784765279008, "grad_norm": 0.2541778087615967, "learning_rate": 1e-05, "loss": 1.0176, "step": 44785 }, { "epoch": 39.67227635075288, "grad_norm": 0.22266943752765656, "learning_rate": 1e-05, "loss": 0.9584, "step": 44790 }, { "epoch": 39.676705048715675, "grad_norm": 0.1932983547449112, "learning_rate": 1e-05, "loss": 0.9221, "step": 44795 }, { "epoch": 39.681133746678476, "grad_norm": 0.2179950326681137, "learning_rate": 1e-05, "loss": 0.9886, "step": 44800 }, { "epoch": 39.68556244464128, "grad_norm": 0.24509498476982117, "learning_rate": 1e-05, "loss": 0.9295, "step": 44805 }, { "epoch": 39.68999114260407, "grad_norm": 0.24425208568572998, "learning_rate": 1e-05, "loss": 0.9637, "step": 44810 }, { "epoch": 39.69441984056687, "grad_norm": 0.21673190593719482, "learning_rate": 1e-05, "loss": 0.9644, "step": 44815 }, { "epoch": 39.698848538529674, "grad_norm": 0.2017115205526352, "learning_rate": 1e-05, "loss": 0.9479, "step": 44820 }, { "epoch": 39.70327723649247, "grad_norm": 0.28510695695877075, "learning_rate": 1e-05, "loss": 0.9552, "step": 44825 }, { "epoch": 39.70770593445527, "grad_norm": 0.2330552488565445, "learning_rate": 1e-05, "loss": 0.9689, "step": 44830 }, { "epoch": 39.71213463241807, "grad_norm": 0.31918102502822876, "learning_rate": 1e-05, "loss": 1.0037, "step": 44835 }, { "epoch": 39.716563330380865, "grad_norm": 0.27928364276885986, "learning_rate": 1e-05, "loss": 0.9765, "step": 44840 }, { "epoch": 39.720992028343666, "grad_norm": 0.20045199990272522, "learning_rate": 1e-05, "loss": 0.9589, "step": 44845 }, { "epoch": 39.72542072630647, "grad_norm": 0.28553086519241333, "learning_rate": 1e-05, "loss": 0.9964, "step": 44850 }, { "epoch": 39.72984942426926, "grad_norm": 0.26095589995384216, "learning_rate": 1e-05, "loss": 0.9847, "step": 44855 }, { "epoch": 39.73427812223206, "grad_norm": 0.19937722384929657, "learning_rate": 1e-05, "loss": 1.0095, "step": 44860 }, { "epoch": 39.738706820194864, "grad_norm": 0.2984931766986847, "learning_rate": 1e-05, "loss": 0.9689, "step": 44865 }, { "epoch": 39.743135518157665, "grad_norm": 0.22275038063526154, "learning_rate": 1e-05, "loss": 0.9456, "step": 44870 }, { "epoch": 39.74756421612046, "grad_norm": 0.237240731716156, "learning_rate": 1e-05, "loss": 1.0505, "step": 44875 }, { "epoch": 39.75199291408326, "grad_norm": 0.25869351625442505, "learning_rate": 1e-05, "loss": 0.9638, "step": 44880 }, { "epoch": 39.75642161204606, "grad_norm": 0.2375112920999527, "learning_rate": 1e-05, "loss": 1.001, "step": 44885 }, { "epoch": 39.760850310008856, "grad_norm": 0.28122302889823914, "learning_rate": 1e-05, "loss": 0.9643, "step": 44890 }, { "epoch": 39.76527900797166, "grad_norm": 0.2714742422103882, "learning_rate": 1e-05, "loss": 0.9934, "step": 44895 }, { "epoch": 39.76970770593446, "grad_norm": 0.25567108392715454, "learning_rate": 1e-05, "loss": 1.0136, "step": 44900 }, { "epoch": 39.77413640389725, "grad_norm": 0.2884957790374756, "learning_rate": 1e-05, "loss": 1.0103, "step": 44905 }, { "epoch": 39.77856510186005, "grad_norm": 0.20873017609119415, "learning_rate": 1e-05, "loss": 1.0075, "step": 44910 }, { "epoch": 39.782993799822854, "grad_norm": 0.27229395508766174, "learning_rate": 1e-05, "loss": 0.9594, "step": 44915 }, { "epoch": 39.78742249778565, "grad_norm": 0.23368670046329498, "learning_rate": 1e-05, "loss": 0.9542, "step": 44920 }, { "epoch": 39.79185119574845, "grad_norm": 0.28879836201667786, "learning_rate": 1e-05, "loss": 0.9523, "step": 44925 }, { "epoch": 39.79627989371125, "grad_norm": 0.25292420387268066, "learning_rate": 1e-05, "loss": 0.9758, "step": 44930 }, { "epoch": 39.800708591674045, "grad_norm": 0.22302865982055664, "learning_rate": 1e-05, "loss": 0.9775, "step": 44935 }, { "epoch": 39.805137289636846, "grad_norm": 0.22722569108009338, "learning_rate": 1e-05, "loss": 0.9368, "step": 44940 }, { "epoch": 39.80956598759965, "grad_norm": 0.23947083950042725, "learning_rate": 1e-05, "loss": 0.9681, "step": 44945 }, { "epoch": 39.81399468556244, "grad_norm": 0.20041674375534058, "learning_rate": 1e-05, "loss": 1.0202, "step": 44950 }, { "epoch": 39.81842338352524, "grad_norm": 0.23563252389431, "learning_rate": 1e-05, "loss": 0.9664, "step": 44955 }, { "epoch": 39.822852081488044, "grad_norm": 0.2579123377799988, "learning_rate": 1e-05, "loss": 0.9686, "step": 44960 }, { "epoch": 39.82728077945084, "grad_norm": 0.22976310551166534, "learning_rate": 1e-05, "loss": 0.9978, "step": 44965 }, { "epoch": 39.83170947741364, "grad_norm": 0.24820353090763092, "learning_rate": 1e-05, "loss": 1.0031, "step": 44970 }, { "epoch": 39.83613817537644, "grad_norm": 0.25754719972610474, "learning_rate": 1e-05, "loss": 0.9725, "step": 44975 }, { "epoch": 39.84056687333924, "grad_norm": 0.2591226100921631, "learning_rate": 1e-05, "loss": 1.0233, "step": 44980 }, { "epoch": 39.844995571302036, "grad_norm": 0.2403859794139862, "learning_rate": 1e-05, "loss": 0.9988, "step": 44985 }, { "epoch": 39.84942426926484, "grad_norm": 0.23662184178829193, "learning_rate": 1e-05, "loss": 0.9581, "step": 44990 }, { "epoch": 39.85385296722764, "grad_norm": 0.24221083521842957, "learning_rate": 1e-05, "loss": 0.9424, "step": 44995 }, { "epoch": 39.85828166519043, "grad_norm": 0.2532975375652313, "learning_rate": 1e-05, "loss": 0.9808, "step": 45000 }, { "epoch": 39.862710363153234, "grad_norm": 0.27036911249160767, "learning_rate": 1e-05, "loss": 0.9717, "step": 45005 }, { "epoch": 39.867139061116035, "grad_norm": 0.2022770345211029, "learning_rate": 1e-05, "loss": 0.9151, "step": 45010 }, { "epoch": 39.87156775907883, "grad_norm": 0.21985706686973572, "learning_rate": 1e-05, "loss": 0.9686, "step": 45015 }, { "epoch": 39.87599645704163, "grad_norm": 0.22863242030143738, "learning_rate": 1e-05, "loss": 0.9753, "step": 45020 }, { "epoch": 39.88042515500443, "grad_norm": 0.2397555112838745, "learning_rate": 1e-05, "loss": 0.9697, "step": 45025 }, { "epoch": 39.884853852967225, "grad_norm": 0.21645426750183105, "learning_rate": 1e-05, "loss": 0.9929, "step": 45030 }, { "epoch": 39.88928255093003, "grad_norm": 0.2565898597240448, "learning_rate": 1e-05, "loss": 1.0036, "step": 45035 }, { "epoch": 39.89371124889283, "grad_norm": 0.21214188635349274, "learning_rate": 1e-05, "loss": 0.957, "step": 45040 }, { "epoch": 39.89813994685562, "grad_norm": 0.2289351522922516, "learning_rate": 1e-05, "loss": 0.9918, "step": 45045 }, { "epoch": 39.90256864481842, "grad_norm": 0.23046784102916718, "learning_rate": 1e-05, "loss": 0.9453, "step": 45050 }, { "epoch": 39.906997342781224, "grad_norm": 0.20969471335411072, "learning_rate": 1e-05, "loss": 1.01, "step": 45055 }, { "epoch": 39.91142604074402, "grad_norm": 0.21135160326957703, "learning_rate": 1e-05, "loss": 0.9793, "step": 45060 }, { "epoch": 39.91585473870682, "grad_norm": 0.23426280915737152, "learning_rate": 1e-05, "loss": 0.9676, "step": 45065 }, { "epoch": 39.92028343666962, "grad_norm": 0.24642334878444672, "learning_rate": 1e-05, "loss": 0.9665, "step": 45070 }, { "epoch": 39.924712134632415, "grad_norm": 0.23854975402355194, "learning_rate": 1e-05, "loss": 1.036, "step": 45075 }, { "epoch": 39.929140832595216, "grad_norm": 0.24300029873847961, "learning_rate": 1e-05, "loss": 0.9728, "step": 45080 }, { "epoch": 39.93356953055802, "grad_norm": 0.23616068065166473, "learning_rate": 1e-05, "loss": 0.9567, "step": 45085 }, { "epoch": 39.93799822852081, "grad_norm": 0.2391427606344223, "learning_rate": 1e-05, "loss": 0.964, "step": 45090 }, { "epoch": 39.94242692648361, "grad_norm": 0.23161187767982483, "learning_rate": 1e-05, "loss": 0.9791, "step": 45095 }, { "epoch": 39.946855624446414, "grad_norm": 0.23924826085567474, "learning_rate": 1e-05, "loss": 0.9701, "step": 45100 }, { "epoch": 39.951284322409215, "grad_norm": 0.25415587425231934, "learning_rate": 1e-05, "loss": 1.0133, "step": 45105 }, { "epoch": 39.95571302037201, "grad_norm": 0.28035393357276917, "learning_rate": 1e-05, "loss": 0.9612, "step": 45110 }, { "epoch": 39.96014171833481, "grad_norm": 0.22854293882846832, "learning_rate": 1e-05, "loss": 1.0048, "step": 45115 }, { "epoch": 39.96457041629761, "grad_norm": 0.20898598432540894, "learning_rate": 1e-05, "loss": 1.0042, "step": 45120 }, { "epoch": 39.968999114260406, "grad_norm": 0.2618158757686615, "learning_rate": 1e-05, "loss": 0.979, "step": 45125 }, { "epoch": 39.97342781222321, "grad_norm": 0.26269733905792236, "learning_rate": 1e-05, "loss": 1.0246, "step": 45130 }, { "epoch": 39.97785651018601, "grad_norm": 0.269710898399353, "learning_rate": 1e-05, "loss": 0.9841, "step": 45135 }, { "epoch": 39.9822852081488, "grad_norm": 0.2545463740825653, "learning_rate": 1e-05, "loss": 0.9435, "step": 45140 }, { "epoch": 39.9867139061116, "grad_norm": 0.23376058042049408, "learning_rate": 1e-05, "loss": 0.9437, "step": 45145 }, { "epoch": 39.991142604074405, "grad_norm": 0.2609884738922119, "learning_rate": 1e-05, "loss": 0.9928, "step": 45150 }, { "epoch": 39.9955713020372, "grad_norm": 0.22352980077266693, "learning_rate": 1e-05, "loss": 0.9811, "step": 45155 }, { "epoch": 40.0, "grad_norm": 0.25171589851379395, "learning_rate": 1e-05, "loss": 0.9763, "step": 45160 }, { "epoch": 40.0044286979628, "grad_norm": 0.27114593982696533, "learning_rate": 1e-05, "loss": 0.9814, "step": 45165 }, { "epoch": 40.008857395925595, "grad_norm": 0.27931538224220276, "learning_rate": 1e-05, "loss": 1.0052, "step": 45170 }, { "epoch": 40.0132860938884, "grad_norm": 0.2411375492811203, "learning_rate": 1e-05, "loss": 0.9836, "step": 45175 }, { "epoch": 40.0177147918512, "grad_norm": 0.2151385247707367, "learning_rate": 1e-05, "loss": 0.9305, "step": 45180 }, { "epoch": 40.02214348981399, "grad_norm": 0.23496542870998383, "learning_rate": 1e-05, "loss": 0.9703, "step": 45185 }, { "epoch": 40.02657218777679, "grad_norm": 0.22750380635261536, "learning_rate": 1e-05, "loss": 0.9721, "step": 45190 }, { "epoch": 40.031000885739594, "grad_norm": 0.2113644927740097, "learning_rate": 1e-05, "loss": 0.9577, "step": 45195 }, { "epoch": 40.03542958370239, "grad_norm": 0.25214388966560364, "learning_rate": 1e-05, "loss": 0.9867, "step": 45200 }, { "epoch": 40.03985828166519, "grad_norm": 0.22801996767520905, "learning_rate": 1e-05, "loss": 0.9973, "step": 45205 }, { "epoch": 40.04428697962799, "grad_norm": 0.20237503945827484, "learning_rate": 1e-05, "loss": 0.9818, "step": 45210 }, { "epoch": 40.048715677590785, "grad_norm": 0.24248719215393066, "learning_rate": 1e-05, "loss": 1.0204, "step": 45215 }, { "epoch": 40.053144375553586, "grad_norm": 0.21527644991874695, "learning_rate": 1e-05, "loss": 1.0119, "step": 45220 }, { "epoch": 40.05757307351639, "grad_norm": 0.22583015263080597, "learning_rate": 1e-05, "loss": 1.0002, "step": 45225 }, { "epoch": 40.06200177147919, "grad_norm": 0.24841605126857758, "learning_rate": 1e-05, "loss": 0.9452, "step": 45230 }, { "epoch": 40.06643046944198, "grad_norm": 0.23951411247253418, "learning_rate": 1e-05, "loss": 0.99, "step": 45235 }, { "epoch": 40.070859167404784, "grad_norm": 0.27444639801979065, "learning_rate": 1e-05, "loss": 0.9861, "step": 45240 }, { "epoch": 40.075287865367585, "grad_norm": 0.2693447768688202, "learning_rate": 1e-05, "loss": 0.9686, "step": 45245 }, { "epoch": 40.07971656333038, "grad_norm": 0.226759672164917, "learning_rate": 1e-05, "loss": 0.9638, "step": 45250 }, { "epoch": 40.08414526129318, "grad_norm": 0.2614608108997345, "learning_rate": 1e-05, "loss": 0.9712, "step": 45255 }, { "epoch": 40.08857395925598, "grad_norm": 0.27981653809547424, "learning_rate": 1e-05, "loss": 0.9565, "step": 45260 }, { "epoch": 40.093002657218776, "grad_norm": 0.24565313756465912, "learning_rate": 1e-05, "loss": 0.9448, "step": 45265 }, { "epoch": 40.09743135518158, "grad_norm": 0.24127835035324097, "learning_rate": 1e-05, "loss": 0.9428, "step": 45270 }, { "epoch": 40.10186005314438, "grad_norm": 0.25702059268951416, "learning_rate": 1e-05, "loss": 0.9445, "step": 45275 }, { "epoch": 40.10628875110717, "grad_norm": 0.21388256549835205, "learning_rate": 1e-05, "loss": 0.9748, "step": 45280 }, { "epoch": 40.11071744906997, "grad_norm": 0.26808103919029236, "learning_rate": 1e-05, "loss": 0.9953, "step": 45285 }, { "epoch": 40.115146147032775, "grad_norm": 0.2567979395389557, "learning_rate": 1e-05, "loss": 0.9941, "step": 45290 }, { "epoch": 40.11957484499557, "grad_norm": 0.25704577565193176, "learning_rate": 1e-05, "loss": 1.0309, "step": 45295 }, { "epoch": 40.12400354295837, "grad_norm": 0.25858715176582336, "learning_rate": 1e-05, "loss": 0.9821, "step": 45300 }, { "epoch": 40.12843224092117, "grad_norm": 0.245374396443367, "learning_rate": 1e-05, "loss": 0.976, "step": 45305 }, { "epoch": 40.132860938883965, "grad_norm": 0.27113077044487, "learning_rate": 1e-05, "loss": 0.965, "step": 45310 }, { "epoch": 40.137289636846766, "grad_norm": 0.299653023481369, "learning_rate": 1e-05, "loss": 0.9638, "step": 45315 }, { "epoch": 40.14171833480957, "grad_norm": 0.2712441086769104, "learning_rate": 1e-05, "loss": 1.0347, "step": 45320 }, { "epoch": 40.14614703277236, "grad_norm": 0.2394365668296814, "learning_rate": 1e-05, "loss": 0.9956, "step": 45325 }, { "epoch": 40.15057573073516, "grad_norm": 0.24231483042240143, "learning_rate": 1e-05, "loss": 1.0075, "step": 45330 }, { "epoch": 40.155004428697964, "grad_norm": 0.2390504628419876, "learning_rate": 1e-05, "loss": 0.9401, "step": 45335 }, { "epoch": 40.15943312666076, "grad_norm": 0.22679764032363892, "learning_rate": 1e-05, "loss": 1.0093, "step": 45340 }, { "epoch": 40.16386182462356, "grad_norm": 0.230784609913826, "learning_rate": 1e-05, "loss": 0.9431, "step": 45345 }, { "epoch": 40.16829052258636, "grad_norm": 0.2817385792732239, "learning_rate": 1e-05, "loss": 0.9994, "step": 45350 }, { "epoch": 40.17271922054916, "grad_norm": 0.24811577796936035, "learning_rate": 1e-05, "loss": 0.988, "step": 45355 }, { "epoch": 40.177147918511956, "grad_norm": 0.2963844835758209, "learning_rate": 1e-05, "loss": 0.9246, "step": 45360 }, { "epoch": 40.18157661647476, "grad_norm": 0.2743324935436249, "learning_rate": 1e-05, "loss": 0.9698, "step": 45365 }, { "epoch": 40.18600531443756, "grad_norm": 0.2275199294090271, "learning_rate": 1e-05, "loss": 1.0166, "step": 45370 }, { "epoch": 40.19043401240035, "grad_norm": 0.3600218892097473, "learning_rate": 1e-05, "loss": 0.9564, "step": 45375 }, { "epoch": 40.194862710363154, "grad_norm": 0.2674665153026581, "learning_rate": 1e-05, "loss": 0.9072, "step": 45380 }, { "epoch": 40.199291408325955, "grad_norm": 0.2581815719604492, "learning_rate": 1e-05, "loss": 0.9829, "step": 45385 }, { "epoch": 40.20372010628875, "grad_norm": 0.24808089435100555, "learning_rate": 1e-05, "loss": 0.94, "step": 45390 }, { "epoch": 40.20814880425155, "grad_norm": 0.2820883095264435, "learning_rate": 1e-05, "loss": 0.9665, "step": 45395 }, { "epoch": 40.21257750221435, "grad_norm": 0.25388291478157043, "learning_rate": 1e-05, "loss": 0.9533, "step": 45400 }, { "epoch": 40.217006200177146, "grad_norm": 0.2618962228298187, "learning_rate": 1e-05, "loss": 0.9692, "step": 45405 }, { "epoch": 40.22143489813995, "grad_norm": 0.25588858127593994, "learning_rate": 1e-05, "loss": 0.951, "step": 45410 }, { "epoch": 40.22586359610275, "grad_norm": 0.23774223029613495, "learning_rate": 1e-05, "loss": 0.959, "step": 45415 }, { "epoch": 40.23029229406554, "grad_norm": 0.29644522070884705, "learning_rate": 1e-05, "loss": 0.9451, "step": 45420 }, { "epoch": 40.23472099202834, "grad_norm": 0.24150510132312775, "learning_rate": 1e-05, "loss": 0.9991, "step": 45425 }, { "epoch": 40.239149689991144, "grad_norm": 0.23090475797653198, "learning_rate": 1e-05, "loss": 0.9813, "step": 45430 }, { "epoch": 40.24357838795394, "grad_norm": 0.22343404591083527, "learning_rate": 1e-05, "loss": 0.9918, "step": 45435 }, { "epoch": 40.24800708591674, "grad_norm": 0.21493154764175415, "learning_rate": 1e-05, "loss": 0.9667, "step": 45440 }, { "epoch": 40.25243578387954, "grad_norm": 0.26069170236587524, "learning_rate": 1e-05, "loss": 0.9279, "step": 45445 }, { "epoch": 40.256864481842335, "grad_norm": 0.24710218608379364, "learning_rate": 1e-05, "loss": 0.9363, "step": 45450 }, { "epoch": 40.261293179805136, "grad_norm": 0.25690266489982605, "learning_rate": 1e-05, "loss": 0.9382, "step": 45455 }, { "epoch": 40.26572187776794, "grad_norm": 0.23312783241271973, "learning_rate": 1e-05, "loss": 1.016, "step": 45460 }, { "epoch": 40.27015057573073, "grad_norm": 0.27539923787117004, "learning_rate": 1e-05, "loss": 0.9616, "step": 45465 }, { "epoch": 40.27457927369353, "grad_norm": 0.20210836827754974, "learning_rate": 1e-05, "loss": 0.955, "step": 45470 }, { "epoch": 40.279007971656334, "grad_norm": 0.26728883385658264, "learning_rate": 1e-05, "loss": 0.9483, "step": 45475 }, { "epoch": 40.283436669619135, "grad_norm": 0.2526356279850006, "learning_rate": 1e-05, "loss": 1.0231, "step": 45480 }, { "epoch": 40.28786536758193, "grad_norm": 0.23037558794021606, "learning_rate": 1e-05, "loss": 0.9614, "step": 45485 }, { "epoch": 40.29229406554473, "grad_norm": 0.2466672956943512, "learning_rate": 1e-05, "loss": 0.9786, "step": 45490 }, { "epoch": 40.29672276350753, "grad_norm": 0.22266316413879395, "learning_rate": 1e-05, "loss": 1.039, "step": 45495 }, { "epoch": 40.301151461470326, "grad_norm": 0.24533483386039734, "learning_rate": 1e-05, "loss": 0.9461, "step": 45500 }, { "epoch": 40.30558015943313, "grad_norm": 0.2344052493572235, "learning_rate": 1e-05, "loss": 0.9627, "step": 45505 }, { "epoch": 40.31000885739593, "grad_norm": 0.24981491267681122, "learning_rate": 1e-05, "loss": 0.9911, "step": 45510 }, { "epoch": 40.31443755535872, "grad_norm": 0.23269736766815186, "learning_rate": 1e-05, "loss": 0.9805, "step": 45515 }, { "epoch": 40.318866253321524, "grad_norm": 0.22463656961917877, "learning_rate": 1e-05, "loss": 1.0022, "step": 45520 }, { "epoch": 40.323294951284325, "grad_norm": 0.2597326636314392, "learning_rate": 1e-05, "loss": 0.9238, "step": 45525 }, { "epoch": 40.32772364924712, "grad_norm": 0.23905695974826813, "learning_rate": 1e-05, "loss": 0.9697, "step": 45530 }, { "epoch": 40.33215234720992, "grad_norm": 0.22376959025859833, "learning_rate": 1e-05, "loss": 1.0595, "step": 45535 }, { "epoch": 40.33658104517272, "grad_norm": 0.28511202335357666, "learning_rate": 1e-05, "loss": 0.9117, "step": 45540 }, { "epoch": 40.341009743135515, "grad_norm": 0.2111230343580246, "learning_rate": 1e-05, "loss": 0.9589, "step": 45545 }, { "epoch": 40.34543844109832, "grad_norm": 0.23622570931911469, "learning_rate": 1e-05, "loss": 0.986, "step": 45550 }, { "epoch": 40.34986713906112, "grad_norm": 0.257079154253006, "learning_rate": 1e-05, "loss": 1.0077, "step": 45555 }, { "epoch": 40.35429583702391, "grad_norm": 0.25046560168266296, "learning_rate": 1e-05, "loss": 0.9917, "step": 45560 }, { "epoch": 40.35872453498671, "grad_norm": 0.24342671036720276, "learning_rate": 1e-05, "loss": 0.9656, "step": 45565 }, { "epoch": 40.363153232949514, "grad_norm": 0.24034452438354492, "learning_rate": 1e-05, "loss": 0.9821, "step": 45570 }, { "epoch": 40.36758193091231, "grad_norm": 0.22056357562541962, "learning_rate": 1e-05, "loss": 0.988, "step": 45575 }, { "epoch": 40.37201062887511, "grad_norm": 0.26125818490982056, "learning_rate": 1e-05, "loss": 0.967, "step": 45580 }, { "epoch": 40.37643932683791, "grad_norm": 0.23894613981246948, "learning_rate": 1e-05, "loss": 1.0033, "step": 45585 }, { "epoch": 40.380868024800705, "grad_norm": 0.3081366717815399, "learning_rate": 1e-05, "loss": 1.0554, "step": 45590 }, { "epoch": 40.385296722763506, "grad_norm": 0.299436092376709, "learning_rate": 1e-05, "loss": 0.9823, "step": 45595 }, { "epoch": 40.38972542072631, "grad_norm": 0.23816809058189392, "learning_rate": 1e-05, "loss": 0.9162, "step": 45600 }, { "epoch": 40.39415411868911, "grad_norm": 0.262315034866333, "learning_rate": 1e-05, "loss": 0.9951, "step": 45605 }, { "epoch": 40.3985828166519, "grad_norm": 0.24232378602027893, "learning_rate": 1e-05, "loss": 1.0427, "step": 45610 }, { "epoch": 40.403011514614704, "grad_norm": 0.21910181641578674, "learning_rate": 1e-05, "loss": 0.9544, "step": 45615 }, { "epoch": 40.407440212577505, "grad_norm": 0.2129480093717575, "learning_rate": 1e-05, "loss": 0.8943, "step": 45620 }, { "epoch": 40.4118689105403, "grad_norm": 0.23968534171581268, "learning_rate": 1e-05, "loss": 0.9541, "step": 45625 }, { "epoch": 40.4162976085031, "grad_norm": 0.24089445173740387, "learning_rate": 1e-05, "loss": 0.9753, "step": 45630 }, { "epoch": 40.4207263064659, "grad_norm": 0.2468075007200241, "learning_rate": 1e-05, "loss": 0.9348, "step": 45635 }, { "epoch": 40.425155004428696, "grad_norm": 0.2679606080055237, "learning_rate": 1e-05, "loss": 1.0112, "step": 45640 }, { "epoch": 40.4295837023915, "grad_norm": 0.23917730152606964, "learning_rate": 1e-05, "loss": 0.9963, "step": 45645 }, { "epoch": 40.4340124003543, "grad_norm": 0.24306005239486694, "learning_rate": 1e-05, "loss": 0.9481, "step": 45650 }, { "epoch": 40.43844109831709, "grad_norm": 0.20310689508914948, "learning_rate": 1e-05, "loss": 0.9238, "step": 45655 }, { "epoch": 40.44286979627989, "grad_norm": 0.230911985039711, "learning_rate": 1e-05, "loss": 0.9443, "step": 45660 }, { "epoch": 40.447298494242695, "grad_norm": 0.2517818510532379, "learning_rate": 1e-05, "loss": 0.9905, "step": 45665 }, { "epoch": 40.45172719220549, "grad_norm": 0.236632838845253, "learning_rate": 1e-05, "loss": 0.9762, "step": 45670 }, { "epoch": 40.45615589016829, "grad_norm": 0.24472634494304657, "learning_rate": 1e-05, "loss": 0.9779, "step": 45675 }, { "epoch": 40.46058458813109, "grad_norm": 0.26011422276496887, "learning_rate": 1e-05, "loss": 0.924, "step": 45680 }, { "epoch": 40.465013286093885, "grad_norm": 0.22335882484912872, "learning_rate": 1e-05, "loss": 1.0818, "step": 45685 }, { "epoch": 40.46944198405669, "grad_norm": 0.22788962721824646, "learning_rate": 1e-05, "loss": 1.0175, "step": 45690 }, { "epoch": 40.47387068201949, "grad_norm": 0.211948961019516, "learning_rate": 1e-05, "loss": 1.0448, "step": 45695 }, { "epoch": 40.47829937998228, "grad_norm": 0.23885637521743774, "learning_rate": 1e-05, "loss": 0.9751, "step": 45700 }, { "epoch": 40.48272807794508, "grad_norm": 0.20586352050304413, "learning_rate": 1e-05, "loss": 0.9713, "step": 45705 }, { "epoch": 40.487156775907884, "grad_norm": 0.2192770391702652, "learning_rate": 1e-05, "loss": 0.9508, "step": 45710 }, { "epoch": 40.491585473870686, "grad_norm": 0.22180071473121643, "learning_rate": 1e-05, "loss": 0.9892, "step": 45715 }, { "epoch": 40.49601417183348, "grad_norm": 0.2147900015115738, "learning_rate": 1e-05, "loss": 0.9232, "step": 45720 }, { "epoch": 40.50044286979628, "grad_norm": 0.2357667237520218, "learning_rate": 1e-05, "loss": 1.0306, "step": 45725 }, { "epoch": 40.50487156775908, "grad_norm": 0.27332115173339844, "learning_rate": 1e-05, "loss": 1.0309, "step": 45730 }, { "epoch": 40.509300265721876, "grad_norm": 0.23799879848957062, "learning_rate": 1e-05, "loss": 0.9416, "step": 45735 }, { "epoch": 40.51372896368468, "grad_norm": 0.24116341769695282, "learning_rate": 1e-05, "loss": 1.0344, "step": 45740 }, { "epoch": 40.51815766164748, "grad_norm": 0.2594386041164398, "learning_rate": 1e-05, "loss": 0.9514, "step": 45745 }, { "epoch": 40.52258635961027, "grad_norm": 0.22958551347255707, "learning_rate": 1e-05, "loss": 0.988, "step": 45750 }, { "epoch": 40.527015057573074, "grad_norm": 0.24454988539218903, "learning_rate": 1e-05, "loss": 0.9674, "step": 45755 }, { "epoch": 40.531443755535875, "grad_norm": 0.2570692002773285, "learning_rate": 1e-05, "loss": 0.9418, "step": 45760 }, { "epoch": 40.53587245349867, "grad_norm": 0.21165458858013153, "learning_rate": 1e-05, "loss": 0.9345, "step": 45765 }, { "epoch": 40.54030115146147, "grad_norm": 0.27606502175331116, "learning_rate": 1e-05, "loss": 0.9445, "step": 45770 }, { "epoch": 40.54472984942427, "grad_norm": 0.19953720271587372, "learning_rate": 1e-05, "loss": 0.9321, "step": 45775 }, { "epoch": 40.549158547387066, "grad_norm": 0.23468749225139618, "learning_rate": 1e-05, "loss": 0.9884, "step": 45780 }, { "epoch": 40.55358724534987, "grad_norm": 0.22528351843357086, "learning_rate": 1e-05, "loss": 0.9291, "step": 45785 }, { "epoch": 40.55801594331267, "grad_norm": 0.20409393310546875, "learning_rate": 1e-05, "loss": 0.9801, "step": 45790 }, { "epoch": 40.56244464127546, "grad_norm": 0.3014966547489166, "learning_rate": 1e-05, "loss": 0.9883, "step": 45795 }, { "epoch": 40.56687333923826, "grad_norm": 0.26219120621681213, "learning_rate": 1e-05, "loss": 1.0427, "step": 45800 }, { "epoch": 40.571302037201065, "grad_norm": 0.3385133147239685, "learning_rate": 1e-05, "loss": 0.938, "step": 45805 }, { "epoch": 40.57573073516386, "grad_norm": 0.22322909533977509, "learning_rate": 1e-05, "loss": 1.0274, "step": 45810 }, { "epoch": 40.58015943312666, "grad_norm": 3.132784605026245, "learning_rate": 1e-05, "loss": 1.0196, "step": 45815 }, { "epoch": 40.58458813108946, "grad_norm": 0.26024380326271057, "learning_rate": 1e-05, "loss": 0.9282, "step": 45820 }, { "epoch": 40.589016829052255, "grad_norm": 0.24028509855270386, "learning_rate": 1e-05, "loss": 0.9329, "step": 45825 }, { "epoch": 40.59344552701506, "grad_norm": 0.2672039270401001, "learning_rate": 1e-05, "loss": 1.012, "step": 45830 }, { "epoch": 40.59787422497786, "grad_norm": 0.30118849873542786, "learning_rate": 1e-05, "loss": 0.9543, "step": 45835 }, { "epoch": 40.60230292294066, "grad_norm": 0.24499301612377167, "learning_rate": 1e-05, "loss": 0.9557, "step": 45840 }, { "epoch": 40.60673162090345, "grad_norm": 0.2375156283378601, "learning_rate": 1e-05, "loss": 0.9702, "step": 45845 }, { "epoch": 40.611160318866254, "grad_norm": 0.20132434368133545, "learning_rate": 1e-05, "loss": 0.9659, "step": 45850 }, { "epoch": 40.615589016829055, "grad_norm": 0.2787694036960602, "learning_rate": 1e-05, "loss": 0.9984, "step": 45855 }, { "epoch": 40.62001771479185, "grad_norm": 0.2578490972518921, "learning_rate": 1e-05, "loss": 0.9218, "step": 45860 }, { "epoch": 40.62444641275465, "grad_norm": 0.26339927315711975, "learning_rate": 1e-05, "loss": 0.9816, "step": 45865 }, { "epoch": 40.62887511071745, "grad_norm": 0.23578201234340668, "learning_rate": 1e-05, "loss": 0.9769, "step": 45870 }, { "epoch": 40.633303808680246, "grad_norm": 0.25542744994163513, "learning_rate": 1e-05, "loss": 0.9144, "step": 45875 }, { "epoch": 40.63773250664305, "grad_norm": 0.2786359488964081, "learning_rate": 1e-05, "loss": 1.0166, "step": 45880 }, { "epoch": 40.64216120460585, "grad_norm": 0.2705237865447998, "learning_rate": 1e-05, "loss": 0.9986, "step": 45885 }, { "epoch": 40.64658990256864, "grad_norm": 0.2309940904378891, "learning_rate": 1e-05, "loss": 0.9455, "step": 45890 }, { "epoch": 40.651018600531444, "grad_norm": 0.2568571865558624, "learning_rate": 1e-05, "loss": 1.003, "step": 45895 }, { "epoch": 40.655447298494245, "grad_norm": 0.20694012939929962, "learning_rate": 1e-05, "loss": 0.9767, "step": 45900 }, { "epoch": 40.65987599645704, "grad_norm": 0.259566992521286, "learning_rate": 1e-05, "loss": 0.9582, "step": 45905 }, { "epoch": 40.66430469441984, "grad_norm": 0.27784380316734314, "learning_rate": 1e-05, "loss": 0.9839, "step": 45910 }, { "epoch": 40.66873339238264, "grad_norm": 0.2501930892467499, "learning_rate": 1e-05, "loss": 0.927, "step": 45915 }, { "epoch": 40.673162090345436, "grad_norm": 0.22544093430042267, "learning_rate": 1e-05, "loss": 1.0296, "step": 45920 }, { "epoch": 40.67759078830824, "grad_norm": 0.3154388666152954, "learning_rate": 1e-05, "loss": 1.0132, "step": 45925 }, { "epoch": 40.68201948627104, "grad_norm": 0.20174498856067657, "learning_rate": 1e-05, "loss": 0.9315, "step": 45930 }, { "epoch": 40.68644818423383, "grad_norm": 0.24484170973300934, "learning_rate": 1e-05, "loss": 0.9574, "step": 45935 }, { "epoch": 40.69087688219663, "grad_norm": 0.22526822984218597, "learning_rate": 1e-05, "loss": 0.9502, "step": 45940 }, { "epoch": 40.695305580159435, "grad_norm": 0.2262207567691803, "learning_rate": 1e-05, "loss": 0.9969, "step": 45945 }, { "epoch": 40.69973427812223, "grad_norm": 0.2243526428937912, "learning_rate": 1e-05, "loss": 0.9902, "step": 45950 }, { "epoch": 40.70416297608503, "grad_norm": 0.2703973650932312, "learning_rate": 1e-05, "loss": 0.9652, "step": 45955 }, { "epoch": 40.70859167404783, "grad_norm": 0.2283085584640503, "learning_rate": 1e-05, "loss": 0.9617, "step": 45960 }, { "epoch": 40.71302037201063, "grad_norm": 0.2553483247756958, "learning_rate": 1e-05, "loss": 1.0253, "step": 45965 }, { "epoch": 40.717449069973426, "grad_norm": 0.24605821073055267, "learning_rate": 1e-05, "loss": 0.9065, "step": 45970 }, { "epoch": 40.72187776793623, "grad_norm": 0.21523445844650269, "learning_rate": 1e-05, "loss": 0.926, "step": 45975 }, { "epoch": 40.72630646589903, "grad_norm": 0.2937183678150177, "learning_rate": 1e-05, "loss": 0.9436, "step": 45980 }, { "epoch": 40.73073516386182, "grad_norm": 0.2621420621871948, "learning_rate": 1e-05, "loss": 0.9552, "step": 45985 }, { "epoch": 40.735163861824624, "grad_norm": 0.23658692836761475, "learning_rate": 1e-05, "loss": 0.9736, "step": 45990 }, { "epoch": 40.739592559787425, "grad_norm": 0.2258015275001526, "learning_rate": 1e-05, "loss": 1.0693, "step": 45995 }, { "epoch": 40.74402125775022, "grad_norm": 0.24768316745758057, "learning_rate": 1e-05, "loss": 0.9458, "step": 46000 }, { "epoch": 40.74844995571302, "grad_norm": 0.19924908876419067, "learning_rate": 1e-05, "loss": 0.9512, "step": 46005 }, { "epoch": 40.75287865367582, "grad_norm": 0.28765299916267395, "learning_rate": 1e-05, "loss": 0.9737, "step": 46010 }, { "epoch": 40.757307351638616, "grad_norm": 0.26688098907470703, "learning_rate": 1e-05, "loss": 0.9924, "step": 46015 }, { "epoch": 40.76173604960142, "grad_norm": 0.2504536807537079, "learning_rate": 1e-05, "loss": 0.9916, "step": 46020 }, { "epoch": 40.76616474756422, "grad_norm": 0.25557008385658264, "learning_rate": 1e-05, "loss": 1.0221, "step": 46025 }, { "epoch": 40.77059344552701, "grad_norm": 0.25966283679008484, "learning_rate": 1e-05, "loss": 1.0075, "step": 46030 }, { "epoch": 40.775022143489814, "grad_norm": 0.2781597077846527, "learning_rate": 1e-05, "loss": 0.9508, "step": 46035 }, { "epoch": 40.779450841452615, "grad_norm": 0.28727951645851135, "learning_rate": 1e-05, "loss": 0.9428, "step": 46040 }, { "epoch": 40.78387953941541, "grad_norm": 0.22551856935024261, "learning_rate": 1e-05, "loss": 0.9387, "step": 46045 }, { "epoch": 40.78830823737821, "grad_norm": 0.19982650876045227, "learning_rate": 1e-05, "loss": 0.9267, "step": 46050 }, { "epoch": 40.79273693534101, "grad_norm": 0.2332746833562851, "learning_rate": 1e-05, "loss": 0.979, "step": 46055 }, { "epoch": 40.797165633303806, "grad_norm": 0.2241213172674179, "learning_rate": 1e-05, "loss": 0.9871, "step": 46060 }, { "epoch": 40.80159433126661, "grad_norm": 0.24330177903175354, "learning_rate": 1e-05, "loss": 0.9156, "step": 46065 }, { "epoch": 40.80602302922941, "grad_norm": 0.2048812061548233, "learning_rate": 1e-05, "loss": 0.9781, "step": 46070 }, { "epoch": 40.8104517271922, "grad_norm": 0.23944677412509918, "learning_rate": 1e-05, "loss": 0.9786, "step": 46075 }, { "epoch": 40.814880425155, "grad_norm": 0.2650429308414459, "learning_rate": 1e-05, "loss": 1.0223, "step": 46080 }, { "epoch": 40.819309123117804, "grad_norm": 0.2534618079662323, "learning_rate": 1e-05, "loss": 0.9634, "step": 46085 }, { "epoch": 40.823737821080606, "grad_norm": 0.2590779960155487, "learning_rate": 1e-05, "loss": 0.9636, "step": 46090 }, { "epoch": 40.8281665190434, "grad_norm": 0.241688534617424, "learning_rate": 1e-05, "loss": 0.9562, "step": 46095 }, { "epoch": 40.8325952170062, "grad_norm": 0.2734193503856659, "learning_rate": 1e-05, "loss": 1.0013, "step": 46100 }, { "epoch": 40.837023914969, "grad_norm": 0.21250741183757782, "learning_rate": 1e-05, "loss": 0.9658, "step": 46105 }, { "epoch": 40.841452612931796, "grad_norm": 0.2548217475414276, "learning_rate": 1e-05, "loss": 0.9118, "step": 46110 }, { "epoch": 40.8458813108946, "grad_norm": 0.2470664530992508, "learning_rate": 1e-05, "loss": 0.9884, "step": 46115 }, { "epoch": 40.8503100088574, "grad_norm": 0.22192998230457306, "learning_rate": 1e-05, "loss": 0.9648, "step": 46120 }, { "epoch": 40.85473870682019, "grad_norm": 0.24488016963005066, "learning_rate": 1e-05, "loss": 0.9657, "step": 46125 }, { "epoch": 40.859167404782994, "grad_norm": 0.24498888850212097, "learning_rate": 1e-05, "loss": 0.9543, "step": 46130 }, { "epoch": 40.863596102745795, "grad_norm": 0.25724685192108154, "learning_rate": 1e-05, "loss": 1.0114, "step": 46135 }, { "epoch": 40.86802480070859, "grad_norm": 0.25136297941207886, "learning_rate": 1e-05, "loss": 0.9346, "step": 46140 }, { "epoch": 40.87245349867139, "grad_norm": 0.21111196279525757, "learning_rate": 1e-05, "loss": 0.977, "step": 46145 }, { "epoch": 40.87688219663419, "grad_norm": 0.25202807784080505, "learning_rate": 1e-05, "loss": 0.9698, "step": 46150 }, { "epoch": 40.881310894596986, "grad_norm": 0.2228946089744568, "learning_rate": 1e-05, "loss": 0.9858, "step": 46155 }, { "epoch": 40.88573959255979, "grad_norm": 0.2557080090045929, "learning_rate": 1e-05, "loss": 0.9933, "step": 46160 }, { "epoch": 40.89016829052259, "grad_norm": 0.24860051274299622, "learning_rate": 1e-05, "loss": 0.9759, "step": 46165 }, { "epoch": 40.89459698848538, "grad_norm": 0.23795831203460693, "learning_rate": 1e-05, "loss": 0.9986, "step": 46170 }, { "epoch": 40.899025686448184, "grad_norm": 0.27710476517677307, "learning_rate": 1e-05, "loss": 1.0, "step": 46175 }, { "epoch": 40.903454384410985, "grad_norm": 0.22504942119121552, "learning_rate": 1e-05, "loss": 0.9599, "step": 46180 }, { "epoch": 40.90788308237378, "grad_norm": 0.2174450010061264, "learning_rate": 1e-05, "loss": 1.0205, "step": 46185 }, { "epoch": 40.91231178033658, "grad_norm": 0.21438786387443542, "learning_rate": 1e-05, "loss": 0.9941, "step": 46190 }, { "epoch": 40.91674047829938, "grad_norm": 0.23867042362689972, "learning_rate": 1e-05, "loss": 0.9894, "step": 46195 }, { "epoch": 40.921169176262175, "grad_norm": 0.21805495023727417, "learning_rate": 1e-05, "loss": 0.9586, "step": 46200 }, { "epoch": 40.92559787422498, "grad_norm": 0.18959949910640717, "learning_rate": 1e-05, "loss": 0.9699, "step": 46205 }, { "epoch": 40.93002657218778, "grad_norm": 0.2630828619003296, "learning_rate": 1e-05, "loss": 0.9502, "step": 46210 }, { "epoch": 40.93445527015058, "grad_norm": 0.255587100982666, "learning_rate": 1e-05, "loss": 0.9403, "step": 46215 }, { "epoch": 40.93888396811337, "grad_norm": 0.2215876579284668, "learning_rate": 1e-05, "loss": 0.912, "step": 46220 }, { "epoch": 40.943312666076174, "grad_norm": 0.28492793440818787, "learning_rate": 1e-05, "loss": 0.9336, "step": 46225 }, { "epoch": 40.947741364038976, "grad_norm": 0.2775213420391083, "learning_rate": 1e-05, "loss": 1.0361, "step": 46230 }, { "epoch": 40.95217006200177, "grad_norm": 0.19882459938526154, "learning_rate": 1e-05, "loss": 0.9806, "step": 46235 }, { "epoch": 40.95659875996457, "grad_norm": 0.21408990025520325, "learning_rate": 1e-05, "loss": 1.0204, "step": 46240 }, { "epoch": 40.96102745792737, "grad_norm": 0.22075635194778442, "learning_rate": 1e-05, "loss": 0.9354, "step": 46245 }, { "epoch": 40.965456155890166, "grad_norm": 0.26780733466148376, "learning_rate": 1e-05, "loss": 0.952, "step": 46250 }, { "epoch": 40.96988485385297, "grad_norm": 0.19951346516609192, "learning_rate": 1e-05, "loss": 0.9693, "step": 46255 }, { "epoch": 40.97431355181577, "grad_norm": 0.23445382714271545, "learning_rate": 1e-05, "loss": 1.0112, "step": 46260 }, { "epoch": 40.97874224977856, "grad_norm": 0.24156714975833893, "learning_rate": 1e-05, "loss": 0.9968, "step": 46265 }, { "epoch": 40.983170947741364, "grad_norm": 0.22810764610767365, "learning_rate": 1e-05, "loss": 0.9761, "step": 46270 }, { "epoch": 40.987599645704165, "grad_norm": 0.25293776392936707, "learning_rate": 1e-05, "loss": 0.9567, "step": 46275 }, { "epoch": 40.99202834366696, "grad_norm": 0.2452917844057083, "learning_rate": 1e-05, "loss": 0.9847, "step": 46280 }, { "epoch": 40.99645704162976, "grad_norm": 0.25986042618751526, "learning_rate": 1e-05, "loss": 0.9807, "step": 46285 }, { "epoch": 41.00088573959256, "grad_norm": 0.255265474319458, "learning_rate": 1e-05, "loss": 0.9875, "step": 46290 }, { "epoch": 41.005314437555356, "grad_norm": 0.22851619124412537, "learning_rate": 1e-05, "loss": 0.9576, "step": 46295 }, { "epoch": 41.00974313551816, "grad_norm": 0.2514009475708008, "learning_rate": 1e-05, "loss": 1.0191, "step": 46300 }, { "epoch": 41.01417183348096, "grad_norm": 0.2622789442539215, "learning_rate": 1e-05, "loss": 0.9428, "step": 46305 }, { "epoch": 41.01860053144375, "grad_norm": 0.23823486268520355, "learning_rate": 1e-05, "loss": 1.0336, "step": 46310 }, { "epoch": 41.02302922940655, "grad_norm": 0.23992542922496796, "learning_rate": 1e-05, "loss": 0.987, "step": 46315 }, { "epoch": 41.027457927369355, "grad_norm": 0.2484361082315445, "learning_rate": 1e-05, "loss": 0.9531, "step": 46320 }, { "epoch": 41.03188662533215, "grad_norm": 0.27358919382095337, "learning_rate": 1e-05, "loss": 1.0017, "step": 46325 }, { "epoch": 41.03631532329495, "grad_norm": 0.25942134857177734, "learning_rate": 1e-05, "loss": 1.0169, "step": 46330 }, { "epoch": 41.04074402125775, "grad_norm": 0.2241154909133911, "learning_rate": 1e-05, "loss": 0.9762, "step": 46335 }, { "epoch": 41.04517271922055, "grad_norm": 0.22613605856895447, "learning_rate": 1e-05, "loss": 0.9339, "step": 46340 }, { "epoch": 41.04960141718335, "grad_norm": 0.29719963669776917, "learning_rate": 1e-05, "loss": 0.9841, "step": 46345 }, { "epoch": 41.05403011514615, "grad_norm": 0.3122774064540863, "learning_rate": 1e-05, "loss": 0.9315, "step": 46350 }, { "epoch": 41.05845881310895, "grad_norm": 0.2574634850025177, "learning_rate": 1e-05, "loss": 0.9348, "step": 46355 }, { "epoch": 41.06288751107174, "grad_norm": 0.2685568928718567, "learning_rate": 1e-05, "loss": 0.9411, "step": 46360 }, { "epoch": 41.067316209034544, "grad_norm": 0.2701598107814789, "learning_rate": 1e-05, "loss": 0.9648, "step": 46365 }, { "epoch": 41.071744906997345, "grad_norm": 0.2244083136320114, "learning_rate": 1e-05, "loss": 0.9149, "step": 46370 }, { "epoch": 41.07617360496014, "grad_norm": 0.2717266380786896, "learning_rate": 1e-05, "loss": 0.9816, "step": 46375 }, { "epoch": 41.08060230292294, "grad_norm": 0.23584267497062683, "learning_rate": 1e-05, "loss": 1.0192, "step": 46380 }, { "epoch": 41.08503100088574, "grad_norm": 0.27556976675987244, "learning_rate": 1e-05, "loss": 0.9768, "step": 46385 }, { "epoch": 41.089459698848536, "grad_norm": 0.23604941368103027, "learning_rate": 1e-05, "loss": 0.9678, "step": 46390 }, { "epoch": 41.09388839681134, "grad_norm": 0.2693665623664856, "learning_rate": 1e-05, "loss": 0.9983, "step": 46395 }, { "epoch": 41.09831709477414, "grad_norm": 0.2334788739681244, "learning_rate": 1e-05, "loss": 0.9607, "step": 46400 }, { "epoch": 41.10274579273693, "grad_norm": 0.2547294795513153, "learning_rate": 1e-05, "loss": 0.9927, "step": 46405 }, { "epoch": 41.107174490699734, "grad_norm": 0.23960816860198975, "learning_rate": 1e-05, "loss": 0.9302, "step": 46410 }, { "epoch": 41.111603188662535, "grad_norm": 0.1880325824022293, "learning_rate": 1e-05, "loss": 0.9601, "step": 46415 }, { "epoch": 41.11603188662533, "grad_norm": 0.21492323279380798, "learning_rate": 1e-05, "loss": 0.9733, "step": 46420 }, { "epoch": 41.12046058458813, "grad_norm": 0.23511657118797302, "learning_rate": 1e-05, "loss": 0.9955, "step": 46425 }, { "epoch": 41.12488928255093, "grad_norm": 0.2740682363510132, "learning_rate": 1e-05, "loss": 0.9796, "step": 46430 }, { "epoch": 41.129317980513726, "grad_norm": 0.22582025825977325, "learning_rate": 1e-05, "loss": 0.9686, "step": 46435 }, { "epoch": 41.13374667847653, "grad_norm": 0.2642294466495514, "learning_rate": 1e-05, "loss": 0.9759, "step": 46440 }, { "epoch": 41.13817537643933, "grad_norm": 0.27966028451919556, "learning_rate": 1e-05, "loss": 1.0128, "step": 46445 }, { "epoch": 41.14260407440213, "grad_norm": 0.22969523072242737, "learning_rate": 1e-05, "loss": 0.9556, "step": 46450 }, { "epoch": 41.14703277236492, "grad_norm": 0.24469727277755737, "learning_rate": 1e-05, "loss": 0.986, "step": 46455 }, { "epoch": 41.151461470327725, "grad_norm": 0.20409634709358215, "learning_rate": 1e-05, "loss": 1.0021, "step": 46460 }, { "epoch": 41.155890168290526, "grad_norm": 0.24594646692276, "learning_rate": 1e-05, "loss": 1.0045, "step": 46465 }, { "epoch": 41.16031886625332, "grad_norm": 0.2107960283756256, "learning_rate": 1e-05, "loss": 0.9841, "step": 46470 }, { "epoch": 41.16474756421612, "grad_norm": 0.23458872735500336, "learning_rate": 1e-05, "loss": 0.9538, "step": 46475 }, { "epoch": 41.16917626217892, "grad_norm": 0.22560983896255493, "learning_rate": 1e-05, "loss": 0.9485, "step": 46480 }, { "epoch": 41.173604960141716, "grad_norm": 0.2523638904094696, "learning_rate": 1e-05, "loss": 0.9758, "step": 46485 }, { "epoch": 41.17803365810452, "grad_norm": 0.23163332045078278, "learning_rate": 1e-05, "loss": 0.9816, "step": 46490 }, { "epoch": 41.18246235606732, "grad_norm": 0.2533573508262634, "learning_rate": 1e-05, "loss": 0.9609, "step": 46495 }, { "epoch": 41.18689105403011, "grad_norm": 0.20747590065002441, "learning_rate": 1e-05, "loss": 0.9876, "step": 46500 }, { "epoch": 41.191319751992914, "grad_norm": 0.2377549111843109, "learning_rate": 1e-05, "loss": 0.9744, "step": 46505 }, { "epoch": 41.195748449955715, "grad_norm": 0.24137932062149048, "learning_rate": 1e-05, "loss": 0.9601, "step": 46510 }, { "epoch": 41.20017714791851, "grad_norm": 0.2392067313194275, "learning_rate": 1e-05, "loss": 0.9684, "step": 46515 }, { "epoch": 41.20460584588131, "grad_norm": 0.24320088326931, "learning_rate": 1e-05, "loss": 0.9755, "step": 46520 }, { "epoch": 41.20903454384411, "grad_norm": 0.2416033297777176, "learning_rate": 1e-05, "loss": 1.033, "step": 46525 }, { "epoch": 41.213463241806906, "grad_norm": 0.2187662422657013, "learning_rate": 1e-05, "loss": 0.996, "step": 46530 }, { "epoch": 41.21789193976971, "grad_norm": 0.21655720472335815, "learning_rate": 1e-05, "loss": 0.9838, "step": 46535 }, { "epoch": 41.22232063773251, "grad_norm": 0.2829791009426117, "learning_rate": 1e-05, "loss": 0.9631, "step": 46540 }, { "epoch": 41.2267493356953, "grad_norm": 0.2016926407814026, "learning_rate": 1e-05, "loss": 1.0187, "step": 46545 }, { "epoch": 41.231178033658104, "grad_norm": 0.24520066380500793, "learning_rate": 1e-05, "loss": 0.9947, "step": 46550 }, { "epoch": 41.235606731620905, "grad_norm": 0.2519649267196655, "learning_rate": 1e-05, "loss": 0.9827, "step": 46555 }, { "epoch": 41.2400354295837, "grad_norm": 0.2173595130443573, "learning_rate": 1e-05, "loss": 0.9784, "step": 46560 }, { "epoch": 41.2444641275465, "grad_norm": 0.2718411386013031, "learning_rate": 1e-05, "loss": 0.9699, "step": 46565 }, { "epoch": 41.2488928255093, "grad_norm": 0.29615840315818787, "learning_rate": 1e-05, "loss": 1.0153, "step": 46570 }, { "epoch": 41.2533215234721, "grad_norm": 0.21959950029850006, "learning_rate": 1e-05, "loss": 0.964, "step": 46575 }, { "epoch": 41.2577502214349, "grad_norm": 0.2201385349035263, "learning_rate": 1e-05, "loss": 0.9601, "step": 46580 }, { "epoch": 41.2621789193977, "grad_norm": 0.2673913240432739, "learning_rate": 1e-05, "loss": 0.9819, "step": 46585 }, { "epoch": 41.2666076173605, "grad_norm": 0.31370651721954346, "learning_rate": 1e-05, "loss": 1.0212, "step": 46590 }, { "epoch": 41.27103631532329, "grad_norm": 0.22648300230503082, "learning_rate": 1e-05, "loss": 0.9562, "step": 46595 }, { "epoch": 41.275465013286095, "grad_norm": 0.2525435984134674, "learning_rate": 1e-05, "loss": 0.9896, "step": 46600 }, { "epoch": 41.279893711248896, "grad_norm": 0.2291669100522995, "learning_rate": 1e-05, "loss": 1.0178, "step": 46605 }, { "epoch": 41.28432240921169, "grad_norm": 0.23526166379451752, "learning_rate": 1e-05, "loss": 0.9922, "step": 46610 }, { "epoch": 41.28875110717449, "grad_norm": 0.2228817492723465, "learning_rate": 1e-05, "loss": 0.9885, "step": 46615 }, { "epoch": 41.29317980513729, "grad_norm": 0.23752111196517944, "learning_rate": 1e-05, "loss": 0.9807, "step": 46620 }, { "epoch": 41.297608503100086, "grad_norm": 0.2538985311985016, "learning_rate": 1e-05, "loss": 0.9457, "step": 46625 }, { "epoch": 41.30203720106289, "grad_norm": 0.20848622918128967, "learning_rate": 1e-05, "loss": 1.0014, "step": 46630 }, { "epoch": 41.30646589902569, "grad_norm": 0.20615798234939575, "learning_rate": 1e-05, "loss": 1.0151, "step": 46635 }, { "epoch": 41.31089459698848, "grad_norm": 0.30227023363113403, "learning_rate": 1e-05, "loss": 0.9673, "step": 46640 }, { "epoch": 41.315323294951284, "grad_norm": 0.2978127896785736, "learning_rate": 1e-05, "loss": 0.9354, "step": 46645 }, { "epoch": 41.319751992914085, "grad_norm": 0.25392451882362366, "learning_rate": 1e-05, "loss": 0.978, "step": 46650 }, { "epoch": 41.32418069087688, "grad_norm": 0.23451435565948486, "learning_rate": 1e-05, "loss": 0.9501, "step": 46655 }, { "epoch": 41.32860938883968, "grad_norm": 0.22238238155841827, "learning_rate": 1e-05, "loss": 0.9717, "step": 46660 }, { "epoch": 41.33303808680248, "grad_norm": 0.2178657203912735, "learning_rate": 1e-05, "loss": 0.9646, "step": 46665 }, { "epoch": 41.337466784765276, "grad_norm": 0.19798831641674042, "learning_rate": 1e-05, "loss": 0.9769, "step": 46670 }, { "epoch": 41.34189548272808, "grad_norm": 0.26017433404922485, "learning_rate": 1e-05, "loss": 1.0125, "step": 46675 }, { "epoch": 41.34632418069088, "grad_norm": 0.2545863389968872, "learning_rate": 1e-05, "loss": 0.9925, "step": 46680 }, { "epoch": 41.35075287865367, "grad_norm": 0.2588142156600952, "learning_rate": 1e-05, "loss": 1.004, "step": 46685 }, { "epoch": 41.355181576616474, "grad_norm": 0.2616218328475952, "learning_rate": 1e-05, "loss": 0.9772, "step": 46690 }, { "epoch": 41.359610274579275, "grad_norm": 0.3297959864139557, "learning_rate": 1e-05, "loss": 0.9922, "step": 46695 }, { "epoch": 41.364038972542076, "grad_norm": 0.24753610789775848, "learning_rate": 1e-05, "loss": 1.0145, "step": 46700 }, { "epoch": 41.36846767050487, "grad_norm": 0.28048592805862427, "learning_rate": 1e-05, "loss": 0.9763, "step": 46705 }, { "epoch": 41.37289636846767, "grad_norm": 0.22273565828800201, "learning_rate": 1e-05, "loss": 0.9752, "step": 46710 }, { "epoch": 41.37732506643047, "grad_norm": 0.24823904037475586, "learning_rate": 1e-05, "loss": 0.9431, "step": 46715 }, { "epoch": 41.38175376439327, "grad_norm": 0.24420718848705292, "learning_rate": 1e-05, "loss": 0.9726, "step": 46720 }, { "epoch": 41.38618246235607, "grad_norm": 0.2724437415599823, "learning_rate": 1e-05, "loss": 1.0083, "step": 46725 }, { "epoch": 41.39061116031887, "grad_norm": 0.250664621591568, "learning_rate": 1e-05, "loss": 0.9371, "step": 46730 }, { "epoch": 41.39503985828166, "grad_norm": 0.20065896213054657, "learning_rate": 1e-05, "loss": 1.0323, "step": 46735 }, { "epoch": 41.399468556244464, "grad_norm": 0.26644062995910645, "learning_rate": 1e-05, "loss": 0.9609, "step": 46740 }, { "epoch": 41.403897254207266, "grad_norm": 0.20965181291103363, "learning_rate": 1e-05, "loss": 0.9473, "step": 46745 }, { "epoch": 41.40832595217006, "grad_norm": 0.22881768643856049, "learning_rate": 1e-05, "loss": 0.9962, "step": 46750 }, { "epoch": 41.41275465013286, "grad_norm": 0.25017255544662476, "learning_rate": 1e-05, "loss": 1.0036, "step": 46755 }, { "epoch": 41.41718334809566, "grad_norm": 0.222843736410141, "learning_rate": 1e-05, "loss": 0.9979, "step": 46760 }, { "epoch": 41.421612046058456, "grad_norm": 0.24611032009124756, "learning_rate": 1e-05, "loss": 0.9806, "step": 46765 }, { "epoch": 41.42604074402126, "grad_norm": 0.23981942236423492, "learning_rate": 1e-05, "loss": 0.9138, "step": 46770 }, { "epoch": 41.43046944198406, "grad_norm": 0.24829858541488647, "learning_rate": 1e-05, "loss": 0.9527, "step": 46775 }, { "epoch": 41.43489813994685, "grad_norm": 0.2527877390384674, "learning_rate": 1e-05, "loss": 0.9605, "step": 46780 }, { "epoch": 41.439326837909654, "grad_norm": 0.21177981793880463, "learning_rate": 1e-05, "loss": 0.961, "step": 46785 }, { "epoch": 41.443755535872455, "grad_norm": 0.21250608563423157, "learning_rate": 1e-05, "loss": 0.977, "step": 46790 }, { "epoch": 41.44818423383525, "grad_norm": 0.21410933136940002, "learning_rate": 1e-05, "loss": 0.9917, "step": 46795 }, { "epoch": 41.45261293179805, "grad_norm": 0.2529192864894867, "learning_rate": 1e-05, "loss": 0.9713, "step": 46800 }, { "epoch": 41.45704162976085, "grad_norm": 0.23823131620883942, "learning_rate": 1e-05, "loss": 1.0282, "step": 46805 }, { "epoch": 41.461470327723646, "grad_norm": 0.24617500603199005, "learning_rate": 1e-05, "loss": 0.9784, "step": 46810 }, { "epoch": 41.46589902568645, "grad_norm": 0.2661856710910797, "learning_rate": 1e-05, "loss": 0.9675, "step": 46815 }, { "epoch": 41.47032772364925, "grad_norm": 0.2571755647659302, "learning_rate": 1e-05, "loss": 1.0337, "step": 46820 }, { "epoch": 41.47475642161205, "grad_norm": 0.20305150747299194, "learning_rate": 1e-05, "loss": 0.9397, "step": 46825 }, { "epoch": 41.479185119574844, "grad_norm": 0.24163159728050232, "learning_rate": 1e-05, "loss": 0.9732, "step": 46830 }, { "epoch": 41.483613817537645, "grad_norm": 0.2743411660194397, "learning_rate": 1e-05, "loss": 0.9653, "step": 46835 }, { "epoch": 41.488042515500446, "grad_norm": 0.24505265057086945, "learning_rate": 1e-05, "loss": 0.9635, "step": 46840 }, { "epoch": 41.49247121346324, "grad_norm": 0.24050122499465942, "learning_rate": 1e-05, "loss": 0.9794, "step": 46845 }, { "epoch": 41.49689991142604, "grad_norm": 0.2746000289916992, "learning_rate": 1e-05, "loss": 0.9519, "step": 46850 }, { "epoch": 41.50132860938884, "grad_norm": 0.22860535979270935, "learning_rate": 1e-05, "loss": 0.9947, "step": 46855 }, { "epoch": 41.50575730735164, "grad_norm": 0.21988466382026672, "learning_rate": 1e-05, "loss": 0.9864, "step": 46860 }, { "epoch": 41.51018600531444, "grad_norm": 0.24702921509742737, "learning_rate": 1e-05, "loss": 0.9644, "step": 46865 }, { "epoch": 41.51461470327724, "grad_norm": 0.24761199951171875, "learning_rate": 1e-05, "loss": 0.9523, "step": 46870 }, { "epoch": 41.51904340124003, "grad_norm": 0.2534991204738617, "learning_rate": 1e-05, "loss": 0.9911, "step": 46875 }, { "epoch": 41.523472099202834, "grad_norm": 0.23372985422611237, "learning_rate": 1e-05, "loss": 0.9732, "step": 46880 }, { "epoch": 41.527900797165636, "grad_norm": 0.2518102526664734, "learning_rate": 1e-05, "loss": 0.9349, "step": 46885 }, { "epoch": 41.53232949512843, "grad_norm": 0.24182449281215668, "learning_rate": 1e-05, "loss": 0.9305, "step": 46890 }, { "epoch": 41.53675819309123, "grad_norm": 0.23864220082759857, "learning_rate": 1e-05, "loss": 1.018, "step": 46895 }, { "epoch": 41.54118689105403, "grad_norm": 0.23089124262332916, "learning_rate": 1e-05, "loss": 0.944, "step": 46900 }, { "epoch": 41.545615589016826, "grad_norm": 0.26446375250816345, "learning_rate": 1e-05, "loss": 0.9575, "step": 46905 }, { "epoch": 41.55004428697963, "grad_norm": 0.23237252235412598, "learning_rate": 1e-05, "loss": 0.978, "step": 46910 }, { "epoch": 41.55447298494243, "grad_norm": 0.22160682082176208, "learning_rate": 1e-05, "loss": 1.03, "step": 46915 }, { "epoch": 41.55890168290522, "grad_norm": 0.22599604725837708, "learning_rate": 1e-05, "loss": 0.9653, "step": 46920 }, { "epoch": 41.563330380868024, "grad_norm": 0.2181016355752945, "learning_rate": 1e-05, "loss": 0.9595, "step": 46925 }, { "epoch": 41.567759078830825, "grad_norm": 0.28839850425720215, "learning_rate": 1e-05, "loss": 0.9657, "step": 46930 }, { "epoch": 41.57218777679362, "grad_norm": 0.21832482516765594, "learning_rate": 1e-05, "loss": 0.9955, "step": 46935 }, { "epoch": 41.57661647475642, "grad_norm": 0.23229339718818665, "learning_rate": 1e-05, "loss": 1.0121, "step": 46940 }, { "epoch": 41.58104517271922, "grad_norm": 0.22854207456111908, "learning_rate": 1e-05, "loss": 0.9995, "step": 46945 }, { "epoch": 41.58547387068202, "grad_norm": 0.2785719633102417, "learning_rate": 1e-05, "loss": 1.0019, "step": 46950 }, { "epoch": 41.58990256864482, "grad_norm": 0.26977190375328064, "learning_rate": 1e-05, "loss": 1.0031, "step": 46955 }, { "epoch": 41.59433126660762, "grad_norm": 0.23604361712932587, "learning_rate": 1e-05, "loss": 1.0026, "step": 46960 }, { "epoch": 41.59875996457042, "grad_norm": 0.19699259102344513, "learning_rate": 1e-05, "loss": 0.9926, "step": 46965 }, { "epoch": 41.60318866253321, "grad_norm": 0.2620716691017151, "learning_rate": 1e-05, "loss": 0.9784, "step": 46970 }, { "epoch": 41.607617360496015, "grad_norm": 0.2337796688079834, "learning_rate": 1e-05, "loss": 0.956, "step": 46975 }, { "epoch": 41.612046058458816, "grad_norm": 0.22643381357192993, "learning_rate": 1e-05, "loss": 0.9628, "step": 46980 }, { "epoch": 41.61647475642161, "grad_norm": 0.2471427023410797, "learning_rate": 1e-05, "loss": 0.9835, "step": 46985 }, { "epoch": 41.62090345438441, "grad_norm": 0.27139613032341003, "learning_rate": 1e-05, "loss": 0.9965, "step": 46990 }, { "epoch": 41.62533215234721, "grad_norm": 0.26607316732406616, "learning_rate": 1e-05, "loss": 0.9376, "step": 46995 }, { "epoch": 41.62976085031001, "grad_norm": 0.24246305227279663, "learning_rate": 1e-05, "loss": 0.9828, "step": 47000 }, { "epoch": 41.63418954827281, "grad_norm": 0.2571573853492737, "learning_rate": 1e-05, "loss": 0.9811, "step": 47005 }, { "epoch": 41.63861824623561, "grad_norm": 0.20895442366600037, "learning_rate": 1e-05, "loss": 0.9798, "step": 47010 }, { "epoch": 41.6430469441984, "grad_norm": 0.2300744354724884, "learning_rate": 1e-05, "loss": 0.9454, "step": 47015 }, { "epoch": 41.647475642161204, "grad_norm": 0.23912738263607025, "learning_rate": 1e-05, "loss": 0.9273, "step": 47020 }, { "epoch": 41.651904340124005, "grad_norm": 0.20212675631046295, "learning_rate": 1e-05, "loss": 0.9488, "step": 47025 }, { "epoch": 41.6563330380868, "grad_norm": 0.24596139788627625, "learning_rate": 1e-05, "loss": 0.9696, "step": 47030 }, { "epoch": 41.6607617360496, "grad_norm": 0.27843809127807617, "learning_rate": 1e-05, "loss": 1.0358, "step": 47035 }, { "epoch": 41.6651904340124, "grad_norm": 0.2495543658733368, "learning_rate": 1e-05, "loss": 0.9528, "step": 47040 }, { "epoch": 41.669619131975196, "grad_norm": 0.24879902601242065, "learning_rate": 1e-05, "loss": 0.967, "step": 47045 }, { "epoch": 41.674047829938, "grad_norm": 0.20944470167160034, "learning_rate": 1e-05, "loss": 1.0376, "step": 47050 }, { "epoch": 41.6784765279008, "grad_norm": 0.26936009526252747, "learning_rate": 1e-05, "loss": 0.9712, "step": 47055 }, { "epoch": 41.68290522586359, "grad_norm": 0.28627660870552063, "learning_rate": 1e-05, "loss": 0.9811, "step": 47060 }, { "epoch": 41.687333923826394, "grad_norm": 0.2799301743507385, "learning_rate": 1e-05, "loss": 0.97, "step": 47065 }, { "epoch": 41.691762621789195, "grad_norm": 0.2388470321893692, "learning_rate": 1e-05, "loss": 1.0303, "step": 47070 }, { "epoch": 41.696191319751996, "grad_norm": 0.19623346626758575, "learning_rate": 1e-05, "loss": 0.9765, "step": 47075 }, { "epoch": 41.70062001771479, "grad_norm": 0.24366050958633423, "learning_rate": 1e-05, "loss": 0.9087, "step": 47080 }, { "epoch": 41.70504871567759, "grad_norm": 0.2991557717323303, "learning_rate": 1e-05, "loss": 1.0023, "step": 47085 }, { "epoch": 41.70947741364039, "grad_norm": 0.21608667075634003, "learning_rate": 1e-05, "loss": 0.9712, "step": 47090 }, { "epoch": 41.71390611160319, "grad_norm": 0.23013699054718018, "learning_rate": 1e-05, "loss": 0.9767, "step": 47095 }, { "epoch": 41.71833480956599, "grad_norm": 0.21956031024456024, "learning_rate": 1e-05, "loss": 1.0843, "step": 47100 }, { "epoch": 41.72276350752879, "grad_norm": 0.2651800215244293, "learning_rate": 1e-05, "loss": 0.9428, "step": 47105 }, { "epoch": 41.72719220549158, "grad_norm": 0.22979427874088287, "learning_rate": 1e-05, "loss": 0.9639, "step": 47110 }, { "epoch": 41.731620903454385, "grad_norm": 0.21967464685440063, "learning_rate": 1e-05, "loss": 0.9618, "step": 47115 }, { "epoch": 41.736049601417186, "grad_norm": 0.24914321303367615, "learning_rate": 1e-05, "loss": 0.9379, "step": 47120 }, { "epoch": 41.74047829937998, "grad_norm": 0.2248576134443283, "learning_rate": 1e-05, "loss": 0.9456, "step": 47125 }, { "epoch": 41.74490699734278, "grad_norm": 0.2356075793504715, "learning_rate": 1e-05, "loss": 0.9447, "step": 47130 }, { "epoch": 41.74933569530558, "grad_norm": 0.2672087550163269, "learning_rate": 1e-05, "loss": 0.9711, "step": 47135 }, { "epoch": 41.753764393268376, "grad_norm": 0.290438711643219, "learning_rate": 1e-05, "loss": 0.9623, "step": 47140 }, { "epoch": 41.75819309123118, "grad_norm": 0.25266802310943604, "learning_rate": 1e-05, "loss": 0.946, "step": 47145 }, { "epoch": 41.76262178919398, "grad_norm": 0.23994064331054688, "learning_rate": 1e-05, "loss": 0.9391, "step": 47150 }, { "epoch": 41.76705048715677, "grad_norm": 0.24676162004470825, "learning_rate": 1e-05, "loss": 0.9695, "step": 47155 }, { "epoch": 41.771479185119574, "grad_norm": 0.23655670881271362, "learning_rate": 1e-05, "loss": 0.9823, "step": 47160 }, { "epoch": 41.775907883082375, "grad_norm": 0.21515819430351257, "learning_rate": 1e-05, "loss": 0.9257, "step": 47165 }, { "epoch": 41.78033658104517, "grad_norm": 0.2658790051937103, "learning_rate": 1e-05, "loss": 0.9679, "step": 47170 }, { "epoch": 41.78476527900797, "grad_norm": 0.26729583740234375, "learning_rate": 1e-05, "loss": 0.9544, "step": 47175 }, { "epoch": 41.78919397697077, "grad_norm": 0.24590493738651276, "learning_rate": 1e-05, "loss": 0.9646, "step": 47180 }, { "epoch": 41.79362267493357, "grad_norm": 0.23929621279239655, "learning_rate": 1e-05, "loss": 0.9401, "step": 47185 }, { "epoch": 41.79805137289637, "grad_norm": 0.22001193463802338, "learning_rate": 1e-05, "loss": 0.981, "step": 47190 }, { "epoch": 41.80248007085917, "grad_norm": 0.24116064608097076, "learning_rate": 1e-05, "loss": 0.9601, "step": 47195 }, { "epoch": 41.80690876882197, "grad_norm": 0.20740506052970886, "learning_rate": 1e-05, "loss": 0.9475, "step": 47200 }, { "epoch": 41.811337466784764, "grad_norm": 0.23944616317749023, "learning_rate": 1e-05, "loss": 0.9806, "step": 47205 }, { "epoch": 41.815766164747565, "grad_norm": 0.24343454837799072, "learning_rate": 1e-05, "loss": 1.0264, "step": 47210 }, { "epoch": 41.820194862710366, "grad_norm": 0.2211264818906784, "learning_rate": 1e-05, "loss": 1.0187, "step": 47215 }, { "epoch": 41.82462356067316, "grad_norm": 0.2685020864009857, "learning_rate": 1e-05, "loss": 1.0212, "step": 47220 }, { "epoch": 41.82905225863596, "grad_norm": 0.24459992349147797, "learning_rate": 1e-05, "loss": 0.9793, "step": 47225 }, { "epoch": 41.83348095659876, "grad_norm": 0.21922965347766876, "learning_rate": 1e-05, "loss": 0.9486, "step": 47230 }, { "epoch": 41.83790965456156, "grad_norm": 0.238324373960495, "learning_rate": 1e-05, "loss": 0.9061, "step": 47235 }, { "epoch": 41.84233835252436, "grad_norm": 0.22449564933776855, "learning_rate": 1e-05, "loss": 0.9911, "step": 47240 }, { "epoch": 41.84676705048716, "grad_norm": 0.20336972177028656, "learning_rate": 1e-05, "loss": 1.0357, "step": 47245 }, { "epoch": 41.85119574844995, "grad_norm": 0.2726646065711975, "learning_rate": 1e-05, "loss": 0.9404, "step": 47250 }, { "epoch": 41.855624446412754, "grad_norm": 0.23348622024059296, "learning_rate": 1e-05, "loss": 0.9435, "step": 47255 }, { "epoch": 41.860053144375556, "grad_norm": 0.24239620566368103, "learning_rate": 1e-05, "loss": 1.0078, "step": 47260 }, { "epoch": 41.86448184233835, "grad_norm": 0.21317002177238464, "learning_rate": 1e-05, "loss": 1.0129, "step": 47265 }, { "epoch": 41.86891054030115, "grad_norm": 0.2872420847415924, "learning_rate": 1e-05, "loss": 0.9337, "step": 47270 }, { "epoch": 41.87333923826395, "grad_norm": 0.31914740800857544, "learning_rate": 1e-05, "loss": 0.9955, "step": 47275 }, { "epoch": 41.877767936226746, "grad_norm": 0.2558329105377197, "learning_rate": 1e-05, "loss": 0.9885, "step": 47280 }, { "epoch": 41.88219663418955, "grad_norm": 0.21335332095623016, "learning_rate": 1e-05, "loss": 1.0036, "step": 47285 }, { "epoch": 41.88662533215235, "grad_norm": 0.24892573058605194, "learning_rate": 1e-05, "loss": 0.9858, "step": 47290 }, { "epoch": 41.89105403011514, "grad_norm": 0.24181511998176575, "learning_rate": 1e-05, "loss": 0.9595, "step": 47295 }, { "epoch": 41.895482728077944, "grad_norm": 0.2369491308927536, "learning_rate": 1e-05, "loss": 0.94, "step": 47300 }, { "epoch": 41.899911426040745, "grad_norm": 0.23254704475402832, "learning_rate": 1e-05, "loss": 0.9992, "step": 47305 }, { "epoch": 41.90434012400354, "grad_norm": 0.22457970678806305, "learning_rate": 1e-05, "loss": 0.9618, "step": 47310 }, { "epoch": 41.90876882196634, "grad_norm": 0.2847231924533844, "learning_rate": 1e-05, "loss": 0.9516, "step": 47315 }, { "epoch": 41.91319751992914, "grad_norm": 0.2645552456378937, "learning_rate": 1e-05, "loss": 0.9468, "step": 47320 }, { "epoch": 41.91762621789194, "grad_norm": 0.23353981971740723, "learning_rate": 1e-05, "loss": 1.0181, "step": 47325 }, { "epoch": 41.92205491585474, "grad_norm": 0.25501686334609985, "learning_rate": 1e-05, "loss": 0.9855, "step": 47330 }, { "epoch": 41.92648361381754, "grad_norm": 0.2782838046550751, "learning_rate": 1e-05, "loss": 0.9474, "step": 47335 }, { "epoch": 41.93091231178034, "grad_norm": 0.24164332449436188, "learning_rate": 1e-05, "loss": 0.9923, "step": 47340 }, { "epoch": 41.935341009743134, "grad_norm": 0.22004728019237518, "learning_rate": 1e-05, "loss": 0.9947, "step": 47345 }, { "epoch": 41.939769707705935, "grad_norm": 0.2279643565416336, "learning_rate": 1e-05, "loss": 1.0019, "step": 47350 }, { "epoch": 41.944198405668736, "grad_norm": 0.26370400190353394, "learning_rate": 1e-05, "loss": 0.9658, "step": 47355 }, { "epoch": 41.94862710363153, "grad_norm": 0.22065910696983337, "learning_rate": 1e-05, "loss": 0.9795, "step": 47360 }, { "epoch": 41.95305580159433, "grad_norm": 0.22977013885974884, "learning_rate": 1e-05, "loss": 0.9308, "step": 47365 }, { "epoch": 41.95748449955713, "grad_norm": 0.2535136938095093, "learning_rate": 1e-05, "loss": 0.9386, "step": 47370 }, { "epoch": 41.96191319751993, "grad_norm": 0.2568632960319519, "learning_rate": 1e-05, "loss": 1.0058, "step": 47375 }, { "epoch": 41.96634189548273, "grad_norm": 0.23541945219039917, "learning_rate": 1e-05, "loss": 0.9885, "step": 47380 }, { "epoch": 41.97077059344553, "grad_norm": 0.28623607754707336, "learning_rate": 1e-05, "loss": 0.9339, "step": 47385 }, { "epoch": 41.97519929140832, "grad_norm": 0.2416672557592392, "learning_rate": 1e-05, "loss": 1.0164, "step": 47390 }, { "epoch": 41.979627989371124, "grad_norm": 0.24725818634033203, "learning_rate": 1e-05, "loss": 1.0389, "step": 47395 }, { "epoch": 41.984056687333926, "grad_norm": 0.24128949642181396, "learning_rate": 1e-05, "loss": 0.9643, "step": 47400 }, { "epoch": 41.98848538529672, "grad_norm": 0.23148638010025024, "learning_rate": 1e-05, "loss": 0.9659, "step": 47405 }, { "epoch": 41.99291408325952, "grad_norm": 0.2444465309381485, "learning_rate": 1e-05, "loss": 1.0282, "step": 47410 }, { "epoch": 41.99734278122232, "grad_norm": 0.2789562940597534, "learning_rate": 1e-05, "loss": 0.9942, "step": 47415 }, { "epoch": 42.001771479185116, "grad_norm": 0.23833613097667694, "learning_rate": 1e-05, "loss": 0.9573, "step": 47420 }, { "epoch": 42.00620017714792, "grad_norm": 0.26139411330223083, "learning_rate": 1e-05, "loss": 0.9693, "step": 47425 }, { "epoch": 42.01062887511072, "grad_norm": 0.2773147523403168, "learning_rate": 1e-05, "loss": 0.9964, "step": 47430 }, { "epoch": 42.01505757307352, "grad_norm": 0.2189108431339264, "learning_rate": 1e-05, "loss": 0.9624, "step": 47435 }, { "epoch": 42.019486271036314, "grad_norm": 0.20204530656337738, "learning_rate": 1e-05, "loss": 0.9735, "step": 47440 }, { "epoch": 42.023914968999115, "grad_norm": 0.2207849770784378, "learning_rate": 1e-05, "loss": 1.0106, "step": 47445 }, { "epoch": 42.028343666961916, "grad_norm": 0.28971290588378906, "learning_rate": 1e-05, "loss": 0.9679, "step": 47450 }, { "epoch": 42.03277236492471, "grad_norm": 0.2455006092786789, "learning_rate": 1e-05, "loss": 1.0003, "step": 47455 }, { "epoch": 42.03720106288751, "grad_norm": 0.2976686656475067, "learning_rate": 1e-05, "loss": 0.9749, "step": 47460 }, { "epoch": 42.04162976085031, "grad_norm": 0.2381303608417511, "learning_rate": 1e-05, "loss": 0.9569, "step": 47465 }, { "epoch": 42.04605845881311, "grad_norm": 0.26214805245399475, "learning_rate": 1e-05, "loss": 0.9793, "step": 47470 }, { "epoch": 42.05048715677591, "grad_norm": 0.23552539944648743, "learning_rate": 1e-05, "loss": 1.0147, "step": 47475 }, { "epoch": 42.05491585473871, "grad_norm": 0.23731185495853424, "learning_rate": 1e-05, "loss": 0.995, "step": 47480 }, { "epoch": 42.0593445527015, "grad_norm": 0.2604026198387146, "learning_rate": 1e-05, "loss": 0.927, "step": 47485 }, { "epoch": 42.063773250664305, "grad_norm": 0.29012829065322876, "learning_rate": 1e-05, "loss": 0.9932, "step": 47490 }, { "epoch": 42.068201948627106, "grad_norm": 0.24425168335437775, "learning_rate": 1e-05, "loss": 0.9484, "step": 47495 }, { "epoch": 42.0726306465899, "grad_norm": 0.26090261340141296, "learning_rate": 1e-05, "loss": 0.9468, "step": 47500 }, { "epoch": 42.0770593445527, "grad_norm": 0.23037965595722198, "learning_rate": 1e-05, "loss": 0.9632, "step": 47505 }, { "epoch": 42.0814880425155, "grad_norm": 0.2707884609699249, "learning_rate": 1e-05, "loss": 0.9831, "step": 47510 }, { "epoch": 42.0859167404783, "grad_norm": 0.2334928959608078, "learning_rate": 1e-05, "loss": 0.9912, "step": 47515 }, { "epoch": 42.0903454384411, "grad_norm": 0.22577998042106628, "learning_rate": 1e-05, "loss": 0.953, "step": 47520 }, { "epoch": 42.0947741364039, "grad_norm": 0.24617010354995728, "learning_rate": 1e-05, "loss": 1.0112, "step": 47525 }, { "epoch": 42.09920283436669, "grad_norm": 0.27806687355041504, "learning_rate": 1e-05, "loss": 0.9689, "step": 47530 }, { "epoch": 42.103631532329494, "grad_norm": 0.2106403261423111, "learning_rate": 1e-05, "loss": 0.9832, "step": 47535 }, { "epoch": 42.108060230292296, "grad_norm": 0.2449704259634018, "learning_rate": 1e-05, "loss": 0.966, "step": 47540 }, { "epoch": 42.11248892825509, "grad_norm": 0.21838924288749695, "learning_rate": 1e-05, "loss": 1.0085, "step": 47545 }, { "epoch": 42.11691762621789, "grad_norm": 0.23402725160121918, "learning_rate": 1e-05, "loss": 1.01, "step": 47550 }, { "epoch": 42.12134632418069, "grad_norm": 0.2137393206357956, "learning_rate": 1e-05, "loss": 0.9847, "step": 47555 }, { "epoch": 42.12577502214349, "grad_norm": 0.2771582007408142, "learning_rate": 1e-05, "loss": 0.9532, "step": 47560 }, { "epoch": 42.13020372010629, "grad_norm": 0.22381120920181274, "learning_rate": 1e-05, "loss": 0.9126, "step": 47565 }, { "epoch": 42.13463241806909, "grad_norm": 0.24643754959106445, "learning_rate": 1e-05, "loss": 1.0045, "step": 47570 }, { "epoch": 42.13906111603189, "grad_norm": 0.22045674920082092, "learning_rate": 1e-05, "loss": 0.9923, "step": 47575 }, { "epoch": 42.143489813994684, "grad_norm": 0.2493283599615097, "learning_rate": 1e-05, "loss": 0.949, "step": 47580 }, { "epoch": 42.147918511957485, "grad_norm": 0.3196389377117157, "learning_rate": 1e-05, "loss": 0.977, "step": 47585 }, { "epoch": 42.152347209920286, "grad_norm": 0.3015509843826294, "learning_rate": 1e-05, "loss": 0.9724, "step": 47590 }, { "epoch": 42.15677590788308, "grad_norm": 0.25217247009277344, "learning_rate": 1e-05, "loss": 0.9576, "step": 47595 }, { "epoch": 42.16120460584588, "grad_norm": 0.24276064336299896, "learning_rate": 1e-05, "loss": 0.9258, "step": 47600 }, { "epoch": 42.16563330380868, "grad_norm": 0.21076928079128265, "learning_rate": 1e-05, "loss": 0.9981, "step": 47605 }, { "epoch": 42.17006200177148, "grad_norm": 0.21807684004306793, "learning_rate": 1e-05, "loss": 1.0003, "step": 47610 }, { "epoch": 42.17449069973428, "grad_norm": 0.25889700651168823, "learning_rate": 1e-05, "loss": 0.9578, "step": 47615 }, { "epoch": 42.17891939769708, "grad_norm": 0.2158796638250351, "learning_rate": 1e-05, "loss": 0.9592, "step": 47620 }, { "epoch": 42.18334809565987, "grad_norm": 0.24467463791370392, "learning_rate": 1e-05, "loss": 1.0143, "step": 47625 }, { "epoch": 42.187776793622675, "grad_norm": 0.22895513474941254, "learning_rate": 1e-05, "loss": 1.0072, "step": 47630 }, { "epoch": 42.192205491585476, "grad_norm": 0.2406184822320938, "learning_rate": 1e-05, "loss": 0.863, "step": 47635 }, { "epoch": 42.19663418954827, "grad_norm": 0.22785741090774536, "learning_rate": 1e-05, "loss": 0.9637, "step": 47640 }, { "epoch": 42.20106288751107, "grad_norm": 0.20653823018074036, "learning_rate": 1e-05, "loss": 1.0427, "step": 47645 }, { "epoch": 42.20549158547387, "grad_norm": 0.22851921617984772, "learning_rate": 1e-05, "loss": 0.9899, "step": 47650 }, { "epoch": 42.20992028343667, "grad_norm": 0.22391921281814575, "learning_rate": 1e-05, "loss": 0.9853, "step": 47655 }, { "epoch": 42.21434898139947, "grad_norm": 0.2410077452659607, "learning_rate": 1e-05, "loss": 0.9159, "step": 47660 }, { "epoch": 42.21877767936227, "grad_norm": 0.2572147250175476, "learning_rate": 1e-05, "loss": 0.9955, "step": 47665 }, { "epoch": 42.22320637732506, "grad_norm": 0.23231694102287292, "learning_rate": 1e-05, "loss": 0.9372, "step": 47670 }, { "epoch": 42.227635075287864, "grad_norm": 0.2209526002407074, "learning_rate": 1e-05, "loss": 0.9636, "step": 47675 }, { "epoch": 42.232063773250665, "grad_norm": 0.23461313545703888, "learning_rate": 1e-05, "loss": 1.0068, "step": 47680 }, { "epoch": 42.23649247121347, "grad_norm": 0.2990756034851074, "learning_rate": 1e-05, "loss": 0.9881, "step": 47685 }, { "epoch": 42.24092116917626, "grad_norm": 0.2768782079219818, "learning_rate": 1e-05, "loss": 1.0237, "step": 47690 }, { "epoch": 42.24534986713906, "grad_norm": 0.22221550345420837, "learning_rate": 1e-05, "loss": 0.9675, "step": 47695 }, { "epoch": 42.24977856510186, "grad_norm": 0.2784070074558258, "learning_rate": 1e-05, "loss": 1.0797, "step": 47700 }, { "epoch": 42.25420726306466, "grad_norm": 0.22987860441207886, "learning_rate": 1e-05, "loss": 0.9508, "step": 47705 }, { "epoch": 42.25863596102746, "grad_norm": 0.30230504274368286, "learning_rate": 1e-05, "loss": 1.0465, "step": 47710 }, { "epoch": 42.26306465899026, "grad_norm": 0.22940903902053833, "learning_rate": 1e-05, "loss": 0.9695, "step": 47715 }, { "epoch": 42.267493356953054, "grad_norm": 0.24579615890979767, "learning_rate": 1e-05, "loss": 1.0302, "step": 47720 }, { "epoch": 42.271922054915855, "grad_norm": 0.2738560140132904, "learning_rate": 1e-05, "loss": 1.0234, "step": 47725 }, { "epoch": 42.276350752878656, "grad_norm": 0.2987234592437744, "learning_rate": 1e-05, "loss": 0.9588, "step": 47730 }, { "epoch": 42.28077945084145, "grad_norm": 0.21220698952674866, "learning_rate": 1e-05, "loss": 0.9697, "step": 47735 }, { "epoch": 42.28520814880425, "grad_norm": 0.22221818566322327, "learning_rate": 1e-05, "loss": 0.9798, "step": 47740 }, { "epoch": 42.28963684676705, "grad_norm": 0.22247502207756042, "learning_rate": 1e-05, "loss": 0.9805, "step": 47745 }, { "epoch": 42.29406554472985, "grad_norm": 0.2361268401145935, "learning_rate": 1e-05, "loss": 1.002, "step": 47750 }, { "epoch": 42.29849424269265, "grad_norm": 0.2383842170238495, "learning_rate": 1e-05, "loss": 0.9855, "step": 47755 }, { "epoch": 42.30292294065545, "grad_norm": 0.221732035279274, "learning_rate": 1e-05, "loss": 0.9898, "step": 47760 }, { "epoch": 42.30735163861824, "grad_norm": 0.25873833894729614, "learning_rate": 1e-05, "loss": 1.0272, "step": 47765 }, { "epoch": 42.311780336581045, "grad_norm": 0.21419979631900787, "learning_rate": 1e-05, "loss": 1.0096, "step": 47770 }, { "epoch": 42.316209034543846, "grad_norm": 0.2709173262119293, "learning_rate": 1e-05, "loss": 1.0041, "step": 47775 }, { "epoch": 42.32063773250664, "grad_norm": 0.228935107588768, "learning_rate": 1e-05, "loss": 1.0217, "step": 47780 }, { "epoch": 42.32506643046944, "grad_norm": 0.2365936040878296, "learning_rate": 1e-05, "loss": 0.9582, "step": 47785 }, { "epoch": 42.32949512843224, "grad_norm": 0.22589780390262604, "learning_rate": 1e-05, "loss": 0.9215, "step": 47790 }, { "epoch": 42.333923826395036, "grad_norm": 0.2352641224861145, "learning_rate": 1e-05, "loss": 0.9766, "step": 47795 }, { "epoch": 42.33835252435784, "grad_norm": 0.2637138366699219, "learning_rate": 1e-05, "loss": 0.953, "step": 47800 }, { "epoch": 42.34278122232064, "grad_norm": 0.25251033902168274, "learning_rate": 1e-05, "loss": 1.001, "step": 47805 }, { "epoch": 42.34720992028344, "grad_norm": 0.25087177753448486, "learning_rate": 1e-05, "loss": 1.0158, "step": 47810 }, { "epoch": 42.351638618246234, "grad_norm": 0.26238250732421875, "learning_rate": 1e-05, "loss": 0.9051, "step": 47815 }, { "epoch": 42.356067316209035, "grad_norm": 0.2517799437046051, "learning_rate": 1e-05, "loss": 0.9891, "step": 47820 }, { "epoch": 42.36049601417184, "grad_norm": 0.27053138613700867, "learning_rate": 1e-05, "loss": 0.9495, "step": 47825 }, { "epoch": 42.36492471213463, "grad_norm": 0.25898489356040955, "learning_rate": 1e-05, "loss": 0.9603, "step": 47830 }, { "epoch": 42.36935341009743, "grad_norm": 0.22970426082611084, "learning_rate": 1e-05, "loss": 1.0134, "step": 47835 }, { "epoch": 42.37378210806023, "grad_norm": 0.22901815176010132, "learning_rate": 1e-05, "loss": 0.9683, "step": 47840 }, { "epoch": 42.37821080602303, "grad_norm": 0.2113620787858963, "learning_rate": 1e-05, "loss": 0.9467, "step": 47845 }, { "epoch": 42.38263950398583, "grad_norm": 0.24157191812992096, "learning_rate": 1e-05, "loss": 1.0052, "step": 47850 }, { "epoch": 42.38706820194863, "grad_norm": 0.25221458077430725, "learning_rate": 1e-05, "loss": 0.9751, "step": 47855 }, { "epoch": 42.391496899911424, "grad_norm": 0.22772446274757385, "learning_rate": 1e-05, "loss": 0.9962, "step": 47860 }, { "epoch": 42.395925597874225, "grad_norm": 0.21750780940055847, "learning_rate": 1e-05, "loss": 0.9432, "step": 47865 }, { "epoch": 42.400354295837026, "grad_norm": 0.2313288152217865, "learning_rate": 1e-05, "loss": 0.9803, "step": 47870 }, { "epoch": 42.40478299379982, "grad_norm": 0.26093485951423645, "learning_rate": 1e-05, "loss": 0.9877, "step": 47875 }, { "epoch": 42.40921169176262, "grad_norm": 0.23907415568828583, "learning_rate": 1e-05, "loss": 0.9767, "step": 47880 }, { "epoch": 42.41364038972542, "grad_norm": 0.2607579827308655, "learning_rate": 1e-05, "loss": 0.9256, "step": 47885 }, { "epoch": 42.41806908768822, "grad_norm": 0.22821852564811707, "learning_rate": 1e-05, "loss": 0.9549, "step": 47890 }, { "epoch": 42.42249778565102, "grad_norm": 0.209013432264328, "learning_rate": 1e-05, "loss": 0.966, "step": 47895 }, { "epoch": 42.42692648361382, "grad_norm": 0.22845275700092316, "learning_rate": 1e-05, "loss": 0.9192, "step": 47900 }, { "epoch": 42.43135518157661, "grad_norm": 0.21673406660556793, "learning_rate": 1e-05, "loss": 0.9549, "step": 47905 }, { "epoch": 42.435783879539414, "grad_norm": 0.22812657058238983, "learning_rate": 1e-05, "loss": 0.9709, "step": 47910 }, { "epoch": 42.440212577502216, "grad_norm": 0.239319309592247, "learning_rate": 1e-05, "loss": 0.9905, "step": 47915 }, { "epoch": 42.44464127546502, "grad_norm": 0.21700850129127502, "learning_rate": 1e-05, "loss": 0.9599, "step": 47920 }, { "epoch": 42.44906997342781, "grad_norm": 0.26806920766830444, "learning_rate": 1e-05, "loss": 0.9611, "step": 47925 }, { "epoch": 42.45349867139061, "grad_norm": 0.27256736159324646, "learning_rate": 1e-05, "loss": 1.0132, "step": 47930 }, { "epoch": 42.45792736935341, "grad_norm": 0.23432551324367523, "learning_rate": 1e-05, "loss": 0.9818, "step": 47935 }, { "epoch": 42.46235606731621, "grad_norm": 0.23915445804595947, "learning_rate": 1e-05, "loss": 0.9964, "step": 47940 }, { "epoch": 42.46678476527901, "grad_norm": 0.28664112091064453, "learning_rate": 1e-05, "loss": 0.9846, "step": 47945 }, { "epoch": 42.47121346324181, "grad_norm": 0.22265088558197021, "learning_rate": 1e-05, "loss": 0.9816, "step": 47950 }, { "epoch": 42.475642161204604, "grad_norm": 0.23771323263645172, "learning_rate": 1e-05, "loss": 0.9705, "step": 47955 }, { "epoch": 42.480070859167405, "grad_norm": 0.25379106402397156, "learning_rate": 1e-05, "loss": 0.9888, "step": 47960 }, { "epoch": 42.484499557130206, "grad_norm": 0.2564697861671448, "learning_rate": 1e-05, "loss": 0.9955, "step": 47965 }, { "epoch": 42.488928255093, "grad_norm": 0.2633035182952881, "learning_rate": 1e-05, "loss": 0.9791, "step": 47970 }, { "epoch": 42.4933569530558, "grad_norm": 0.28511014580726624, "learning_rate": 1e-05, "loss": 0.9978, "step": 47975 }, { "epoch": 42.4977856510186, "grad_norm": 0.30362117290496826, "learning_rate": 1e-05, "loss": 0.9452, "step": 47980 }, { "epoch": 42.5022143489814, "grad_norm": 0.2917020618915558, "learning_rate": 1e-05, "loss": 0.9929, "step": 47985 }, { "epoch": 42.5066430469442, "grad_norm": 0.2303413599729538, "learning_rate": 1e-05, "loss": 1.0036, "step": 47990 }, { "epoch": 42.511071744907, "grad_norm": 0.2529556453227997, "learning_rate": 1e-05, "loss": 0.9442, "step": 47995 }, { "epoch": 42.515500442869794, "grad_norm": 0.23653215169906616, "learning_rate": 1e-05, "loss": 0.98, "step": 48000 }, { "epoch": 42.519929140832595, "grad_norm": 0.2300071120262146, "learning_rate": 1e-05, "loss": 0.9675, "step": 48005 }, { "epoch": 42.524357838795396, "grad_norm": 0.24020180106163025, "learning_rate": 1e-05, "loss": 0.9961, "step": 48010 }, { "epoch": 42.52878653675819, "grad_norm": 0.26055529713630676, "learning_rate": 1e-05, "loss": 0.9629, "step": 48015 }, { "epoch": 42.53321523472099, "grad_norm": 0.23629535734653473, "learning_rate": 1e-05, "loss": 0.9079, "step": 48020 }, { "epoch": 42.53764393268379, "grad_norm": 0.22570590674877167, "learning_rate": 1e-05, "loss": 1.0078, "step": 48025 }, { "epoch": 42.54207263064659, "grad_norm": 0.21297363936901093, "learning_rate": 1e-05, "loss": 1.0286, "step": 48030 }, { "epoch": 42.54650132860939, "grad_norm": 0.24435722827911377, "learning_rate": 1e-05, "loss": 1.0166, "step": 48035 }, { "epoch": 42.55093002657219, "grad_norm": 0.21717384457588196, "learning_rate": 1e-05, "loss": 0.9983, "step": 48040 }, { "epoch": 42.55535872453498, "grad_norm": 0.2731130123138428, "learning_rate": 1e-05, "loss": 1.0082, "step": 48045 }, { "epoch": 42.559787422497784, "grad_norm": 0.27486127614974976, "learning_rate": 1e-05, "loss": 0.9964, "step": 48050 }, { "epoch": 42.564216120460586, "grad_norm": 0.24913103878498077, "learning_rate": 1e-05, "loss": 0.9823, "step": 48055 }, { "epoch": 42.56864481842339, "grad_norm": 0.2720363140106201, "learning_rate": 1e-05, "loss": 1.0523, "step": 48060 }, { "epoch": 42.57307351638618, "grad_norm": 0.2424292266368866, "learning_rate": 1e-05, "loss": 0.9904, "step": 48065 }, { "epoch": 42.57750221434898, "grad_norm": 0.26393935084342957, "learning_rate": 1e-05, "loss": 0.9732, "step": 48070 }, { "epoch": 42.58193091231178, "grad_norm": 0.2699756920337677, "learning_rate": 1e-05, "loss": 1.0089, "step": 48075 }, { "epoch": 42.58635961027458, "grad_norm": 0.2601114809513092, "learning_rate": 1e-05, "loss": 0.9698, "step": 48080 }, { "epoch": 42.59078830823738, "grad_norm": 0.2980334460735321, "learning_rate": 1e-05, "loss": 1.0064, "step": 48085 }, { "epoch": 42.59521700620018, "grad_norm": 0.2408342808485031, "learning_rate": 1e-05, "loss": 1.0461, "step": 48090 }, { "epoch": 42.599645704162974, "grad_norm": 0.2292022556066513, "learning_rate": 1e-05, "loss": 1.0064, "step": 48095 }, { "epoch": 42.604074402125775, "grad_norm": 0.2296779453754425, "learning_rate": 1e-05, "loss": 1.0202, "step": 48100 }, { "epoch": 42.608503100088576, "grad_norm": 0.21083290874958038, "learning_rate": 1e-05, "loss": 0.9725, "step": 48105 }, { "epoch": 42.61293179805137, "grad_norm": 0.2390083372592926, "learning_rate": 1e-05, "loss": 0.9815, "step": 48110 }, { "epoch": 42.61736049601417, "grad_norm": 0.23495185375213623, "learning_rate": 1e-05, "loss": 0.9751, "step": 48115 }, { "epoch": 42.62178919397697, "grad_norm": 0.2481905072927475, "learning_rate": 1e-05, "loss": 0.9715, "step": 48120 }, { "epoch": 42.62621789193977, "grad_norm": 0.254972904920578, "learning_rate": 1e-05, "loss": 1.0497, "step": 48125 }, { "epoch": 42.63064658990257, "grad_norm": 0.2681357264518738, "learning_rate": 1e-05, "loss": 0.9415, "step": 48130 }, { "epoch": 42.63507528786537, "grad_norm": 0.23341068625450134, "learning_rate": 1e-05, "loss": 0.9506, "step": 48135 }, { "epoch": 42.63950398582816, "grad_norm": 0.2620530128479004, "learning_rate": 1e-05, "loss": 0.999, "step": 48140 }, { "epoch": 42.643932683790965, "grad_norm": 0.22171328961849213, "learning_rate": 1e-05, "loss": 0.9742, "step": 48145 }, { "epoch": 42.648361381753766, "grad_norm": 0.22144654393196106, "learning_rate": 1e-05, "loss": 0.9488, "step": 48150 }, { "epoch": 42.65279007971656, "grad_norm": 0.3005954921245575, "learning_rate": 1e-05, "loss": 0.9384, "step": 48155 }, { "epoch": 42.65721877767936, "grad_norm": 0.2432592660188675, "learning_rate": 1e-05, "loss": 0.9893, "step": 48160 }, { "epoch": 42.66164747564216, "grad_norm": 0.2259034365415573, "learning_rate": 1e-05, "loss": 0.939, "step": 48165 }, { "epoch": 42.666076173604964, "grad_norm": 0.25215601921081543, "learning_rate": 1e-05, "loss": 0.9965, "step": 48170 }, { "epoch": 42.67050487156776, "grad_norm": 0.2721492350101471, "learning_rate": 1e-05, "loss": 0.9562, "step": 48175 }, { "epoch": 42.67493356953056, "grad_norm": 0.21123404800891876, "learning_rate": 1e-05, "loss": 0.9555, "step": 48180 }, { "epoch": 42.67936226749336, "grad_norm": 0.2151031792163849, "learning_rate": 1e-05, "loss": 0.9873, "step": 48185 }, { "epoch": 42.683790965456154, "grad_norm": 0.22641023993492126, "learning_rate": 1e-05, "loss": 0.95, "step": 48190 }, { "epoch": 42.688219663418955, "grad_norm": 0.3041313588619232, "learning_rate": 1e-05, "loss": 0.9374, "step": 48195 }, { "epoch": 42.69264836138176, "grad_norm": 0.24665305018424988, "learning_rate": 1e-05, "loss": 0.9251, "step": 48200 }, { "epoch": 42.69707705934455, "grad_norm": 0.27113863825798035, "learning_rate": 1e-05, "loss": 0.9706, "step": 48205 }, { "epoch": 42.70150575730735, "grad_norm": 0.24964125454425812, "learning_rate": 1e-05, "loss": 0.9436, "step": 48210 }, { "epoch": 42.70593445527015, "grad_norm": 0.24823129177093506, "learning_rate": 1e-05, "loss": 0.9606, "step": 48215 }, { "epoch": 42.71036315323295, "grad_norm": 0.24530290067195892, "learning_rate": 1e-05, "loss": 0.9741, "step": 48220 }, { "epoch": 42.71479185119575, "grad_norm": 0.28934958577156067, "learning_rate": 1e-05, "loss": 0.9636, "step": 48225 }, { "epoch": 42.71922054915855, "grad_norm": 0.24014613032341003, "learning_rate": 1e-05, "loss": 0.9548, "step": 48230 }, { "epoch": 42.723649247121344, "grad_norm": 0.27102765440940857, "learning_rate": 1e-05, "loss": 0.9508, "step": 48235 }, { "epoch": 42.728077945084145, "grad_norm": 0.2744861841201782, "learning_rate": 1e-05, "loss": 1.001, "step": 48240 }, { "epoch": 42.732506643046946, "grad_norm": 0.23346906900405884, "learning_rate": 1e-05, "loss": 1.0093, "step": 48245 }, { "epoch": 42.73693534100974, "grad_norm": 0.2375788688659668, "learning_rate": 1e-05, "loss": 0.8964, "step": 48250 }, { "epoch": 42.74136403897254, "grad_norm": 0.25419747829437256, "learning_rate": 1e-05, "loss": 0.9977, "step": 48255 }, { "epoch": 42.74579273693534, "grad_norm": 0.339944988489151, "learning_rate": 1e-05, "loss": 0.951, "step": 48260 }, { "epoch": 42.75022143489814, "grad_norm": 0.26090502738952637, "learning_rate": 1e-05, "loss": 0.9642, "step": 48265 }, { "epoch": 42.75465013286094, "grad_norm": 0.2445857673883438, "learning_rate": 1e-05, "loss": 1.0115, "step": 48270 }, { "epoch": 42.75907883082374, "grad_norm": 0.25257250666618347, "learning_rate": 1e-05, "loss": 0.9834, "step": 48275 }, { "epoch": 42.76350752878653, "grad_norm": 0.26221832633018494, "learning_rate": 1e-05, "loss": 1.0031, "step": 48280 }, { "epoch": 42.767936226749335, "grad_norm": 0.2826215624809265, "learning_rate": 1e-05, "loss": 0.9925, "step": 48285 }, { "epoch": 42.772364924712136, "grad_norm": 0.24633489549160004, "learning_rate": 1e-05, "loss": 1.0043, "step": 48290 }, { "epoch": 42.77679362267494, "grad_norm": 0.2523399889469147, "learning_rate": 1e-05, "loss": 0.9374, "step": 48295 }, { "epoch": 42.78122232063773, "grad_norm": 0.23993588984012604, "learning_rate": 1e-05, "loss": 1.0033, "step": 48300 }, { "epoch": 42.78565101860053, "grad_norm": 0.24645495414733887, "learning_rate": 1e-05, "loss": 0.9957, "step": 48305 }, { "epoch": 42.79007971656333, "grad_norm": 0.2708395719528198, "learning_rate": 1e-05, "loss": 0.938, "step": 48310 }, { "epoch": 42.79450841452613, "grad_norm": 0.24099871516227722, "learning_rate": 1e-05, "loss": 0.9403, "step": 48315 }, { "epoch": 42.79893711248893, "grad_norm": 0.25115475058555603, "learning_rate": 1e-05, "loss": 0.9884, "step": 48320 }, { "epoch": 42.80336581045173, "grad_norm": 0.25144869089126587, "learning_rate": 1e-05, "loss": 0.9546, "step": 48325 }, { "epoch": 42.807794508414524, "grad_norm": 0.21381622552871704, "learning_rate": 1e-05, "loss": 0.9789, "step": 48330 }, { "epoch": 42.812223206377325, "grad_norm": 0.23303286731243134, "learning_rate": 1e-05, "loss": 0.9536, "step": 48335 }, { "epoch": 42.81665190434013, "grad_norm": 0.19767539203166962, "learning_rate": 1e-05, "loss": 0.9542, "step": 48340 }, { "epoch": 42.82108060230292, "grad_norm": 0.29785776138305664, "learning_rate": 1e-05, "loss": 1.0142, "step": 48345 }, { "epoch": 42.82550930026572, "grad_norm": 0.23110856115818024, "learning_rate": 1e-05, "loss": 0.883, "step": 48350 }, { "epoch": 42.82993799822852, "grad_norm": 0.27621594071388245, "learning_rate": 1e-05, "loss": 0.9986, "step": 48355 }, { "epoch": 42.83436669619132, "grad_norm": 0.2695445120334625, "learning_rate": 1e-05, "loss": 0.9744, "step": 48360 }, { "epoch": 42.83879539415412, "grad_norm": 0.25764527916908264, "learning_rate": 1e-05, "loss": 0.9601, "step": 48365 }, { "epoch": 42.84322409211692, "grad_norm": 0.2536025941371918, "learning_rate": 1e-05, "loss": 0.9321, "step": 48370 }, { "epoch": 42.847652790079714, "grad_norm": 0.28541821241378784, "learning_rate": 1e-05, "loss": 1.0083, "step": 48375 }, { "epoch": 42.852081488042515, "grad_norm": 0.24982883036136627, "learning_rate": 1e-05, "loss": 0.9587, "step": 48380 }, { "epoch": 42.856510186005316, "grad_norm": 0.23558664321899414, "learning_rate": 1e-05, "loss": 1.0075, "step": 48385 }, { "epoch": 42.86093888396811, "grad_norm": 0.24839410185813904, "learning_rate": 1e-05, "loss": 0.9366, "step": 48390 }, { "epoch": 42.86536758193091, "grad_norm": 0.2402290403842926, "learning_rate": 1e-05, "loss": 0.9487, "step": 48395 }, { "epoch": 42.86979627989371, "grad_norm": 0.21183155477046967, "learning_rate": 1e-05, "loss": 1.008, "step": 48400 }, { "epoch": 42.87422497785651, "grad_norm": 0.21802331507205963, "learning_rate": 1e-05, "loss": 0.9933, "step": 48405 }, { "epoch": 42.87865367581931, "grad_norm": 0.19899477064609528, "learning_rate": 1e-05, "loss": 1.0564, "step": 48410 }, { "epoch": 42.88308237378211, "grad_norm": 0.25269243121147156, "learning_rate": 1e-05, "loss": 0.9756, "step": 48415 }, { "epoch": 42.88751107174491, "grad_norm": 0.25537973642349243, "learning_rate": 1e-05, "loss": 0.9643, "step": 48420 }, { "epoch": 42.891939769707704, "grad_norm": 0.21263942122459412, "learning_rate": 1e-05, "loss": 0.9756, "step": 48425 }, { "epoch": 42.896368467670506, "grad_norm": 0.22235079109668732, "learning_rate": 1e-05, "loss": 0.9816, "step": 48430 }, { "epoch": 42.90079716563331, "grad_norm": 0.22799000144004822, "learning_rate": 1e-05, "loss": 1.0098, "step": 48435 }, { "epoch": 42.9052258635961, "grad_norm": 0.21052277088165283, "learning_rate": 1e-05, "loss": 0.9361, "step": 48440 }, { "epoch": 42.9096545615589, "grad_norm": 0.31826117634773254, "learning_rate": 1e-05, "loss": 0.9988, "step": 48445 }, { "epoch": 42.9140832595217, "grad_norm": 0.2638929486274719, "learning_rate": 1e-05, "loss": 0.9846, "step": 48450 }, { "epoch": 42.9185119574845, "grad_norm": 0.2635389268398285, "learning_rate": 1e-05, "loss": 0.9416, "step": 48455 }, { "epoch": 42.9229406554473, "grad_norm": 0.22469651699066162, "learning_rate": 1e-05, "loss": 1.0448, "step": 48460 }, { "epoch": 42.9273693534101, "grad_norm": 0.2392352670431137, "learning_rate": 1e-05, "loss": 0.9863, "step": 48465 }, { "epoch": 42.931798051372894, "grad_norm": 0.2513946294784546, "learning_rate": 1e-05, "loss": 1.009, "step": 48470 }, { "epoch": 42.936226749335695, "grad_norm": 0.2198619693517685, "learning_rate": 1e-05, "loss": 0.9838, "step": 48475 }, { "epoch": 42.9406554472985, "grad_norm": 0.3263920843601227, "learning_rate": 1e-05, "loss": 0.9521, "step": 48480 }, { "epoch": 42.94508414526129, "grad_norm": 0.26297903060913086, "learning_rate": 1e-05, "loss": 0.9988, "step": 48485 }, { "epoch": 42.94951284322409, "grad_norm": 0.23898695409297943, "learning_rate": 1e-05, "loss": 0.9481, "step": 48490 }, { "epoch": 42.95394154118689, "grad_norm": 0.2855616807937622, "learning_rate": 1e-05, "loss": 0.9402, "step": 48495 }, { "epoch": 42.95837023914969, "grad_norm": 0.26662975549697876, "learning_rate": 1e-05, "loss": 0.9831, "step": 48500 }, { "epoch": 42.96279893711249, "grad_norm": 0.22201858460903168, "learning_rate": 1e-05, "loss": 0.9562, "step": 48505 }, { "epoch": 42.96722763507529, "grad_norm": 0.318624347448349, "learning_rate": 1e-05, "loss": 0.9733, "step": 48510 }, { "epoch": 42.971656333038084, "grad_norm": 0.254317969083786, "learning_rate": 1e-05, "loss": 0.9975, "step": 48515 }, { "epoch": 42.976085031000885, "grad_norm": 0.2682293653488159, "learning_rate": 1e-05, "loss": 0.9979, "step": 48520 }, { "epoch": 42.980513728963686, "grad_norm": 0.22450757026672363, "learning_rate": 1e-05, "loss": 0.9647, "step": 48525 }, { "epoch": 42.98494242692648, "grad_norm": 0.23072472214698792, "learning_rate": 1e-05, "loss": 0.9921, "step": 48530 }, { "epoch": 42.98937112488928, "grad_norm": 0.2410205453634262, "learning_rate": 1e-05, "loss": 0.894, "step": 48535 }, { "epoch": 42.99379982285208, "grad_norm": 0.21451111137866974, "learning_rate": 1e-05, "loss": 0.9847, "step": 48540 }, { "epoch": 42.998228520814884, "grad_norm": 0.2471478134393692, "learning_rate": 1e-05, "loss": 0.9689, "step": 48545 }, { "epoch": 43.00265721877768, "grad_norm": 0.24021108448505402, "learning_rate": 1e-05, "loss": 0.9397, "step": 48550 }, { "epoch": 43.00708591674048, "grad_norm": 0.2044842541217804, "learning_rate": 1e-05, "loss": 0.9772, "step": 48555 }, { "epoch": 43.01151461470328, "grad_norm": 0.2626218795776367, "learning_rate": 1e-05, "loss": 0.9674, "step": 48560 }, { "epoch": 43.015943312666074, "grad_norm": 0.19783316552639008, "learning_rate": 1e-05, "loss": 0.9755, "step": 48565 }, { "epoch": 43.020372010628876, "grad_norm": 0.23617658019065857, "learning_rate": 1e-05, "loss": 0.9867, "step": 48570 }, { "epoch": 43.02480070859168, "grad_norm": 0.22546809911727905, "learning_rate": 1e-05, "loss": 0.953, "step": 48575 }, { "epoch": 43.02922940655447, "grad_norm": 0.23770242929458618, "learning_rate": 1e-05, "loss": 1.0435, "step": 48580 }, { "epoch": 43.03365810451727, "grad_norm": 0.2602790594100952, "learning_rate": 1e-05, "loss": 1.0056, "step": 48585 }, { "epoch": 43.03808680248007, "grad_norm": 0.22907041013240814, "learning_rate": 1e-05, "loss": 1.0221, "step": 48590 }, { "epoch": 43.04251550044287, "grad_norm": 0.25333258509635925, "learning_rate": 1e-05, "loss": 0.99, "step": 48595 }, { "epoch": 43.04694419840567, "grad_norm": 0.2384660691022873, "learning_rate": 1e-05, "loss": 0.9595, "step": 48600 }, { "epoch": 43.05137289636847, "grad_norm": 0.2633801996707916, "learning_rate": 1e-05, "loss": 1.0074, "step": 48605 }, { "epoch": 43.055801594331264, "grad_norm": 0.2746524214744568, "learning_rate": 1e-05, "loss": 0.9398, "step": 48610 }, { "epoch": 43.060230292294065, "grad_norm": 0.30926743149757385, "learning_rate": 1e-05, "loss": 0.9539, "step": 48615 }, { "epoch": 43.064658990256866, "grad_norm": 0.24673625826835632, "learning_rate": 1e-05, "loss": 0.9522, "step": 48620 }, { "epoch": 43.06908768821966, "grad_norm": 0.24810610711574554, "learning_rate": 1e-05, "loss": 0.9724, "step": 48625 }, { "epoch": 43.07351638618246, "grad_norm": 0.2859072685241699, "learning_rate": 1e-05, "loss": 0.9684, "step": 48630 }, { "epoch": 43.07794508414526, "grad_norm": 0.2518060505390167, "learning_rate": 1e-05, "loss": 0.978, "step": 48635 }, { "epoch": 43.08237378210806, "grad_norm": 0.26977333426475525, "learning_rate": 1e-05, "loss": 0.9812, "step": 48640 }, { "epoch": 43.08680248007086, "grad_norm": 0.26873618364334106, "learning_rate": 1e-05, "loss": 0.9955, "step": 48645 }, { "epoch": 43.09123117803366, "grad_norm": 0.23334825038909912, "learning_rate": 1e-05, "loss": 0.9771, "step": 48650 }, { "epoch": 43.09565987599645, "grad_norm": 0.24947552382946014, "learning_rate": 1e-05, "loss": 0.904, "step": 48655 }, { "epoch": 43.100088573959255, "grad_norm": 0.2441912442445755, "learning_rate": 1e-05, "loss": 1.0088, "step": 48660 }, { "epoch": 43.104517271922056, "grad_norm": 0.26713937520980835, "learning_rate": 1e-05, "loss": 0.9502, "step": 48665 }, { "epoch": 43.10894596988486, "grad_norm": 0.23476269841194153, "learning_rate": 1e-05, "loss": 0.9637, "step": 48670 }, { "epoch": 43.11337466784765, "grad_norm": 0.22453244030475616, "learning_rate": 1e-05, "loss": 0.9801, "step": 48675 }, { "epoch": 43.11780336581045, "grad_norm": 0.2722662687301636, "learning_rate": 1e-05, "loss": 1.0173, "step": 48680 }, { "epoch": 43.122232063773254, "grad_norm": 0.24403497576713562, "learning_rate": 1e-05, "loss": 0.9495, "step": 48685 }, { "epoch": 43.12666076173605, "grad_norm": 0.23619729280471802, "learning_rate": 1e-05, "loss": 0.9375, "step": 48690 }, { "epoch": 43.13108945969885, "grad_norm": 0.2588210105895996, "learning_rate": 1e-05, "loss": 0.9967, "step": 48695 }, { "epoch": 43.13551815766165, "grad_norm": 0.2949660122394562, "learning_rate": 1e-05, "loss": 0.9967, "step": 48700 }, { "epoch": 43.139946855624444, "grad_norm": 0.2258627861738205, "learning_rate": 1e-05, "loss": 0.967, "step": 48705 }, { "epoch": 43.144375553587246, "grad_norm": 0.2544408142566681, "learning_rate": 1e-05, "loss": 0.9628, "step": 48710 }, { "epoch": 43.14880425155005, "grad_norm": 0.24393408000469208, "learning_rate": 1e-05, "loss": 0.9899, "step": 48715 }, { "epoch": 43.15323294951284, "grad_norm": 0.2392231673002243, "learning_rate": 1e-05, "loss": 0.9766, "step": 48720 }, { "epoch": 43.15766164747564, "grad_norm": 0.23040099442005157, "learning_rate": 1e-05, "loss": 1.0032, "step": 48725 }, { "epoch": 43.16209034543844, "grad_norm": 0.2195800244808197, "learning_rate": 1e-05, "loss": 0.9996, "step": 48730 }, { "epoch": 43.16651904340124, "grad_norm": 0.22495730221271515, "learning_rate": 1e-05, "loss": 1.0008, "step": 48735 }, { "epoch": 43.17094774136404, "grad_norm": 0.2142936736345291, "learning_rate": 1e-05, "loss": 1.0015, "step": 48740 }, { "epoch": 43.17537643932684, "grad_norm": 0.21201835572719574, "learning_rate": 1e-05, "loss": 0.9768, "step": 48745 }, { "epoch": 43.179805137289634, "grad_norm": 0.2537386119365692, "learning_rate": 1e-05, "loss": 1.0184, "step": 48750 }, { "epoch": 43.184233835252435, "grad_norm": 0.24516579508781433, "learning_rate": 1e-05, "loss": 1.0151, "step": 48755 }, { "epoch": 43.188662533215236, "grad_norm": 0.22724126279354095, "learning_rate": 1e-05, "loss": 0.9212, "step": 48760 }, { "epoch": 43.19309123117803, "grad_norm": 0.2207074910402298, "learning_rate": 1e-05, "loss": 0.9621, "step": 48765 }, { "epoch": 43.19751992914083, "grad_norm": 0.23460142314434052, "learning_rate": 1e-05, "loss": 0.9666, "step": 48770 }, { "epoch": 43.20194862710363, "grad_norm": 0.27136340737342834, "learning_rate": 1e-05, "loss": 0.9997, "step": 48775 }, { "epoch": 43.20637732506643, "grad_norm": 0.241058811545372, "learning_rate": 1e-05, "loss": 0.9584, "step": 48780 }, { "epoch": 43.21080602302923, "grad_norm": 0.2589196562767029, "learning_rate": 1e-05, "loss": 1.0185, "step": 48785 }, { "epoch": 43.21523472099203, "grad_norm": 0.221303790807724, "learning_rate": 1e-05, "loss": 0.9937, "step": 48790 }, { "epoch": 43.21966341895483, "grad_norm": 0.21532781422138214, "learning_rate": 1e-05, "loss": 0.9729, "step": 48795 }, { "epoch": 43.224092116917625, "grad_norm": 0.2571173906326294, "learning_rate": 1e-05, "loss": 0.9938, "step": 48800 }, { "epoch": 43.228520814880426, "grad_norm": 0.24057568609714508, "learning_rate": 1e-05, "loss": 0.9216, "step": 48805 }, { "epoch": 43.23294951284323, "grad_norm": 0.25805070996284485, "learning_rate": 1e-05, "loss": 0.9897, "step": 48810 }, { "epoch": 43.23737821080602, "grad_norm": 0.2347421497106552, "learning_rate": 1e-05, "loss": 1.0071, "step": 48815 }, { "epoch": 43.24180690876882, "grad_norm": 0.2759992182254791, "learning_rate": 1e-05, "loss": 1.0187, "step": 48820 }, { "epoch": 43.246235606731624, "grad_norm": 0.23638631403446198, "learning_rate": 1e-05, "loss": 0.9877, "step": 48825 }, { "epoch": 43.25066430469442, "grad_norm": 0.29003486037254333, "learning_rate": 1e-05, "loss": 1.0082, "step": 48830 }, { "epoch": 43.25509300265722, "grad_norm": 0.2998713552951813, "learning_rate": 1e-05, "loss": 0.9953, "step": 48835 }, { "epoch": 43.25952170062002, "grad_norm": 0.23364496231079102, "learning_rate": 1e-05, "loss": 0.9629, "step": 48840 }, { "epoch": 43.263950398582814, "grad_norm": 0.3096825182437897, "learning_rate": 1e-05, "loss": 0.985, "step": 48845 }, { "epoch": 43.268379096545615, "grad_norm": 0.20321637392044067, "learning_rate": 1e-05, "loss": 1.0081, "step": 48850 }, { "epoch": 43.27280779450842, "grad_norm": 0.2535122334957123, "learning_rate": 1e-05, "loss": 0.9845, "step": 48855 }, { "epoch": 43.27723649247121, "grad_norm": 0.29232820868492126, "learning_rate": 1e-05, "loss": 0.9731, "step": 48860 }, { "epoch": 43.28166519043401, "grad_norm": 0.2545989453792572, "learning_rate": 1e-05, "loss": 1.0264, "step": 48865 }, { "epoch": 43.28609388839681, "grad_norm": 0.2291771024465561, "learning_rate": 1e-05, "loss": 1.0075, "step": 48870 }, { "epoch": 43.29052258635961, "grad_norm": 0.23977535963058472, "learning_rate": 1e-05, "loss": 1.0148, "step": 48875 }, { "epoch": 43.29495128432241, "grad_norm": 0.22587409615516663, "learning_rate": 1e-05, "loss": 0.9942, "step": 48880 }, { "epoch": 43.29937998228521, "grad_norm": 0.25296398997306824, "learning_rate": 1e-05, "loss": 0.9302, "step": 48885 }, { "epoch": 43.303808680248004, "grad_norm": 0.23332521319389343, "learning_rate": 1e-05, "loss": 0.9833, "step": 48890 }, { "epoch": 43.308237378210805, "grad_norm": 0.2421172559261322, "learning_rate": 1e-05, "loss": 1.013, "step": 48895 }, { "epoch": 43.312666076173606, "grad_norm": 0.2455790489912033, "learning_rate": 1e-05, "loss": 0.9326, "step": 48900 }, { "epoch": 43.31709477413641, "grad_norm": 0.25551503896713257, "learning_rate": 1e-05, "loss": 0.9596, "step": 48905 }, { "epoch": 43.3215234720992, "grad_norm": 0.23959125578403473, "learning_rate": 1e-05, "loss": 0.9714, "step": 48910 }, { "epoch": 43.325952170062, "grad_norm": 0.25829899311065674, "learning_rate": 1e-05, "loss": 0.9469, "step": 48915 }, { "epoch": 43.330380868024804, "grad_norm": 0.25805699825286865, "learning_rate": 1e-05, "loss": 0.9113, "step": 48920 }, { "epoch": 43.3348095659876, "grad_norm": 0.28306129574775696, "learning_rate": 1e-05, "loss": 0.9674, "step": 48925 }, { "epoch": 43.3392382639504, "grad_norm": 0.2557009160518646, "learning_rate": 1e-05, "loss": 0.9676, "step": 48930 }, { "epoch": 43.3436669619132, "grad_norm": 0.21177564561367035, "learning_rate": 1e-05, "loss": 1.015, "step": 48935 }, { "epoch": 43.348095659875995, "grad_norm": 0.27018865942955017, "learning_rate": 1e-05, "loss": 1.0131, "step": 48940 }, { "epoch": 43.352524357838796, "grad_norm": 0.2154812514781952, "learning_rate": 1e-05, "loss": 0.9699, "step": 48945 }, { "epoch": 43.3569530558016, "grad_norm": 0.27615541219711304, "learning_rate": 1e-05, "loss": 0.998, "step": 48950 }, { "epoch": 43.36138175376439, "grad_norm": 0.2018997073173523, "learning_rate": 1e-05, "loss": 1.0089, "step": 48955 }, { "epoch": 43.36581045172719, "grad_norm": 0.22869576513767242, "learning_rate": 1e-05, "loss": 0.971, "step": 48960 }, { "epoch": 43.37023914968999, "grad_norm": 0.24255912005901337, "learning_rate": 1e-05, "loss": 0.9708, "step": 48965 }, { "epoch": 43.37466784765279, "grad_norm": 0.2782955765724182, "learning_rate": 1e-05, "loss": 0.9598, "step": 48970 }, { "epoch": 43.37909654561559, "grad_norm": 0.24312376976013184, "learning_rate": 1e-05, "loss": 0.9862, "step": 48975 }, { "epoch": 43.38352524357839, "grad_norm": 0.213237002491951, "learning_rate": 1e-05, "loss": 1.0081, "step": 48980 }, { "epoch": 43.387953941541184, "grad_norm": 0.2938636839389801, "learning_rate": 1e-05, "loss": 1.0027, "step": 48985 }, { "epoch": 43.392382639503985, "grad_norm": 0.21172958612442017, "learning_rate": 1e-05, "loss": 0.9496, "step": 48990 }, { "epoch": 43.39681133746679, "grad_norm": 0.27214258909225464, "learning_rate": 1e-05, "loss": 0.9999, "step": 48995 }, { "epoch": 43.40124003542958, "grad_norm": 0.26839783787727356, "learning_rate": 1e-05, "loss": 1.0364, "step": 49000 }, { "epoch": 43.40566873339238, "grad_norm": 0.2545689046382904, "learning_rate": 1e-05, "loss": 0.9817, "step": 49005 }, { "epoch": 43.41009743135518, "grad_norm": 0.2299758344888687, "learning_rate": 1e-05, "loss": 0.9759, "step": 49010 }, { "epoch": 43.41452612931798, "grad_norm": 0.25038138031959534, "learning_rate": 1e-05, "loss": 0.9928, "step": 49015 }, { "epoch": 43.41895482728078, "grad_norm": 0.230449840426445, "learning_rate": 1e-05, "loss": 0.958, "step": 49020 }, { "epoch": 43.42338352524358, "grad_norm": 0.23532941937446594, "learning_rate": 1e-05, "loss": 0.9365, "step": 49025 }, { "epoch": 43.42781222320638, "grad_norm": 0.21966420114040375, "learning_rate": 1e-05, "loss": 0.9942, "step": 49030 }, { "epoch": 43.432240921169175, "grad_norm": 0.2668024003505707, "learning_rate": 1e-05, "loss": 0.9845, "step": 49035 }, { "epoch": 43.436669619131976, "grad_norm": 0.253147691488266, "learning_rate": 1e-05, "loss": 0.9765, "step": 49040 }, { "epoch": 43.44109831709478, "grad_norm": 0.22405873239040375, "learning_rate": 1e-05, "loss": 0.9796, "step": 49045 }, { "epoch": 43.44552701505757, "grad_norm": 0.2223701924085617, "learning_rate": 1e-05, "loss": 0.9754, "step": 49050 }, { "epoch": 43.44995571302037, "grad_norm": 0.24943919479846954, "learning_rate": 1e-05, "loss": 0.9504, "step": 49055 }, { "epoch": 43.454384410983174, "grad_norm": 0.20813694596290588, "learning_rate": 1e-05, "loss": 1.0062, "step": 49060 }, { "epoch": 43.45881310894597, "grad_norm": 0.2050868570804596, "learning_rate": 1e-05, "loss": 0.9608, "step": 49065 }, { "epoch": 43.46324180690877, "grad_norm": 0.2796988785266876, "learning_rate": 1e-05, "loss": 0.9156, "step": 49070 }, { "epoch": 43.46767050487157, "grad_norm": 0.24679169058799744, "learning_rate": 1e-05, "loss": 0.9652, "step": 49075 }, { "epoch": 43.472099202834364, "grad_norm": 0.27094992995262146, "learning_rate": 1e-05, "loss": 0.8916, "step": 49080 }, { "epoch": 43.476527900797166, "grad_norm": 0.23632866144180298, "learning_rate": 1e-05, "loss": 0.9406, "step": 49085 }, { "epoch": 43.48095659875997, "grad_norm": 0.2382800132036209, "learning_rate": 1e-05, "loss": 0.9843, "step": 49090 }, { "epoch": 43.48538529672276, "grad_norm": 0.2205362170934677, "learning_rate": 1e-05, "loss": 0.9245, "step": 49095 }, { "epoch": 43.48981399468556, "grad_norm": 0.3077715039253235, "learning_rate": 1e-05, "loss": 0.9732, "step": 49100 }, { "epoch": 43.49424269264836, "grad_norm": 0.24289454519748688, "learning_rate": 1e-05, "loss": 0.9328, "step": 49105 }, { "epoch": 43.49867139061116, "grad_norm": 0.27689915895462036, "learning_rate": 1e-05, "loss": 0.9945, "step": 49110 }, { "epoch": 43.50310008857396, "grad_norm": 0.2686055302619934, "learning_rate": 1e-05, "loss": 0.9581, "step": 49115 }, { "epoch": 43.50752878653676, "grad_norm": 0.2293749451637268, "learning_rate": 1e-05, "loss": 0.923, "step": 49120 }, { "epoch": 43.511957484499554, "grad_norm": 0.21473269164562225, "learning_rate": 1e-05, "loss": 0.9685, "step": 49125 }, { "epoch": 43.516386182462355, "grad_norm": 0.21567752957344055, "learning_rate": 1e-05, "loss": 1.0146, "step": 49130 }, { "epoch": 43.520814880425156, "grad_norm": 0.23887348175048828, "learning_rate": 1e-05, "loss": 1.015, "step": 49135 }, { "epoch": 43.52524357838795, "grad_norm": 0.2501644790172577, "learning_rate": 1e-05, "loss": 0.9698, "step": 49140 }, { "epoch": 43.52967227635075, "grad_norm": 0.2574361264705658, "learning_rate": 1e-05, "loss": 0.999, "step": 49145 }, { "epoch": 43.53410097431355, "grad_norm": 0.2448074221611023, "learning_rate": 1e-05, "loss": 1.0149, "step": 49150 }, { "epoch": 43.538529672276354, "grad_norm": 0.26575735211372375, "learning_rate": 1e-05, "loss": 1.0274, "step": 49155 }, { "epoch": 43.54295837023915, "grad_norm": 0.23696841299533844, "learning_rate": 1e-05, "loss": 0.9917, "step": 49160 }, { "epoch": 43.54738706820195, "grad_norm": 0.2419581562280655, "learning_rate": 1e-05, "loss": 1.0006, "step": 49165 }, { "epoch": 43.55181576616475, "grad_norm": 0.2198808342218399, "learning_rate": 1e-05, "loss": 0.9765, "step": 49170 }, { "epoch": 43.556244464127545, "grad_norm": 0.24984802305698395, "learning_rate": 1e-05, "loss": 0.9449, "step": 49175 }, { "epoch": 43.560673162090346, "grad_norm": 0.23892787098884583, "learning_rate": 1e-05, "loss": 0.9422, "step": 49180 }, { "epoch": 43.56510186005315, "grad_norm": 0.2153821438550949, "learning_rate": 1e-05, "loss": 1.0395, "step": 49185 }, { "epoch": 43.56953055801594, "grad_norm": 0.2657289206981659, "learning_rate": 1e-05, "loss": 0.9597, "step": 49190 }, { "epoch": 43.57395925597874, "grad_norm": 0.2194409966468811, "learning_rate": 1e-05, "loss": 0.9899, "step": 49195 }, { "epoch": 43.578387953941544, "grad_norm": 0.2293374240398407, "learning_rate": 1e-05, "loss": 0.9462, "step": 49200 }, { "epoch": 43.58281665190434, "grad_norm": 0.24256011843681335, "learning_rate": 1e-05, "loss": 0.9529, "step": 49205 }, { "epoch": 43.58724534986714, "grad_norm": 0.27824878692626953, "learning_rate": 1e-05, "loss": 0.9873, "step": 49210 }, { "epoch": 43.59167404782994, "grad_norm": 0.2267044335603714, "learning_rate": 1e-05, "loss": 0.9514, "step": 49215 }, { "epoch": 43.596102745792734, "grad_norm": 0.2308742105960846, "learning_rate": 1e-05, "loss": 1.0285, "step": 49220 }, { "epoch": 43.600531443755536, "grad_norm": 0.23583976924419403, "learning_rate": 1e-05, "loss": 1.0095, "step": 49225 }, { "epoch": 43.60496014171834, "grad_norm": 0.266474187374115, "learning_rate": 1e-05, "loss": 0.9204, "step": 49230 }, { "epoch": 43.60938883968113, "grad_norm": 0.2643164396286011, "learning_rate": 1e-05, "loss": 0.9993, "step": 49235 }, { "epoch": 43.61381753764393, "grad_norm": 0.23752117156982422, "learning_rate": 1e-05, "loss": 1.0172, "step": 49240 }, { "epoch": 43.61824623560673, "grad_norm": 0.27046123147010803, "learning_rate": 1e-05, "loss": 0.9749, "step": 49245 }, { "epoch": 43.62267493356953, "grad_norm": 0.21744512021541595, "learning_rate": 1e-05, "loss": 0.9668, "step": 49250 }, { "epoch": 43.62710363153233, "grad_norm": 0.2618338465690613, "learning_rate": 1e-05, "loss": 0.9869, "step": 49255 }, { "epoch": 43.63153232949513, "grad_norm": 0.28749337792396545, "learning_rate": 1e-05, "loss": 0.9178, "step": 49260 }, { "epoch": 43.635961027457924, "grad_norm": 0.22716765105724335, "learning_rate": 1e-05, "loss": 0.9835, "step": 49265 }, { "epoch": 43.640389725420725, "grad_norm": 0.20710992813110352, "learning_rate": 1e-05, "loss": 0.9903, "step": 49270 }, { "epoch": 43.644818423383526, "grad_norm": 0.24519795179367065, "learning_rate": 1e-05, "loss": 0.9749, "step": 49275 }, { "epoch": 43.64924712134633, "grad_norm": 0.23651088774204254, "learning_rate": 1e-05, "loss": 0.9723, "step": 49280 }, { "epoch": 43.65367581930912, "grad_norm": 0.23251208662986755, "learning_rate": 1e-05, "loss": 1.016, "step": 49285 }, { "epoch": 43.65810451727192, "grad_norm": 0.2389114499092102, "learning_rate": 1e-05, "loss": 0.9535, "step": 49290 }, { "epoch": 43.662533215234724, "grad_norm": 0.25229859352111816, "learning_rate": 1e-05, "loss": 0.9604, "step": 49295 }, { "epoch": 43.66696191319752, "grad_norm": 0.2183249592781067, "learning_rate": 1e-05, "loss": 1.0296, "step": 49300 }, { "epoch": 43.67139061116032, "grad_norm": 0.24305486679077148, "learning_rate": 1e-05, "loss": 0.9521, "step": 49305 }, { "epoch": 43.67581930912312, "grad_norm": 0.26113682985305786, "learning_rate": 1e-05, "loss": 0.9765, "step": 49310 }, { "epoch": 43.680248007085915, "grad_norm": 0.21981877088546753, "learning_rate": 1e-05, "loss": 0.9714, "step": 49315 }, { "epoch": 43.684676705048716, "grad_norm": 0.23341485857963562, "learning_rate": 1e-05, "loss": 0.9385, "step": 49320 }, { "epoch": 43.68910540301152, "grad_norm": 0.2075640857219696, "learning_rate": 1e-05, "loss": 0.9971, "step": 49325 }, { "epoch": 43.69353410097431, "grad_norm": 0.18894685804843903, "learning_rate": 1e-05, "loss": 0.9828, "step": 49330 }, { "epoch": 43.69796279893711, "grad_norm": 0.2324332296848297, "learning_rate": 1e-05, "loss": 0.9095, "step": 49335 }, { "epoch": 43.702391496899914, "grad_norm": 0.22342362999916077, "learning_rate": 1e-05, "loss": 0.9536, "step": 49340 }, { "epoch": 43.70682019486271, "grad_norm": 0.2466936856508255, "learning_rate": 1e-05, "loss": 0.9929, "step": 49345 }, { "epoch": 43.71124889282551, "grad_norm": 0.27215689420700073, "learning_rate": 1e-05, "loss": 1.0029, "step": 49350 }, { "epoch": 43.71567759078831, "grad_norm": 0.30186519026756287, "learning_rate": 1e-05, "loss": 0.9702, "step": 49355 }, { "epoch": 43.720106288751104, "grad_norm": 0.24005964398384094, "learning_rate": 1e-05, "loss": 0.9366, "step": 49360 }, { "epoch": 43.724534986713905, "grad_norm": 0.279868483543396, "learning_rate": 1e-05, "loss": 0.9593, "step": 49365 }, { "epoch": 43.72896368467671, "grad_norm": 0.21704097092151642, "learning_rate": 1e-05, "loss": 0.9542, "step": 49370 }, { "epoch": 43.7333923826395, "grad_norm": 0.23223647475242615, "learning_rate": 1e-05, "loss": 0.9634, "step": 49375 }, { "epoch": 43.7378210806023, "grad_norm": 0.2283603549003601, "learning_rate": 1e-05, "loss": 1.019, "step": 49380 }, { "epoch": 43.7422497785651, "grad_norm": 0.25047197937965393, "learning_rate": 1e-05, "loss": 0.9751, "step": 49385 }, { "epoch": 43.7466784765279, "grad_norm": 0.23389460146427155, "learning_rate": 1e-05, "loss": 1.002, "step": 49390 }, { "epoch": 43.7511071744907, "grad_norm": 0.2856042683124542, "learning_rate": 1e-05, "loss": 0.9769, "step": 49395 }, { "epoch": 43.7555358724535, "grad_norm": 0.21818546950817108, "learning_rate": 1e-05, "loss": 0.9901, "step": 49400 }, { "epoch": 43.7599645704163, "grad_norm": 0.22605416178703308, "learning_rate": 1e-05, "loss": 0.9612, "step": 49405 }, { "epoch": 43.764393268379095, "grad_norm": 0.24196910858154297, "learning_rate": 1e-05, "loss": 0.9565, "step": 49410 }, { "epoch": 43.768821966341896, "grad_norm": 0.27956026792526245, "learning_rate": 1e-05, "loss": 0.9848, "step": 49415 }, { "epoch": 43.7732506643047, "grad_norm": 0.23798374831676483, "learning_rate": 1e-05, "loss": 0.9917, "step": 49420 }, { "epoch": 43.77767936226749, "grad_norm": 0.24072261154651642, "learning_rate": 1e-05, "loss": 0.9784, "step": 49425 }, { "epoch": 43.78210806023029, "grad_norm": 0.19725191593170166, "learning_rate": 1e-05, "loss": 0.9934, "step": 49430 }, { "epoch": 43.786536758193094, "grad_norm": 0.24050062894821167, "learning_rate": 1e-05, "loss": 0.9799, "step": 49435 }, { "epoch": 43.79096545615589, "grad_norm": 0.2125917375087738, "learning_rate": 1e-05, "loss": 0.9696, "step": 49440 }, { "epoch": 43.79539415411869, "grad_norm": 0.25900572538375854, "learning_rate": 1e-05, "loss": 0.9793, "step": 49445 }, { "epoch": 43.79982285208149, "grad_norm": 0.21378600597381592, "learning_rate": 1e-05, "loss": 1.0092, "step": 49450 }, { "epoch": 43.804251550044285, "grad_norm": 0.2513464689254761, "learning_rate": 1e-05, "loss": 0.9591, "step": 49455 }, { "epoch": 43.808680248007086, "grad_norm": 0.2141907960176468, "learning_rate": 1e-05, "loss": 0.9598, "step": 49460 }, { "epoch": 43.81310894596989, "grad_norm": 0.26176124811172485, "learning_rate": 1e-05, "loss": 0.9416, "step": 49465 }, { "epoch": 43.81753764393268, "grad_norm": 0.30373674631118774, "learning_rate": 1e-05, "loss": 0.9633, "step": 49470 }, { "epoch": 43.82196634189548, "grad_norm": 0.26641765236854553, "learning_rate": 1e-05, "loss": 0.9917, "step": 49475 }, { "epoch": 43.826395039858284, "grad_norm": 0.25141939520835876, "learning_rate": 1e-05, "loss": 0.9487, "step": 49480 }, { "epoch": 43.83082373782108, "grad_norm": 0.23124532401561737, "learning_rate": 1e-05, "loss": 0.9934, "step": 49485 }, { "epoch": 43.83525243578388, "grad_norm": 0.23579838871955872, "learning_rate": 1e-05, "loss": 0.9577, "step": 49490 }, { "epoch": 43.83968113374668, "grad_norm": 0.23973248898983002, "learning_rate": 1e-05, "loss": 1.002, "step": 49495 }, { "epoch": 43.844109831709474, "grad_norm": 0.22211840748786926, "learning_rate": 1e-05, "loss": 0.9784, "step": 49500 }, { "epoch": 43.848538529672275, "grad_norm": 0.24297572672367096, "learning_rate": 1e-05, "loss": 1.0078, "step": 49505 }, { "epoch": 43.85296722763508, "grad_norm": 0.22632776200771332, "learning_rate": 1e-05, "loss": 0.9899, "step": 49510 }, { "epoch": 43.85739592559787, "grad_norm": 0.24866943061351776, "learning_rate": 1e-05, "loss": 1.012, "step": 49515 }, { "epoch": 43.86182462356067, "grad_norm": 0.2901684641838074, "learning_rate": 1e-05, "loss": 0.9864, "step": 49520 }, { "epoch": 43.86625332152347, "grad_norm": 0.2154412716627121, "learning_rate": 1e-05, "loss": 1.0126, "step": 49525 }, { "epoch": 43.870682019486274, "grad_norm": 0.22443675994873047, "learning_rate": 1e-05, "loss": 0.9967, "step": 49530 }, { "epoch": 43.87511071744907, "grad_norm": 0.23283013701438904, "learning_rate": 1e-05, "loss": 1.0224, "step": 49535 }, { "epoch": 43.87953941541187, "grad_norm": 0.26056212186813354, "learning_rate": 1e-05, "loss": 0.9999, "step": 49540 }, { "epoch": 43.88396811337467, "grad_norm": 0.24146683514118195, "learning_rate": 1e-05, "loss": 0.9376, "step": 49545 }, { "epoch": 43.888396811337465, "grad_norm": 0.2114381194114685, "learning_rate": 1e-05, "loss": 0.9489, "step": 49550 }, { "epoch": 43.892825509300266, "grad_norm": 0.2068805694580078, "learning_rate": 1e-05, "loss": 0.9583, "step": 49555 }, { "epoch": 43.89725420726307, "grad_norm": 0.2212674766778946, "learning_rate": 1e-05, "loss": 0.9767, "step": 49560 }, { "epoch": 43.90168290522586, "grad_norm": 0.2440878301858902, "learning_rate": 1e-05, "loss": 0.9543, "step": 49565 }, { "epoch": 43.90611160318866, "grad_norm": 0.2579110562801361, "learning_rate": 1e-05, "loss": 0.9513, "step": 49570 }, { "epoch": 43.910540301151464, "grad_norm": 0.2541361153125763, "learning_rate": 1e-05, "loss": 1.0305, "step": 49575 }, { "epoch": 43.91496899911426, "grad_norm": 0.24004533886909485, "learning_rate": 1e-05, "loss": 0.9994, "step": 49580 }, { "epoch": 43.91939769707706, "grad_norm": 0.23401138186454773, "learning_rate": 1e-05, "loss": 0.9989, "step": 49585 }, { "epoch": 43.92382639503986, "grad_norm": 0.28666403889656067, "learning_rate": 1e-05, "loss": 0.9152, "step": 49590 }, { "epoch": 43.928255093002655, "grad_norm": 0.2494373917579651, "learning_rate": 1e-05, "loss": 0.9918, "step": 49595 }, { "epoch": 43.932683790965456, "grad_norm": 0.2411811649799347, "learning_rate": 1e-05, "loss": 0.9656, "step": 49600 }, { "epoch": 43.93711248892826, "grad_norm": 0.22462542355060577, "learning_rate": 1e-05, "loss": 0.9795, "step": 49605 }, { "epoch": 43.94154118689105, "grad_norm": 0.2938650846481323, "learning_rate": 1e-05, "loss": 0.9032, "step": 49610 }, { "epoch": 43.94596988485385, "grad_norm": 0.24739117920398712, "learning_rate": 1e-05, "loss": 0.9662, "step": 49615 }, { "epoch": 43.95039858281665, "grad_norm": 0.22504116594791412, "learning_rate": 1e-05, "loss": 0.9472, "step": 49620 }, { "epoch": 43.95482728077945, "grad_norm": 0.2561790943145752, "learning_rate": 1e-05, "loss": 0.9857, "step": 49625 }, { "epoch": 43.95925597874225, "grad_norm": 0.22255125641822815, "learning_rate": 1e-05, "loss": 0.9883, "step": 49630 }, { "epoch": 43.96368467670505, "grad_norm": 0.21574559807777405, "learning_rate": 1e-05, "loss": 1.0109, "step": 49635 }, { "epoch": 43.96811337466785, "grad_norm": 0.2735796272754669, "learning_rate": 1e-05, "loss": 0.9532, "step": 49640 }, { "epoch": 43.972542072630645, "grad_norm": 0.24575546383857727, "learning_rate": 1e-05, "loss": 0.9881, "step": 49645 }, { "epoch": 43.97697077059345, "grad_norm": 0.2672165632247925, "learning_rate": 1e-05, "loss": 0.9237, "step": 49650 }, { "epoch": 43.98139946855625, "grad_norm": 0.3126530051231384, "learning_rate": 1e-05, "loss": 0.9304, "step": 49655 }, { "epoch": 43.98582816651904, "grad_norm": 0.2688734829425812, "learning_rate": 1e-05, "loss": 0.9867, "step": 49660 }, { "epoch": 43.99025686448184, "grad_norm": 0.2695014774799347, "learning_rate": 1e-05, "loss": 0.957, "step": 49665 }, { "epoch": 43.994685562444644, "grad_norm": 0.23811259865760803, "learning_rate": 1e-05, "loss": 0.9637, "step": 49670 }, { "epoch": 43.99911426040744, "grad_norm": 0.2175956815481186, "learning_rate": 1e-05, "loss": 1.0484, "step": 49675 }, { "epoch": 44.00354295837024, "grad_norm": 0.2453879415988922, "learning_rate": 1e-05, "loss": 1.0154, "step": 49680 }, { "epoch": 44.00797165633304, "grad_norm": 0.2216404676437378, "learning_rate": 1e-05, "loss": 0.9916, "step": 49685 }, { "epoch": 44.012400354295835, "grad_norm": 0.25337862968444824, "learning_rate": 1e-05, "loss": 1.0044, "step": 49690 }, { "epoch": 44.016829052258636, "grad_norm": 0.2043168991804123, "learning_rate": 1e-05, "loss": 0.9828, "step": 49695 }, { "epoch": 44.02125775022144, "grad_norm": 0.24075400829315186, "learning_rate": 1e-05, "loss": 0.9322, "step": 49700 }, { "epoch": 44.02568644818423, "grad_norm": 0.2637993097305298, "learning_rate": 1e-05, "loss": 0.9988, "step": 49705 }, { "epoch": 44.03011514614703, "grad_norm": 0.2577388882637024, "learning_rate": 1e-05, "loss": 0.9996, "step": 49710 }, { "epoch": 44.034543844109834, "grad_norm": 0.2601379454135895, "learning_rate": 1e-05, "loss": 0.9498, "step": 49715 }, { "epoch": 44.03897254207263, "grad_norm": 0.3299559950828552, "learning_rate": 1e-05, "loss": 0.985, "step": 49720 }, { "epoch": 44.04340124003543, "grad_norm": 0.29679402709007263, "learning_rate": 1e-05, "loss": 0.9529, "step": 49725 }, { "epoch": 44.04782993799823, "grad_norm": 0.2444685995578766, "learning_rate": 1e-05, "loss": 0.9649, "step": 49730 }, { "epoch": 44.052258635961024, "grad_norm": 0.2680133283138275, "learning_rate": 1e-05, "loss": 0.9999, "step": 49735 }, { "epoch": 44.056687333923826, "grad_norm": 0.23599475622177124, "learning_rate": 1e-05, "loss": 0.9543, "step": 49740 }, { "epoch": 44.06111603188663, "grad_norm": 0.2521182894706726, "learning_rate": 1e-05, "loss": 0.9752, "step": 49745 }, { "epoch": 44.06554472984942, "grad_norm": 0.2508750855922699, "learning_rate": 1e-05, "loss": 0.9722, "step": 49750 }, { "epoch": 44.06997342781222, "grad_norm": 0.2555854916572571, "learning_rate": 1e-05, "loss": 0.9269, "step": 49755 }, { "epoch": 44.07440212577502, "grad_norm": 0.22437188029289246, "learning_rate": 1e-05, "loss": 0.9523, "step": 49760 }, { "epoch": 44.078830823737825, "grad_norm": 0.22609266638755798, "learning_rate": 1e-05, "loss": 0.9757, "step": 49765 }, { "epoch": 44.08325952170062, "grad_norm": 0.21780064702033997, "learning_rate": 1e-05, "loss": 1.0021, "step": 49770 }, { "epoch": 44.08768821966342, "grad_norm": 0.23511624336242676, "learning_rate": 1e-05, "loss": 0.9837, "step": 49775 }, { "epoch": 44.09211691762622, "grad_norm": 0.355492502450943, "learning_rate": 1e-05, "loss": 0.9527, "step": 49780 }, { "epoch": 44.096545615589015, "grad_norm": 0.3028511703014374, "learning_rate": 1e-05, "loss": 0.96, "step": 49785 }, { "epoch": 44.100974313551816, "grad_norm": 0.35851171612739563, "learning_rate": 1e-05, "loss": 0.9797, "step": 49790 }, { "epoch": 44.10540301151462, "grad_norm": 0.25587978959083557, "learning_rate": 1e-05, "loss": 1.0084, "step": 49795 }, { "epoch": 44.10983170947741, "grad_norm": 0.23267780244350433, "learning_rate": 1e-05, "loss": 0.9898, "step": 49800 }, { "epoch": 44.11426040744021, "grad_norm": 0.2884264886379242, "learning_rate": 1e-05, "loss": 1.0532, "step": 49805 }, { "epoch": 44.118689105403014, "grad_norm": 0.23949430882930756, "learning_rate": 1e-05, "loss": 0.9448, "step": 49810 }, { "epoch": 44.12311780336581, "grad_norm": 0.23606997728347778, "learning_rate": 1e-05, "loss": 1.0379, "step": 49815 }, { "epoch": 44.12754650132861, "grad_norm": 0.2520465552806854, "learning_rate": 1e-05, "loss": 0.9789, "step": 49820 }, { "epoch": 44.13197519929141, "grad_norm": 0.2147519588470459, "learning_rate": 1e-05, "loss": 0.9857, "step": 49825 }, { "epoch": 44.136403897254205, "grad_norm": 0.2593078911304474, "learning_rate": 1e-05, "loss": 1.0028, "step": 49830 }, { "epoch": 44.140832595217006, "grad_norm": 0.28337571024894714, "learning_rate": 1e-05, "loss": 0.9661, "step": 49835 }, { "epoch": 44.14526129317981, "grad_norm": 0.2923653721809387, "learning_rate": 1e-05, "loss": 0.9474, "step": 49840 }, { "epoch": 44.1496899911426, "grad_norm": 0.2859342396259308, "learning_rate": 1e-05, "loss": 0.9637, "step": 49845 }, { "epoch": 44.1541186891054, "grad_norm": 0.23269562423229218, "learning_rate": 1e-05, "loss": 0.9582, "step": 49850 }, { "epoch": 44.158547387068204, "grad_norm": 0.3974922299385071, "learning_rate": 1e-05, "loss": 1.0306, "step": 49855 }, { "epoch": 44.162976085031, "grad_norm": 0.2668763995170593, "learning_rate": 1e-05, "loss": 1.0092, "step": 49860 }, { "epoch": 44.1674047829938, "grad_norm": 0.23206990957260132, "learning_rate": 1e-05, "loss": 1.0052, "step": 49865 }, { "epoch": 44.1718334809566, "grad_norm": 0.2509148418903351, "learning_rate": 1e-05, "loss": 0.9878, "step": 49870 }, { "epoch": 44.176262178919394, "grad_norm": 0.2504391670227051, "learning_rate": 1e-05, "loss": 0.9421, "step": 49875 }, { "epoch": 44.180690876882196, "grad_norm": 0.2371387928724289, "learning_rate": 1e-05, "loss": 1.0103, "step": 49880 }, { "epoch": 44.185119574845, "grad_norm": 0.2349540740251541, "learning_rate": 1e-05, "loss": 0.9481, "step": 49885 }, { "epoch": 44.1895482728078, "grad_norm": 0.2520585060119629, "learning_rate": 1e-05, "loss": 0.9253, "step": 49890 }, { "epoch": 44.19397697077059, "grad_norm": 0.290661096572876, "learning_rate": 1e-05, "loss": 0.9355, "step": 49895 }, { "epoch": 44.19840566873339, "grad_norm": 0.26891621947288513, "learning_rate": 1e-05, "loss": 0.9396, "step": 49900 }, { "epoch": 44.202834366696194, "grad_norm": 0.23947502672672272, "learning_rate": 1e-05, "loss": 1.0063, "step": 49905 }, { "epoch": 44.20726306465899, "grad_norm": 0.24221175909042358, "learning_rate": 1e-05, "loss": 1.0136, "step": 49910 }, { "epoch": 44.21169176262179, "grad_norm": 0.28259408473968506, "learning_rate": 1e-05, "loss": 0.9661, "step": 49915 }, { "epoch": 44.21612046058459, "grad_norm": 0.25452733039855957, "learning_rate": 1e-05, "loss": 1.0169, "step": 49920 }, { "epoch": 44.220549158547385, "grad_norm": 0.22958946228027344, "learning_rate": 1e-05, "loss": 0.9688, "step": 49925 }, { "epoch": 44.224977856510186, "grad_norm": 0.23510897159576416, "learning_rate": 1e-05, "loss": 0.9887, "step": 49930 }, { "epoch": 44.22940655447299, "grad_norm": 0.250924289226532, "learning_rate": 1e-05, "loss": 0.9775, "step": 49935 }, { "epoch": 44.23383525243578, "grad_norm": 0.2388603538274765, "learning_rate": 1e-05, "loss": 0.9832, "step": 49940 }, { "epoch": 44.23826395039858, "grad_norm": 0.2535986602306366, "learning_rate": 1e-05, "loss": 0.9692, "step": 49945 }, { "epoch": 44.242692648361384, "grad_norm": 0.2537343502044678, "learning_rate": 1e-05, "loss": 1.0212, "step": 49950 }, { "epoch": 44.24712134632418, "grad_norm": 0.22179286181926727, "learning_rate": 1e-05, "loss": 0.9915, "step": 49955 }, { "epoch": 44.25155004428698, "grad_norm": 0.23771311342716217, "learning_rate": 1e-05, "loss": 0.996, "step": 49960 }, { "epoch": 44.25597874224978, "grad_norm": 0.2930850088596344, "learning_rate": 1e-05, "loss": 0.9826, "step": 49965 }, { "epoch": 44.260407440212575, "grad_norm": 0.22962325811386108, "learning_rate": 1e-05, "loss": 0.9829, "step": 49970 }, { "epoch": 44.264836138175376, "grad_norm": 0.2151719182729721, "learning_rate": 1e-05, "loss": 1.0045, "step": 49975 }, { "epoch": 44.26926483613818, "grad_norm": 0.2562272846698761, "learning_rate": 1e-05, "loss": 0.9916, "step": 49980 }, { "epoch": 44.27369353410097, "grad_norm": 0.2469555139541626, "learning_rate": 1e-05, "loss": 0.9538, "step": 49985 }, { "epoch": 44.27812223206377, "grad_norm": 0.23190981149673462, "learning_rate": 1e-05, "loss": 0.9993, "step": 49990 }, { "epoch": 44.282550930026574, "grad_norm": 0.2526366710662842, "learning_rate": 1e-05, "loss": 0.9924, "step": 49995 }, { "epoch": 44.28697962798937, "grad_norm": 0.2519548535346985, "learning_rate": 1e-05, "loss": 0.9485, "step": 50000 }, { "epoch": 44.29140832595217, "grad_norm": 0.2310408651828766, "learning_rate": 1e-05, "loss": 0.9818, "step": 50005 }, { "epoch": 44.29583702391497, "grad_norm": 0.24374184012413025, "learning_rate": 1e-05, "loss": 0.9754, "step": 50010 }, { "epoch": 44.30026572187777, "grad_norm": 0.20592452585697174, "learning_rate": 1e-05, "loss": 0.9589, "step": 50015 }, { "epoch": 44.304694419840565, "grad_norm": 0.23045875132083893, "learning_rate": 1e-05, "loss": 0.9745, "step": 50020 }, { "epoch": 44.30912311780337, "grad_norm": 0.23315216600894928, "learning_rate": 1e-05, "loss": 0.9638, "step": 50025 }, { "epoch": 44.31355181576617, "grad_norm": 0.23353172838687897, "learning_rate": 1e-05, "loss": 0.9895, "step": 50030 }, { "epoch": 44.31798051372896, "grad_norm": 0.2239086925983429, "learning_rate": 1e-05, "loss": 0.9817, "step": 50035 }, { "epoch": 44.32240921169176, "grad_norm": 0.23434504866600037, "learning_rate": 1e-05, "loss": 1.0056, "step": 50040 }, { "epoch": 44.326837909654564, "grad_norm": 0.25160253047943115, "learning_rate": 1e-05, "loss": 0.9704, "step": 50045 }, { "epoch": 44.33126660761736, "grad_norm": 0.22875922918319702, "learning_rate": 1e-05, "loss": 1.0105, "step": 50050 }, { "epoch": 44.33569530558016, "grad_norm": 0.2536490857601166, "learning_rate": 1e-05, "loss": 0.9644, "step": 50055 }, { "epoch": 44.34012400354296, "grad_norm": 0.25222399830818176, "learning_rate": 1e-05, "loss": 0.9829, "step": 50060 }, { "epoch": 44.344552701505755, "grad_norm": 0.24515140056610107, "learning_rate": 1e-05, "loss": 1.0121, "step": 50065 }, { "epoch": 44.348981399468556, "grad_norm": 0.20229072868824005, "learning_rate": 1e-05, "loss": 0.9951, "step": 50070 }, { "epoch": 44.35341009743136, "grad_norm": 0.22831720113754272, "learning_rate": 1e-05, "loss": 0.9443, "step": 50075 }, { "epoch": 44.35783879539415, "grad_norm": 0.25126734375953674, "learning_rate": 1e-05, "loss": 1.0495, "step": 50080 }, { "epoch": 44.36226749335695, "grad_norm": 0.24788789451122284, "learning_rate": 1e-05, "loss": 0.9868, "step": 50085 }, { "epoch": 44.366696191319754, "grad_norm": 0.23605138063430786, "learning_rate": 1e-05, "loss": 0.9948, "step": 50090 }, { "epoch": 44.37112488928255, "grad_norm": 0.31426650285720825, "learning_rate": 1e-05, "loss": 0.9221, "step": 50095 }, { "epoch": 44.37555358724535, "grad_norm": 0.22368678450584412, "learning_rate": 1e-05, "loss": 0.939, "step": 50100 }, { "epoch": 44.37998228520815, "grad_norm": 0.23569771647453308, "learning_rate": 1e-05, "loss": 1.016, "step": 50105 }, { "epoch": 44.384410983170945, "grad_norm": 0.3110570013523102, "learning_rate": 1e-05, "loss": 1.0301, "step": 50110 }, { "epoch": 44.388839681133746, "grad_norm": 0.25935137271881104, "learning_rate": 1e-05, "loss": 1.0228, "step": 50115 }, { "epoch": 44.39326837909655, "grad_norm": 0.28817296028137207, "learning_rate": 1e-05, "loss": 0.9634, "step": 50120 }, { "epoch": 44.39769707705934, "grad_norm": 0.28363922238349915, "learning_rate": 1e-05, "loss": 0.9766, "step": 50125 }, { "epoch": 44.40212577502214, "grad_norm": 0.2399965524673462, "learning_rate": 1e-05, "loss": 0.9604, "step": 50130 }, { "epoch": 44.40655447298494, "grad_norm": 0.21465909481048584, "learning_rate": 1e-05, "loss": 0.9294, "step": 50135 }, { "epoch": 44.410983170947745, "grad_norm": 0.22371327877044678, "learning_rate": 1e-05, "loss": 0.9284, "step": 50140 }, { "epoch": 44.41541186891054, "grad_norm": 0.22318057715892792, "learning_rate": 1e-05, "loss": 1.0019, "step": 50145 }, { "epoch": 44.41984056687334, "grad_norm": 0.22202415764331818, "learning_rate": 1e-05, "loss": 0.974, "step": 50150 }, { "epoch": 44.42426926483614, "grad_norm": 0.24073345959186554, "learning_rate": 1e-05, "loss": 1.0188, "step": 50155 }, { "epoch": 44.428697962798935, "grad_norm": 0.2259293496608734, "learning_rate": 1e-05, "loss": 0.9898, "step": 50160 }, { "epoch": 44.43312666076174, "grad_norm": 0.21760140359401703, "learning_rate": 1e-05, "loss": 0.9504, "step": 50165 }, { "epoch": 44.43755535872454, "grad_norm": 0.22334861755371094, "learning_rate": 1e-05, "loss": 0.9922, "step": 50170 }, { "epoch": 44.44198405668733, "grad_norm": 0.26698681712150574, "learning_rate": 1e-05, "loss": 0.8922, "step": 50175 }, { "epoch": 44.44641275465013, "grad_norm": 0.24635346233844757, "learning_rate": 1e-05, "loss": 0.9682, "step": 50180 }, { "epoch": 44.450841452612934, "grad_norm": 0.251383513212204, "learning_rate": 1e-05, "loss": 0.92, "step": 50185 }, { "epoch": 44.45527015057573, "grad_norm": 0.28668734431266785, "learning_rate": 1e-05, "loss": 0.943, "step": 50190 }, { "epoch": 44.45969884853853, "grad_norm": 0.22254236042499542, "learning_rate": 1e-05, "loss": 0.9877, "step": 50195 }, { "epoch": 44.46412754650133, "grad_norm": 0.21074004471302032, "learning_rate": 1e-05, "loss": 1.0112, "step": 50200 }, { "epoch": 44.468556244464125, "grad_norm": 0.26638713479042053, "learning_rate": 1e-05, "loss": 0.9357, "step": 50205 }, { "epoch": 44.472984942426926, "grad_norm": 0.2354864925146103, "learning_rate": 1e-05, "loss": 0.9932, "step": 50210 }, { "epoch": 44.47741364038973, "grad_norm": 0.22966063022613525, "learning_rate": 1e-05, "loss": 0.9787, "step": 50215 }, { "epoch": 44.48184233835252, "grad_norm": 0.27157625555992126, "learning_rate": 1e-05, "loss": 0.9479, "step": 50220 }, { "epoch": 44.48627103631532, "grad_norm": 0.23083417117595673, "learning_rate": 1e-05, "loss": 0.9476, "step": 50225 }, { "epoch": 44.490699734278124, "grad_norm": 0.22872625291347504, "learning_rate": 1e-05, "loss": 1.0007, "step": 50230 }, { "epoch": 44.49512843224092, "grad_norm": 0.23940850794315338, "learning_rate": 1e-05, "loss": 0.9912, "step": 50235 }, { "epoch": 44.49955713020372, "grad_norm": 0.2548903226852417, "learning_rate": 1e-05, "loss": 0.9689, "step": 50240 }, { "epoch": 44.50398582816652, "grad_norm": 0.268465131521225, "learning_rate": 1e-05, "loss": 0.997, "step": 50245 }, { "epoch": 44.508414526129314, "grad_norm": 0.25344371795654297, "learning_rate": 1e-05, "loss": 0.964, "step": 50250 }, { "epoch": 44.512843224092116, "grad_norm": 0.20049132406711578, "learning_rate": 1e-05, "loss": 0.9863, "step": 50255 }, { "epoch": 44.51727192205492, "grad_norm": 0.2764725685119629, "learning_rate": 1e-05, "loss": 0.9899, "step": 50260 }, { "epoch": 44.52170062001772, "grad_norm": 0.23784217238426208, "learning_rate": 1e-05, "loss": 0.9865, "step": 50265 }, { "epoch": 44.52612931798051, "grad_norm": 0.29805687069892883, "learning_rate": 1e-05, "loss": 1.0211, "step": 50270 }, { "epoch": 44.53055801594331, "grad_norm": 0.3250724673271179, "learning_rate": 1e-05, "loss": 0.9917, "step": 50275 }, { "epoch": 44.534986713906115, "grad_norm": 0.23270177841186523, "learning_rate": 1e-05, "loss": 0.9756, "step": 50280 }, { "epoch": 44.53941541186891, "grad_norm": 0.2521251142024994, "learning_rate": 1e-05, "loss": 0.9924, "step": 50285 }, { "epoch": 44.54384410983171, "grad_norm": 0.25402703881263733, "learning_rate": 1e-05, "loss": 1.0151, "step": 50290 }, { "epoch": 44.54827280779451, "grad_norm": 0.26695531606674194, "learning_rate": 1e-05, "loss": 0.9714, "step": 50295 }, { "epoch": 44.552701505757305, "grad_norm": 0.2517922520637512, "learning_rate": 1e-05, "loss": 0.9928, "step": 50300 }, { "epoch": 44.55713020372011, "grad_norm": 0.21802237629890442, "learning_rate": 1e-05, "loss": 0.9458, "step": 50305 }, { "epoch": 44.56155890168291, "grad_norm": 0.20943692326545715, "learning_rate": 1e-05, "loss": 1.0037, "step": 50310 }, { "epoch": 44.5659875996457, "grad_norm": 0.22353410720825195, "learning_rate": 1e-05, "loss": 0.991, "step": 50315 }, { "epoch": 44.5704162976085, "grad_norm": 0.248456671833992, "learning_rate": 1e-05, "loss": 0.9884, "step": 50320 }, { "epoch": 44.574844995571304, "grad_norm": 0.2801806330680847, "learning_rate": 1e-05, "loss": 0.9969, "step": 50325 }, { "epoch": 44.5792736935341, "grad_norm": 0.28935131430625916, "learning_rate": 1e-05, "loss": 0.9777, "step": 50330 }, { "epoch": 44.5837023914969, "grad_norm": 0.20283353328704834, "learning_rate": 1e-05, "loss": 0.9646, "step": 50335 }, { "epoch": 44.5881310894597, "grad_norm": 0.22326818108558655, "learning_rate": 1e-05, "loss": 0.9764, "step": 50340 }, { "epoch": 44.592559787422495, "grad_norm": 0.2251996397972107, "learning_rate": 1e-05, "loss": 0.9646, "step": 50345 }, { "epoch": 44.596988485385296, "grad_norm": 0.23880022764205933, "learning_rate": 1e-05, "loss": 0.9425, "step": 50350 }, { "epoch": 44.6014171833481, "grad_norm": 0.29937225580215454, "learning_rate": 1e-05, "loss": 0.9706, "step": 50355 }, { "epoch": 44.60584588131089, "grad_norm": 0.2504315972328186, "learning_rate": 1e-05, "loss": 1.001, "step": 50360 }, { "epoch": 44.61027457927369, "grad_norm": 0.2714006006717682, "learning_rate": 1e-05, "loss": 0.9527, "step": 50365 }, { "epoch": 44.614703277236494, "grad_norm": 0.2577667534351349, "learning_rate": 1e-05, "loss": 0.9638, "step": 50370 }, { "epoch": 44.619131975199295, "grad_norm": 0.2723548114299774, "learning_rate": 1e-05, "loss": 0.9503, "step": 50375 }, { "epoch": 44.62356067316209, "grad_norm": 0.22927972674369812, "learning_rate": 1e-05, "loss": 1.0186, "step": 50380 }, { "epoch": 44.62798937112489, "grad_norm": 0.2656670808792114, "learning_rate": 1e-05, "loss": 0.9452, "step": 50385 }, { "epoch": 44.63241806908769, "grad_norm": 0.24565164744853973, "learning_rate": 1e-05, "loss": 0.9391, "step": 50390 }, { "epoch": 44.636846767050486, "grad_norm": 0.23352958261966705, "learning_rate": 1e-05, "loss": 0.9687, "step": 50395 }, { "epoch": 44.64127546501329, "grad_norm": 0.24695728719234467, "learning_rate": 1e-05, "loss": 0.9937, "step": 50400 }, { "epoch": 44.64570416297609, "grad_norm": 0.24550925195217133, "learning_rate": 1e-05, "loss": 0.9845, "step": 50405 }, { "epoch": 44.65013286093888, "grad_norm": 0.24056026339530945, "learning_rate": 1e-05, "loss": 0.9786, "step": 50410 }, { "epoch": 44.65456155890168, "grad_norm": 0.23579323291778564, "learning_rate": 1e-05, "loss": 0.9607, "step": 50415 }, { "epoch": 44.658990256864485, "grad_norm": 0.2570098638534546, "learning_rate": 1e-05, "loss": 1.0079, "step": 50420 }, { "epoch": 44.66341895482728, "grad_norm": 0.22641190886497498, "learning_rate": 1e-05, "loss": 0.9494, "step": 50425 }, { "epoch": 44.66784765279008, "grad_norm": 0.23759019374847412, "learning_rate": 1e-05, "loss": 0.882, "step": 50430 }, { "epoch": 44.67227635075288, "grad_norm": 0.2774142622947693, "learning_rate": 1e-05, "loss": 0.9689, "step": 50435 }, { "epoch": 44.676705048715675, "grad_norm": 0.23752395808696747, "learning_rate": 1e-05, "loss": 0.9412, "step": 50440 }, { "epoch": 44.681133746678476, "grad_norm": 0.23774580657482147, "learning_rate": 1e-05, "loss": 0.9602, "step": 50445 }, { "epoch": 44.68556244464128, "grad_norm": 0.2331254482269287, "learning_rate": 1e-05, "loss": 1.0198, "step": 50450 }, { "epoch": 44.68999114260407, "grad_norm": 0.23788121342658997, "learning_rate": 1e-05, "loss": 0.9611, "step": 50455 }, { "epoch": 44.69441984056687, "grad_norm": 0.2610919177532196, "learning_rate": 1e-05, "loss": 0.9828, "step": 50460 }, { "epoch": 44.698848538529674, "grad_norm": 0.24243460595607758, "learning_rate": 1e-05, "loss": 0.9794, "step": 50465 }, { "epoch": 44.70327723649247, "grad_norm": 0.25183090567588806, "learning_rate": 1e-05, "loss": 0.9492, "step": 50470 }, { "epoch": 44.70770593445527, "grad_norm": 0.2365809977054596, "learning_rate": 1e-05, "loss": 0.9368, "step": 50475 }, { "epoch": 44.71213463241807, "grad_norm": 0.3140511214733124, "learning_rate": 1e-05, "loss": 0.9497, "step": 50480 }, { "epoch": 44.716563330380865, "grad_norm": 0.19660690426826477, "learning_rate": 1e-05, "loss": 0.9419, "step": 50485 }, { "epoch": 44.720992028343666, "grad_norm": 0.21523156762123108, "learning_rate": 1e-05, "loss": 0.9705, "step": 50490 }, { "epoch": 44.72542072630647, "grad_norm": 0.23465435206890106, "learning_rate": 1e-05, "loss": 0.9516, "step": 50495 }, { "epoch": 44.72984942426926, "grad_norm": 0.2012229710817337, "learning_rate": 1e-05, "loss": 0.9805, "step": 50500 }, { "epoch": 44.73427812223206, "grad_norm": 0.2181909829378128, "learning_rate": 1e-05, "loss": 0.9994, "step": 50505 }, { "epoch": 44.738706820194864, "grad_norm": 0.2760373651981354, "learning_rate": 1e-05, "loss": 0.9992, "step": 50510 }, { "epoch": 44.743135518157665, "grad_norm": 0.2988514006137848, "learning_rate": 1e-05, "loss": 0.9707, "step": 50515 }, { "epoch": 44.74756421612046, "grad_norm": 0.23796944320201874, "learning_rate": 1e-05, "loss": 0.9073, "step": 50520 }, { "epoch": 44.75199291408326, "grad_norm": 0.23518982529640198, "learning_rate": 1e-05, "loss": 1.0174, "step": 50525 }, { "epoch": 44.75642161204606, "grad_norm": 0.2623854875564575, "learning_rate": 1e-05, "loss": 0.9711, "step": 50530 }, { "epoch": 44.760850310008856, "grad_norm": 0.2675706148147583, "learning_rate": 1e-05, "loss": 0.954, "step": 50535 }, { "epoch": 44.76527900797166, "grad_norm": 0.27577558159828186, "learning_rate": 1e-05, "loss": 0.9886, "step": 50540 }, { "epoch": 44.76970770593446, "grad_norm": 0.26294589042663574, "learning_rate": 1e-05, "loss": 1.0168, "step": 50545 }, { "epoch": 44.77413640389725, "grad_norm": 0.2526409924030304, "learning_rate": 1e-05, "loss": 0.9121, "step": 50550 }, { "epoch": 44.77856510186005, "grad_norm": 0.30389928817749023, "learning_rate": 1e-05, "loss": 0.989, "step": 50555 }, { "epoch": 44.782993799822854, "grad_norm": 0.2704399824142456, "learning_rate": 1e-05, "loss": 0.9725, "step": 50560 }, { "epoch": 44.78742249778565, "grad_norm": 0.2622881233692169, "learning_rate": 1e-05, "loss": 0.9472, "step": 50565 }, { "epoch": 44.79185119574845, "grad_norm": 0.27411016821861267, "learning_rate": 1e-05, "loss": 0.9957, "step": 50570 }, { "epoch": 44.79627989371125, "grad_norm": 0.24862916767597198, "learning_rate": 1e-05, "loss": 0.951, "step": 50575 }, { "epoch": 44.800708591674045, "grad_norm": 0.2665644586086273, "learning_rate": 1e-05, "loss": 0.9687, "step": 50580 }, { "epoch": 44.805137289636846, "grad_norm": 0.24571362137794495, "learning_rate": 1e-05, "loss": 0.9964, "step": 50585 }, { "epoch": 44.80956598759965, "grad_norm": 0.23265193402767181, "learning_rate": 1e-05, "loss": 0.998, "step": 50590 }, { "epoch": 44.81399468556244, "grad_norm": 0.2324245721101761, "learning_rate": 1e-05, "loss": 0.929, "step": 50595 }, { "epoch": 44.81842338352524, "grad_norm": 0.22979331016540527, "learning_rate": 1e-05, "loss": 0.9438, "step": 50600 }, { "epoch": 44.822852081488044, "grad_norm": 0.21760888397693634, "learning_rate": 1e-05, "loss": 0.9403, "step": 50605 }, { "epoch": 44.82728077945084, "grad_norm": 0.23312786221504211, "learning_rate": 1e-05, "loss": 0.9617, "step": 50610 }, { "epoch": 44.83170947741364, "grad_norm": 0.2248609960079193, "learning_rate": 1e-05, "loss": 1.0563, "step": 50615 }, { "epoch": 44.83613817537644, "grad_norm": 0.25403907895088196, "learning_rate": 1e-05, "loss": 0.9518, "step": 50620 }, { "epoch": 44.84056687333924, "grad_norm": 0.18929818272590637, "learning_rate": 1e-05, "loss": 0.9489, "step": 50625 }, { "epoch": 44.844995571302036, "grad_norm": 0.22148524224758148, "learning_rate": 1e-05, "loss": 0.9741, "step": 50630 }, { "epoch": 44.84942426926484, "grad_norm": 0.2323078215122223, "learning_rate": 1e-05, "loss": 0.9751, "step": 50635 }, { "epoch": 44.85385296722764, "grad_norm": 0.250608891248703, "learning_rate": 1e-05, "loss": 0.9759, "step": 50640 }, { "epoch": 44.85828166519043, "grad_norm": 0.2476261556148529, "learning_rate": 1e-05, "loss": 0.9564, "step": 50645 }, { "epoch": 44.862710363153234, "grad_norm": 0.27119264006614685, "learning_rate": 1e-05, "loss": 0.993, "step": 50650 }, { "epoch": 44.867139061116035, "grad_norm": 0.24649271368980408, "learning_rate": 1e-05, "loss": 0.9385, "step": 50655 }, { "epoch": 44.87156775907883, "grad_norm": 0.26250141859054565, "learning_rate": 1e-05, "loss": 1.023, "step": 50660 }, { "epoch": 44.87599645704163, "grad_norm": 0.2345227748155594, "learning_rate": 1e-05, "loss": 0.915, "step": 50665 }, { "epoch": 44.88042515500443, "grad_norm": 0.2239430844783783, "learning_rate": 1e-05, "loss": 1.0138, "step": 50670 }, { "epoch": 44.884853852967225, "grad_norm": 0.22486598789691925, "learning_rate": 1e-05, "loss": 1.0295, "step": 50675 }, { "epoch": 44.88928255093003, "grad_norm": 0.2772603929042816, "learning_rate": 1e-05, "loss": 0.997, "step": 50680 }, { "epoch": 44.89371124889283, "grad_norm": 0.2388048619031906, "learning_rate": 1e-05, "loss": 0.9519, "step": 50685 }, { "epoch": 44.89813994685562, "grad_norm": 0.2416672706604004, "learning_rate": 1e-05, "loss": 0.9889, "step": 50690 }, { "epoch": 44.90256864481842, "grad_norm": 0.21063125133514404, "learning_rate": 1e-05, "loss": 0.9967, "step": 50695 }, { "epoch": 44.906997342781224, "grad_norm": 0.24858325719833374, "learning_rate": 1e-05, "loss": 0.9686, "step": 50700 }, { "epoch": 44.91142604074402, "grad_norm": 0.2745394706726074, "learning_rate": 1e-05, "loss": 0.9485, "step": 50705 }, { "epoch": 44.91585473870682, "grad_norm": 0.2880264222621918, "learning_rate": 1e-05, "loss": 0.9698, "step": 50710 }, { "epoch": 44.92028343666962, "grad_norm": 0.22836659848690033, "learning_rate": 1e-05, "loss": 0.9537, "step": 50715 }, { "epoch": 44.924712134632415, "grad_norm": 0.22982192039489746, "learning_rate": 1e-05, "loss": 0.8998, "step": 50720 }, { "epoch": 44.929140832595216, "grad_norm": 0.2595456540584564, "learning_rate": 1e-05, "loss": 0.9597, "step": 50725 }, { "epoch": 44.93356953055802, "grad_norm": 0.2588038444519043, "learning_rate": 1e-05, "loss": 1.0001, "step": 50730 }, { "epoch": 44.93799822852081, "grad_norm": 0.2293577641248703, "learning_rate": 1e-05, "loss": 1.0079, "step": 50735 }, { "epoch": 44.94242692648361, "grad_norm": 0.2277379035949707, "learning_rate": 1e-05, "loss": 1.0167, "step": 50740 }, { "epoch": 44.946855624446414, "grad_norm": 0.22213824093341827, "learning_rate": 1e-05, "loss": 0.9555, "step": 50745 }, { "epoch": 44.951284322409215, "grad_norm": 0.24956080317497253, "learning_rate": 1e-05, "loss": 0.9833, "step": 50750 }, { "epoch": 44.95571302037201, "grad_norm": 0.22188463807106018, "learning_rate": 1e-05, "loss": 0.937, "step": 50755 }, { "epoch": 44.96014171833481, "grad_norm": 0.22646038234233856, "learning_rate": 1e-05, "loss": 0.9716, "step": 50760 }, { "epoch": 44.96457041629761, "grad_norm": 0.20522108674049377, "learning_rate": 1e-05, "loss": 0.9608, "step": 50765 }, { "epoch": 44.968999114260406, "grad_norm": 0.2669883668422699, "learning_rate": 1e-05, "loss": 0.9664, "step": 50770 }, { "epoch": 44.97342781222321, "grad_norm": 0.24617964029312134, "learning_rate": 1e-05, "loss": 0.9792, "step": 50775 }, { "epoch": 44.97785651018601, "grad_norm": 0.20243538916110992, "learning_rate": 1e-05, "loss": 0.9288, "step": 50780 }, { "epoch": 44.9822852081488, "grad_norm": 0.22546884417533875, "learning_rate": 1e-05, "loss": 0.9922, "step": 50785 }, { "epoch": 44.9867139061116, "grad_norm": 0.21208958327770233, "learning_rate": 1e-05, "loss": 0.9474, "step": 50790 }, { "epoch": 44.991142604074405, "grad_norm": 0.20937855541706085, "learning_rate": 1e-05, "loss": 0.9963, "step": 50795 }, { "epoch": 44.9955713020372, "grad_norm": 0.1977820247411728, "learning_rate": 1e-05, "loss": 0.9956, "step": 50800 }, { "epoch": 45.0, "grad_norm": 0.22402580082416534, "learning_rate": 1e-05, "loss": 0.9591, "step": 50805 }, { "epoch": 45.0044286979628, "grad_norm": 0.2275746762752533, "learning_rate": 1e-05, "loss": 0.9977, "step": 50810 }, { "epoch": 45.008857395925595, "grad_norm": 0.23878690600395203, "learning_rate": 1e-05, "loss": 0.955, "step": 50815 }, { "epoch": 45.0132860938884, "grad_norm": 0.257638543844223, "learning_rate": 1e-05, "loss": 1.0114, "step": 50820 }, { "epoch": 45.0177147918512, "grad_norm": 0.26441490650177, "learning_rate": 1e-05, "loss": 0.9678, "step": 50825 }, { "epoch": 45.02214348981399, "grad_norm": 0.24332650005817413, "learning_rate": 1e-05, "loss": 0.9359, "step": 50830 }, { "epoch": 45.02657218777679, "grad_norm": 0.2957375645637512, "learning_rate": 1e-05, "loss": 0.9673, "step": 50835 }, { "epoch": 45.031000885739594, "grad_norm": 0.239308699965477, "learning_rate": 1e-05, "loss": 0.9462, "step": 50840 }, { "epoch": 45.03542958370239, "grad_norm": 0.22293758392333984, "learning_rate": 1e-05, "loss": 0.9543, "step": 50845 }, { "epoch": 45.03985828166519, "grad_norm": 0.23287378251552582, "learning_rate": 1e-05, "loss": 0.9718, "step": 50850 }, { "epoch": 45.04428697962799, "grad_norm": 0.23699893057346344, "learning_rate": 1e-05, "loss": 0.9402, "step": 50855 }, { "epoch": 45.048715677590785, "grad_norm": 0.28027772903442383, "learning_rate": 1e-05, "loss": 0.9798, "step": 50860 }, { "epoch": 45.053144375553586, "grad_norm": 0.2623668313026428, "learning_rate": 1e-05, "loss": 0.9446, "step": 50865 }, { "epoch": 45.05757307351639, "grad_norm": 0.20903250575065613, "learning_rate": 1e-05, "loss": 0.9751, "step": 50870 }, { "epoch": 45.06200177147919, "grad_norm": 0.2196718454360962, "learning_rate": 1e-05, "loss": 0.9902, "step": 50875 }, { "epoch": 45.06643046944198, "grad_norm": 0.19057652354240417, "learning_rate": 1e-05, "loss": 0.9494, "step": 50880 }, { "epoch": 45.070859167404784, "grad_norm": 0.21576182544231415, "learning_rate": 1e-05, "loss": 0.9845, "step": 50885 }, { "epoch": 45.075287865367585, "grad_norm": 0.23814213275909424, "learning_rate": 1e-05, "loss": 0.9489, "step": 50890 }, { "epoch": 45.07971656333038, "grad_norm": 0.26735758781433105, "learning_rate": 1e-05, "loss": 0.9797, "step": 50895 }, { "epoch": 45.08414526129318, "grad_norm": 0.24511125683784485, "learning_rate": 1e-05, "loss": 0.9827, "step": 50900 }, { "epoch": 45.08857395925598, "grad_norm": 0.22583621740341187, "learning_rate": 1e-05, "loss": 0.9906, "step": 50905 }, { "epoch": 45.093002657218776, "grad_norm": 0.2847575843334198, "learning_rate": 1e-05, "loss": 0.9767, "step": 50910 }, { "epoch": 45.09743135518158, "grad_norm": 0.24746952950954437, "learning_rate": 1e-05, "loss": 0.937, "step": 50915 }, { "epoch": 45.10186005314438, "grad_norm": 0.22392427921295166, "learning_rate": 1e-05, "loss": 1.0191, "step": 50920 }, { "epoch": 45.10628875110717, "grad_norm": 0.20248247683048248, "learning_rate": 1e-05, "loss": 1.0252, "step": 50925 }, { "epoch": 45.11071744906997, "grad_norm": 0.24867719411849976, "learning_rate": 1e-05, "loss": 0.9273, "step": 50930 }, { "epoch": 45.115146147032775, "grad_norm": 0.2462959736585617, "learning_rate": 1e-05, "loss": 0.9749, "step": 50935 }, { "epoch": 45.11957484499557, "grad_norm": 0.24415653944015503, "learning_rate": 1e-05, "loss": 0.9414, "step": 50940 }, { "epoch": 45.12400354295837, "grad_norm": 0.24125155806541443, "learning_rate": 1e-05, "loss": 0.9898, "step": 50945 }, { "epoch": 45.12843224092117, "grad_norm": 0.264469176530838, "learning_rate": 1e-05, "loss": 0.9988, "step": 50950 }, { "epoch": 45.132860938883965, "grad_norm": 0.2606436610221863, "learning_rate": 1e-05, "loss": 0.9544, "step": 50955 }, { "epoch": 45.137289636846766, "grad_norm": 0.20980465412139893, "learning_rate": 1e-05, "loss": 0.9874, "step": 50960 }, { "epoch": 45.14171833480957, "grad_norm": 0.2637585699558258, "learning_rate": 1e-05, "loss": 0.9967, "step": 50965 }, { "epoch": 45.14614703277236, "grad_norm": 0.2547243535518646, "learning_rate": 1e-05, "loss": 0.9802, "step": 50970 }, { "epoch": 45.15057573073516, "grad_norm": 0.2801729738712311, "learning_rate": 1e-05, "loss": 0.9792, "step": 50975 }, { "epoch": 45.155004428697964, "grad_norm": 0.2803928256034851, "learning_rate": 1e-05, "loss": 0.9305, "step": 50980 }, { "epoch": 45.15943312666076, "grad_norm": 0.27185243368148804, "learning_rate": 1e-05, "loss": 0.998, "step": 50985 }, { "epoch": 45.16386182462356, "grad_norm": 0.23501235246658325, "learning_rate": 1e-05, "loss": 0.9813, "step": 50990 }, { "epoch": 45.16829052258636, "grad_norm": 0.24999774992465973, "learning_rate": 1e-05, "loss": 0.9525, "step": 50995 }, { "epoch": 45.17271922054916, "grad_norm": 0.2192055881023407, "learning_rate": 1e-05, "loss": 0.9913, "step": 51000 }, { "epoch": 45.177147918511956, "grad_norm": 0.26313719153404236, "learning_rate": 1e-05, "loss": 0.9918, "step": 51005 }, { "epoch": 45.18157661647476, "grad_norm": 0.2927393317222595, "learning_rate": 1e-05, "loss": 1.002, "step": 51010 }, { "epoch": 45.18600531443756, "grad_norm": 0.2240595519542694, "learning_rate": 1e-05, "loss": 0.9504, "step": 51015 }, { "epoch": 45.19043401240035, "grad_norm": 0.32358798384666443, "learning_rate": 1e-05, "loss": 0.9581, "step": 51020 }, { "epoch": 45.194862710363154, "grad_norm": 0.26124095916748047, "learning_rate": 1e-05, "loss": 0.972, "step": 51025 }, { "epoch": 45.199291408325955, "grad_norm": 0.22017693519592285, "learning_rate": 1e-05, "loss": 0.9775, "step": 51030 }, { "epoch": 45.20372010628875, "grad_norm": 0.21079875528812408, "learning_rate": 1e-05, "loss": 0.9896, "step": 51035 }, { "epoch": 45.20814880425155, "grad_norm": 0.2244163453578949, "learning_rate": 1e-05, "loss": 0.959, "step": 51040 }, { "epoch": 45.21257750221435, "grad_norm": 0.2547610104084015, "learning_rate": 1e-05, "loss": 0.9675, "step": 51045 }, { "epoch": 45.217006200177146, "grad_norm": 0.24453681707382202, "learning_rate": 1e-05, "loss": 0.9869, "step": 51050 }, { "epoch": 45.22143489813995, "grad_norm": 0.25053828954696655, "learning_rate": 1e-05, "loss": 0.9586, "step": 51055 }, { "epoch": 45.22586359610275, "grad_norm": 0.24711820483207703, "learning_rate": 1e-05, "loss": 0.9074, "step": 51060 }, { "epoch": 45.23029229406554, "grad_norm": 0.32900500297546387, "learning_rate": 1e-05, "loss": 0.9433, "step": 51065 }, { "epoch": 45.23472099202834, "grad_norm": 0.23388823866844177, "learning_rate": 1e-05, "loss": 1.0031, "step": 51070 }, { "epoch": 45.239149689991144, "grad_norm": 0.21588705480098724, "learning_rate": 1e-05, "loss": 0.9448, "step": 51075 }, { "epoch": 45.24357838795394, "grad_norm": 0.2930537462234497, "learning_rate": 1e-05, "loss": 0.951, "step": 51080 }, { "epoch": 45.24800708591674, "grad_norm": 0.2271912395954132, "learning_rate": 1e-05, "loss": 1.0157, "step": 51085 }, { "epoch": 45.25243578387954, "grad_norm": 0.241968035697937, "learning_rate": 1e-05, "loss": 0.9838, "step": 51090 }, { "epoch": 45.256864481842335, "grad_norm": 0.23127003014087677, "learning_rate": 1e-05, "loss": 0.9962, "step": 51095 }, { "epoch": 45.261293179805136, "grad_norm": 0.2244143784046173, "learning_rate": 1e-05, "loss": 0.9576, "step": 51100 }, { "epoch": 45.26572187776794, "grad_norm": 0.2516489624977112, "learning_rate": 1e-05, "loss": 0.9735, "step": 51105 }, { "epoch": 45.27015057573073, "grad_norm": 0.2522209882736206, "learning_rate": 1e-05, "loss": 0.9895, "step": 51110 }, { "epoch": 45.27457927369353, "grad_norm": 0.2186204046010971, "learning_rate": 1e-05, "loss": 0.9416, "step": 51115 }, { "epoch": 45.279007971656334, "grad_norm": 0.24312123656272888, "learning_rate": 1e-05, "loss": 0.9393, "step": 51120 }, { "epoch": 45.283436669619135, "grad_norm": 0.21799670159816742, "learning_rate": 1e-05, "loss": 1.0084, "step": 51125 }, { "epoch": 45.28786536758193, "grad_norm": 0.22940537333488464, "learning_rate": 1e-05, "loss": 1.0067, "step": 51130 }, { "epoch": 45.29229406554473, "grad_norm": 0.22109197080135345, "learning_rate": 1e-05, "loss": 0.9662, "step": 51135 }, { "epoch": 45.29672276350753, "grad_norm": 0.27144771814346313, "learning_rate": 1e-05, "loss": 0.9687, "step": 51140 }, { "epoch": 45.301151461470326, "grad_norm": 0.23288264870643616, "learning_rate": 1e-05, "loss": 0.9191, "step": 51145 }, { "epoch": 45.30558015943313, "grad_norm": 0.2264794111251831, "learning_rate": 1e-05, "loss": 0.9561, "step": 51150 }, { "epoch": 45.31000885739593, "grad_norm": 0.23706716299057007, "learning_rate": 1e-05, "loss": 0.9369, "step": 51155 }, { "epoch": 45.31443755535872, "grad_norm": 0.2442762702703476, "learning_rate": 1e-05, "loss": 0.9908, "step": 51160 }, { "epoch": 45.318866253321524, "grad_norm": 0.24715778231620789, "learning_rate": 1e-05, "loss": 0.9698, "step": 51165 }, { "epoch": 45.323294951284325, "grad_norm": 0.31723394989967346, "learning_rate": 1e-05, "loss": 0.9982, "step": 51170 }, { "epoch": 45.32772364924712, "grad_norm": 0.2172892540693283, "learning_rate": 1e-05, "loss": 0.9484, "step": 51175 }, { "epoch": 45.33215234720992, "grad_norm": 0.24963992834091187, "learning_rate": 1e-05, "loss": 0.9843, "step": 51180 }, { "epoch": 45.33658104517272, "grad_norm": 0.24094165861606598, "learning_rate": 1e-05, "loss": 0.9608, "step": 51185 }, { "epoch": 45.341009743135515, "grad_norm": 0.20671811699867249, "learning_rate": 1e-05, "loss": 0.9333, "step": 51190 }, { "epoch": 45.34543844109832, "grad_norm": 0.25364503264427185, "learning_rate": 1e-05, "loss": 0.987, "step": 51195 }, { "epoch": 45.34986713906112, "grad_norm": 0.2803034484386444, "learning_rate": 1e-05, "loss": 0.9633, "step": 51200 }, { "epoch": 45.35429583702391, "grad_norm": 0.24685046076774597, "learning_rate": 1e-05, "loss": 0.9716, "step": 51205 }, { "epoch": 45.35872453498671, "grad_norm": 0.23616907000541687, "learning_rate": 1e-05, "loss": 0.9528, "step": 51210 }, { "epoch": 45.363153232949514, "grad_norm": 0.2507687211036682, "learning_rate": 1e-05, "loss": 0.9918, "step": 51215 }, { "epoch": 45.36758193091231, "grad_norm": 0.2694598436355591, "learning_rate": 1e-05, "loss": 0.9661, "step": 51220 }, { "epoch": 45.37201062887511, "grad_norm": 0.2728506624698639, "learning_rate": 1e-05, "loss": 0.9508, "step": 51225 }, { "epoch": 45.37643932683791, "grad_norm": 0.21888592839241028, "learning_rate": 1e-05, "loss": 0.9342, "step": 51230 }, { "epoch": 45.380868024800705, "grad_norm": 0.25766709446907043, "learning_rate": 1e-05, "loss": 0.9665, "step": 51235 }, { "epoch": 45.385296722763506, "grad_norm": 0.28223270177841187, "learning_rate": 1e-05, "loss": 0.9533, "step": 51240 }, { "epoch": 45.38972542072631, "grad_norm": 0.251563161611557, "learning_rate": 1e-05, "loss": 1.0292, "step": 51245 }, { "epoch": 45.39415411868911, "grad_norm": 0.24172428250312805, "learning_rate": 1e-05, "loss": 0.9708, "step": 51250 }, { "epoch": 45.3985828166519, "grad_norm": 0.25851836800575256, "learning_rate": 1e-05, "loss": 0.9831, "step": 51255 }, { "epoch": 45.403011514614704, "grad_norm": 0.21488241851329803, "learning_rate": 1e-05, "loss": 0.9737, "step": 51260 }, { "epoch": 45.407440212577505, "grad_norm": 0.24836045503616333, "learning_rate": 1e-05, "loss": 0.9084, "step": 51265 }, { "epoch": 45.4118689105403, "grad_norm": 0.2217872142791748, "learning_rate": 1e-05, "loss": 0.946, "step": 51270 }, { "epoch": 45.4162976085031, "grad_norm": 0.2358526587486267, "learning_rate": 1e-05, "loss": 1.0031, "step": 51275 }, { "epoch": 45.4207263064659, "grad_norm": 0.2298174649477005, "learning_rate": 1e-05, "loss": 0.9872, "step": 51280 }, { "epoch": 45.425155004428696, "grad_norm": 0.22027640044689178, "learning_rate": 1e-05, "loss": 1.0028, "step": 51285 }, { "epoch": 45.4295837023915, "grad_norm": 0.22802740335464478, "learning_rate": 1e-05, "loss": 0.9789, "step": 51290 }, { "epoch": 45.4340124003543, "grad_norm": 0.25210607051849365, "learning_rate": 1e-05, "loss": 0.9646, "step": 51295 }, { "epoch": 45.43844109831709, "grad_norm": 0.22045212984085083, "learning_rate": 1e-05, "loss": 0.978, "step": 51300 }, { "epoch": 45.44286979627989, "grad_norm": 0.21925128996372223, "learning_rate": 1e-05, "loss": 0.9897, "step": 51305 }, { "epoch": 45.447298494242695, "grad_norm": 0.23857742547988892, "learning_rate": 1e-05, "loss": 0.9581, "step": 51310 }, { "epoch": 45.45172719220549, "grad_norm": 0.2602781653404236, "learning_rate": 1e-05, "loss": 0.9711, "step": 51315 }, { "epoch": 45.45615589016829, "grad_norm": 0.28282612562179565, "learning_rate": 1e-05, "loss": 0.947, "step": 51320 }, { "epoch": 45.46058458813109, "grad_norm": 0.2592891752719879, "learning_rate": 1e-05, "loss": 1.0222, "step": 51325 }, { "epoch": 45.465013286093885, "grad_norm": 0.2841559946537018, "learning_rate": 1e-05, "loss": 0.9437, "step": 51330 }, { "epoch": 45.46944198405669, "grad_norm": 0.20972949266433716, "learning_rate": 1e-05, "loss": 0.9579, "step": 51335 }, { "epoch": 45.47387068201949, "grad_norm": 0.20510976016521454, "learning_rate": 1e-05, "loss": 0.9415, "step": 51340 }, { "epoch": 45.47829937998228, "grad_norm": 0.24025958776474, "learning_rate": 1e-05, "loss": 0.9899, "step": 51345 }, { "epoch": 45.48272807794508, "grad_norm": 0.25290006399154663, "learning_rate": 1e-05, "loss": 0.9755, "step": 51350 }, { "epoch": 45.487156775907884, "grad_norm": 0.2119912952184677, "learning_rate": 1e-05, "loss": 0.9923, "step": 51355 }, { "epoch": 45.491585473870686, "grad_norm": 0.24001964926719666, "learning_rate": 1e-05, "loss": 0.9535, "step": 51360 }, { "epoch": 45.49601417183348, "grad_norm": 0.22937263548374176, "learning_rate": 1e-05, "loss": 0.9586, "step": 51365 }, { "epoch": 45.50044286979628, "grad_norm": 0.20668862760066986, "learning_rate": 1e-05, "loss": 0.973, "step": 51370 }, { "epoch": 45.50487156775908, "grad_norm": 0.26289650797843933, "learning_rate": 1e-05, "loss": 0.9858, "step": 51375 }, { "epoch": 45.509300265721876, "grad_norm": 0.27244558930397034, "learning_rate": 1e-05, "loss": 1.0128, "step": 51380 }, { "epoch": 45.51372896368468, "grad_norm": 0.2312142699956894, "learning_rate": 1e-05, "loss": 0.9574, "step": 51385 }, { "epoch": 45.51815766164748, "grad_norm": 0.27039605379104614, "learning_rate": 1e-05, "loss": 0.9573, "step": 51390 }, { "epoch": 45.52258635961027, "grad_norm": 0.27057579159736633, "learning_rate": 1e-05, "loss": 0.978, "step": 51395 }, { "epoch": 45.527015057573074, "grad_norm": 0.21195568144321442, "learning_rate": 1e-05, "loss": 0.9626, "step": 51400 }, { "epoch": 45.531443755535875, "grad_norm": 0.22906166315078735, "learning_rate": 1e-05, "loss": 0.942, "step": 51405 }, { "epoch": 45.53587245349867, "grad_norm": 0.25505486130714417, "learning_rate": 1e-05, "loss": 0.9821, "step": 51410 }, { "epoch": 45.54030115146147, "grad_norm": 0.2492789626121521, "learning_rate": 1e-05, "loss": 1.0026, "step": 51415 }, { "epoch": 45.54472984942427, "grad_norm": 0.25532904267311096, "learning_rate": 1e-05, "loss": 0.9472, "step": 51420 }, { "epoch": 45.549158547387066, "grad_norm": 0.23320272564888, "learning_rate": 1e-05, "loss": 0.9029, "step": 51425 }, { "epoch": 45.55358724534987, "grad_norm": 0.279902845621109, "learning_rate": 1e-05, "loss": 0.949, "step": 51430 }, { "epoch": 45.55801594331267, "grad_norm": 0.21254844963550568, "learning_rate": 1e-05, "loss": 0.9673, "step": 51435 }, { "epoch": 45.56244464127546, "grad_norm": 0.2398146092891693, "learning_rate": 1e-05, "loss": 0.9719, "step": 51440 }, { "epoch": 45.56687333923826, "grad_norm": 0.2217506319284439, "learning_rate": 1e-05, "loss": 1.0097, "step": 51445 }, { "epoch": 45.571302037201065, "grad_norm": 0.26074621081352234, "learning_rate": 1e-05, "loss": 0.9876, "step": 51450 }, { "epoch": 45.57573073516386, "grad_norm": 0.2479376345872879, "learning_rate": 1e-05, "loss": 0.9577, "step": 51455 }, { "epoch": 45.58015943312666, "grad_norm": 0.2737751603126526, "learning_rate": 1e-05, "loss": 0.9977, "step": 51460 }, { "epoch": 45.58458813108946, "grad_norm": 0.27061790227890015, "learning_rate": 1e-05, "loss": 0.9811, "step": 51465 }, { "epoch": 45.589016829052255, "grad_norm": 0.2958587408065796, "learning_rate": 1e-05, "loss": 0.9578, "step": 51470 }, { "epoch": 45.59344552701506, "grad_norm": 0.3015517592430115, "learning_rate": 1e-05, "loss": 0.9659, "step": 51475 }, { "epoch": 45.59787422497786, "grad_norm": 0.2944192588329315, "learning_rate": 1e-05, "loss": 0.9543, "step": 51480 }, { "epoch": 45.60230292294066, "grad_norm": 0.26193124055862427, "learning_rate": 1e-05, "loss": 1.02, "step": 51485 }, { "epoch": 45.60673162090345, "grad_norm": 0.2511567175388336, "learning_rate": 1e-05, "loss": 1.0191, "step": 51490 }, { "epoch": 45.611160318866254, "grad_norm": 0.20771130919456482, "learning_rate": 1e-05, "loss": 1.0024, "step": 51495 }, { "epoch": 45.615589016829055, "grad_norm": 0.26946723461151123, "learning_rate": 1e-05, "loss": 0.9546, "step": 51500 }, { "epoch": 45.62001771479185, "grad_norm": 0.23836848139762878, "learning_rate": 1e-05, "loss": 0.9635, "step": 51505 }, { "epoch": 45.62444641275465, "grad_norm": 0.21833090484142303, "learning_rate": 1e-05, "loss": 0.9843, "step": 51510 }, { "epoch": 45.62887511071745, "grad_norm": 0.24224352836608887, "learning_rate": 1e-05, "loss": 0.9577, "step": 51515 }, { "epoch": 45.633303808680246, "grad_norm": 0.22550438344478607, "learning_rate": 1e-05, "loss": 1.0255, "step": 51520 }, { "epoch": 45.63773250664305, "grad_norm": 0.21138441562652588, "learning_rate": 1e-05, "loss": 0.9639, "step": 51525 }, { "epoch": 45.64216120460585, "grad_norm": 0.2626248598098755, "learning_rate": 1e-05, "loss": 1.001, "step": 51530 }, { "epoch": 45.64658990256864, "grad_norm": 0.3008551001548767, "learning_rate": 1e-05, "loss": 0.9951, "step": 51535 }, { "epoch": 45.651018600531444, "grad_norm": 0.2048415243625641, "learning_rate": 1e-05, "loss": 0.9918, "step": 51540 }, { "epoch": 45.655447298494245, "grad_norm": 0.23698744177818298, "learning_rate": 1e-05, "loss": 0.9038, "step": 51545 }, { "epoch": 45.65987599645704, "grad_norm": 0.2327071577310562, "learning_rate": 1e-05, "loss": 0.9491, "step": 51550 }, { "epoch": 45.66430469441984, "grad_norm": 0.3224489092826843, "learning_rate": 1e-05, "loss": 0.9838, "step": 51555 }, { "epoch": 45.66873339238264, "grad_norm": 0.2258215844631195, "learning_rate": 1e-05, "loss": 0.9956, "step": 51560 }, { "epoch": 45.673162090345436, "grad_norm": 0.2476758062839508, "learning_rate": 1e-05, "loss": 0.95, "step": 51565 }, { "epoch": 45.67759078830824, "grad_norm": 0.2695644795894623, "learning_rate": 1e-05, "loss": 0.9446, "step": 51570 }, { "epoch": 45.68201948627104, "grad_norm": 0.2607501447200775, "learning_rate": 1e-05, "loss": 0.9666, "step": 51575 }, { "epoch": 45.68644818423383, "grad_norm": 0.21129955351352692, "learning_rate": 1e-05, "loss": 0.9695, "step": 51580 }, { "epoch": 45.69087688219663, "grad_norm": 0.2089204490184784, "learning_rate": 1e-05, "loss": 0.97, "step": 51585 }, { "epoch": 45.695305580159435, "grad_norm": 0.234258770942688, "learning_rate": 1e-05, "loss": 0.9215, "step": 51590 }, { "epoch": 45.69973427812223, "grad_norm": 0.24929247796535492, "learning_rate": 1e-05, "loss": 0.9953, "step": 51595 }, { "epoch": 45.70416297608503, "grad_norm": 0.2664491832256317, "learning_rate": 1e-05, "loss": 0.975, "step": 51600 }, { "epoch": 45.70859167404783, "grad_norm": 0.23142677545547485, "learning_rate": 1e-05, "loss": 1.0122, "step": 51605 }, { "epoch": 45.71302037201063, "grad_norm": 0.23572951555252075, "learning_rate": 1e-05, "loss": 0.944, "step": 51610 }, { "epoch": 45.717449069973426, "grad_norm": 0.2164895087480545, "learning_rate": 1e-05, "loss": 0.9895, "step": 51615 }, { "epoch": 45.72187776793623, "grad_norm": 0.23070400953292847, "learning_rate": 1e-05, "loss": 0.9374, "step": 51620 }, { "epoch": 45.72630646589903, "grad_norm": 0.2819378674030304, "learning_rate": 1e-05, "loss": 0.9406, "step": 51625 }, { "epoch": 45.73073516386182, "grad_norm": 0.22256404161453247, "learning_rate": 1e-05, "loss": 0.9002, "step": 51630 }, { "epoch": 45.735163861824624, "grad_norm": 0.2503831386566162, "learning_rate": 1e-05, "loss": 0.9382, "step": 51635 }, { "epoch": 45.739592559787425, "grad_norm": 0.2594408392906189, "learning_rate": 1e-05, "loss": 0.9732, "step": 51640 }, { "epoch": 45.74402125775022, "grad_norm": 0.2545607089996338, "learning_rate": 1e-05, "loss": 0.8817, "step": 51645 }, { "epoch": 45.74844995571302, "grad_norm": 0.21343974769115448, "learning_rate": 1e-05, "loss": 0.9657, "step": 51650 }, { "epoch": 45.75287865367582, "grad_norm": 0.24160389602184296, "learning_rate": 1e-05, "loss": 0.932, "step": 51655 }, { "epoch": 45.757307351638616, "grad_norm": 0.2479962408542633, "learning_rate": 1e-05, "loss": 1.0156, "step": 51660 }, { "epoch": 45.76173604960142, "grad_norm": 0.2532936930656433, "learning_rate": 1e-05, "loss": 0.9254, "step": 51665 }, { "epoch": 45.76616474756422, "grad_norm": 0.22991423308849335, "learning_rate": 1e-05, "loss": 0.921, "step": 51670 }, { "epoch": 45.77059344552701, "grad_norm": 0.23111528158187866, "learning_rate": 1e-05, "loss": 0.9472, "step": 51675 }, { "epoch": 45.775022143489814, "grad_norm": 0.23853762447834015, "learning_rate": 1e-05, "loss": 0.9639, "step": 51680 }, { "epoch": 45.779450841452615, "grad_norm": 0.28785794973373413, "learning_rate": 1e-05, "loss": 0.9692, "step": 51685 }, { "epoch": 45.78387953941541, "grad_norm": 0.20950739085674286, "learning_rate": 1e-05, "loss": 0.9445, "step": 51690 }, { "epoch": 45.78830823737821, "grad_norm": 0.2349524199962616, "learning_rate": 1e-05, "loss": 0.993, "step": 51695 }, { "epoch": 45.79273693534101, "grad_norm": 0.22990046441555023, "learning_rate": 1e-05, "loss": 0.9637, "step": 51700 }, { "epoch": 45.797165633303806, "grad_norm": 0.23833337426185608, "learning_rate": 1e-05, "loss": 0.9923, "step": 51705 }, { "epoch": 45.80159433126661, "grad_norm": 0.2167847752571106, "learning_rate": 1e-05, "loss": 0.9954, "step": 51710 }, { "epoch": 45.80602302922941, "grad_norm": 0.24349065124988556, "learning_rate": 1e-05, "loss": 0.9453, "step": 51715 }, { "epoch": 45.8104517271922, "grad_norm": 0.24023036658763885, "learning_rate": 1e-05, "loss": 1.0219, "step": 51720 }, { "epoch": 45.814880425155, "grad_norm": 0.21085967123508453, "learning_rate": 1e-05, "loss": 1.0227, "step": 51725 }, { "epoch": 45.819309123117804, "grad_norm": 0.20929795503616333, "learning_rate": 1e-05, "loss": 0.9883, "step": 51730 }, { "epoch": 45.823737821080606, "grad_norm": 0.22110675275325775, "learning_rate": 1e-05, "loss": 0.9886, "step": 51735 }, { "epoch": 45.8281665190434, "grad_norm": 0.2524730861186981, "learning_rate": 1e-05, "loss": 0.9361, "step": 51740 }, { "epoch": 45.8325952170062, "grad_norm": 0.24815669655799866, "learning_rate": 1e-05, "loss": 0.9332, "step": 51745 }, { "epoch": 45.837023914969, "grad_norm": 0.2149936705827713, "learning_rate": 1e-05, "loss": 0.9487, "step": 51750 }, { "epoch": 45.841452612931796, "grad_norm": 0.2428337037563324, "learning_rate": 1e-05, "loss": 0.9737, "step": 51755 }, { "epoch": 45.8458813108946, "grad_norm": 0.23148475587368011, "learning_rate": 1e-05, "loss": 0.9338, "step": 51760 }, { "epoch": 45.8503100088574, "grad_norm": 0.23069745302200317, "learning_rate": 1e-05, "loss": 0.9739, "step": 51765 }, { "epoch": 45.85473870682019, "grad_norm": 0.24695904552936554, "learning_rate": 1e-05, "loss": 0.93, "step": 51770 }, { "epoch": 45.859167404782994, "grad_norm": 0.25191372632980347, "learning_rate": 1e-05, "loss": 0.9696, "step": 51775 }, { "epoch": 45.863596102745795, "grad_norm": 0.23737689852714539, "learning_rate": 1e-05, "loss": 0.9569, "step": 51780 }, { "epoch": 45.86802480070859, "grad_norm": 0.27893298864364624, "learning_rate": 1e-05, "loss": 0.9827, "step": 51785 }, { "epoch": 45.87245349867139, "grad_norm": 0.2078242301940918, "learning_rate": 1e-05, "loss": 0.9174, "step": 51790 }, { "epoch": 45.87688219663419, "grad_norm": 0.2302054464817047, "learning_rate": 1e-05, "loss": 0.981, "step": 51795 }, { "epoch": 45.881310894596986, "grad_norm": 0.2452826052904129, "learning_rate": 1e-05, "loss": 1.0028, "step": 51800 }, { "epoch": 45.88573959255979, "grad_norm": 0.2006453573703766, "learning_rate": 1e-05, "loss": 0.978, "step": 51805 }, { "epoch": 45.89016829052259, "grad_norm": 0.25416144728660583, "learning_rate": 1e-05, "loss": 1.008, "step": 51810 }, { "epoch": 45.89459698848538, "grad_norm": 0.24128666520118713, "learning_rate": 1e-05, "loss": 1.01, "step": 51815 }, { "epoch": 45.899025686448184, "grad_norm": 0.24687378108501434, "learning_rate": 1e-05, "loss": 0.9855, "step": 51820 }, { "epoch": 45.903454384410985, "grad_norm": 0.22308023273944855, "learning_rate": 1e-05, "loss": 0.9159, "step": 51825 }, { "epoch": 45.90788308237378, "grad_norm": 0.22135116159915924, "learning_rate": 1e-05, "loss": 0.9598, "step": 51830 }, { "epoch": 45.91231178033658, "grad_norm": 0.21836702525615692, "learning_rate": 1e-05, "loss": 0.8916, "step": 51835 }, { "epoch": 45.91674047829938, "grad_norm": 0.2132343053817749, "learning_rate": 1e-05, "loss": 0.9739, "step": 51840 }, { "epoch": 45.921169176262175, "grad_norm": 0.22857122123241425, "learning_rate": 1e-05, "loss": 1.0141, "step": 51845 }, { "epoch": 45.92559787422498, "grad_norm": 0.22367356717586517, "learning_rate": 1e-05, "loss": 0.9353, "step": 51850 }, { "epoch": 45.93002657218778, "grad_norm": 0.23527488112449646, "learning_rate": 1e-05, "loss": 0.9772, "step": 51855 }, { "epoch": 45.93445527015058, "grad_norm": 0.2949398458003998, "learning_rate": 1e-05, "loss": 0.9948, "step": 51860 }, { "epoch": 45.93888396811337, "grad_norm": 0.21424800157546997, "learning_rate": 1e-05, "loss": 1.011, "step": 51865 }, { "epoch": 45.943312666076174, "grad_norm": 0.2660035490989685, "learning_rate": 1e-05, "loss": 0.9893, "step": 51870 }, { "epoch": 45.947741364038976, "grad_norm": 0.2730546295642853, "learning_rate": 1e-05, "loss": 1.0172, "step": 51875 }, { "epoch": 45.95217006200177, "grad_norm": 0.23108869791030884, "learning_rate": 1e-05, "loss": 0.9353, "step": 51880 }, { "epoch": 45.95659875996457, "grad_norm": 0.22088433802127838, "learning_rate": 1e-05, "loss": 0.9066, "step": 51885 }, { "epoch": 45.96102745792737, "grad_norm": 0.20338571071624756, "learning_rate": 1e-05, "loss": 0.967, "step": 51890 }, { "epoch": 45.965456155890166, "grad_norm": 0.22179971635341644, "learning_rate": 1e-05, "loss": 0.9992, "step": 51895 }, { "epoch": 45.96988485385297, "grad_norm": 0.22934699058532715, "learning_rate": 1e-05, "loss": 0.9695, "step": 51900 }, { "epoch": 45.97431355181577, "grad_norm": 0.25358590483665466, "learning_rate": 1e-05, "loss": 1.039, "step": 51905 }, { "epoch": 45.97874224977856, "grad_norm": 0.31548011302948, "learning_rate": 1e-05, "loss": 0.9132, "step": 51910 }, { "epoch": 45.983170947741364, "grad_norm": 0.2892420291900635, "learning_rate": 1e-05, "loss": 1.0206, "step": 51915 }, { "epoch": 45.987599645704165, "grad_norm": 0.24548064172267914, "learning_rate": 1e-05, "loss": 0.9275, "step": 51920 }, { "epoch": 45.99202834366696, "grad_norm": 0.2229636162519455, "learning_rate": 1e-05, "loss": 0.981, "step": 51925 }, { "epoch": 45.99645704162976, "grad_norm": 0.24473243951797485, "learning_rate": 1e-05, "loss": 0.9398, "step": 51930 }, { "epoch": 46.00088573959256, "grad_norm": 0.22059190273284912, "learning_rate": 1e-05, "loss": 0.9811, "step": 51935 }, { "epoch": 46.005314437555356, "grad_norm": 0.23424828052520752, "learning_rate": 1e-05, "loss": 0.928, "step": 51940 }, { "epoch": 46.00974313551816, "grad_norm": 0.24413150548934937, "learning_rate": 1e-05, "loss": 0.9897, "step": 51945 }, { "epoch": 46.01417183348096, "grad_norm": 0.29263216257095337, "learning_rate": 1e-05, "loss": 0.9668, "step": 51950 }, { "epoch": 46.01860053144375, "grad_norm": 0.2872030735015869, "learning_rate": 1e-05, "loss": 1.0185, "step": 51955 }, { "epoch": 46.02302922940655, "grad_norm": 0.25302475690841675, "learning_rate": 1e-05, "loss": 0.9227, "step": 51960 }, { "epoch": 46.027457927369355, "grad_norm": 0.27512097358703613, "learning_rate": 1e-05, "loss": 0.9806, "step": 51965 }, { "epoch": 46.03188662533215, "grad_norm": 0.28898295760154724, "learning_rate": 1e-05, "loss": 0.942, "step": 51970 }, { "epoch": 46.03631532329495, "grad_norm": 0.23756752908229828, "learning_rate": 1e-05, "loss": 0.9105, "step": 51975 }, { "epoch": 46.04074402125775, "grad_norm": 0.24522282183170319, "learning_rate": 1e-05, "loss": 0.926, "step": 51980 }, { "epoch": 46.04517271922055, "grad_norm": 0.2577900290489197, "learning_rate": 1e-05, "loss": 0.9843, "step": 51985 }, { "epoch": 46.04960141718335, "grad_norm": 0.23627310991287231, "learning_rate": 1e-05, "loss": 1.0009, "step": 51990 }, { "epoch": 46.05403011514615, "grad_norm": 0.22613753378391266, "learning_rate": 1e-05, "loss": 0.9964, "step": 51995 }, { "epoch": 46.05845881310895, "grad_norm": 0.23439499735832214, "learning_rate": 1e-05, "loss": 0.955, "step": 52000 }, { "epoch": 46.06288751107174, "grad_norm": 0.2566295266151428, "learning_rate": 1e-05, "loss": 0.9563, "step": 52005 }, { "epoch": 46.067316209034544, "grad_norm": 0.2557554841041565, "learning_rate": 1e-05, "loss": 0.9615, "step": 52010 }, { "epoch": 46.071744906997345, "grad_norm": 0.2788117229938507, "learning_rate": 1e-05, "loss": 1.0405, "step": 52015 }, { "epoch": 46.07617360496014, "grad_norm": 0.2325088381767273, "learning_rate": 1e-05, "loss": 0.9861, "step": 52020 }, { "epoch": 46.08060230292294, "grad_norm": 0.25615447759628296, "learning_rate": 1e-05, "loss": 1.0299, "step": 52025 }, { "epoch": 46.08503100088574, "grad_norm": 0.23676101863384247, "learning_rate": 1e-05, "loss": 1.0212, "step": 52030 }, { "epoch": 46.089459698848536, "grad_norm": 0.2522531747817993, "learning_rate": 1e-05, "loss": 0.9693, "step": 52035 }, { "epoch": 46.09388839681134, "grad_norm": 0.2111278623342514, "learning_rate": 1e-05, "loss": 1.0284, "step": 52040 }, { "epoch": 46.09831709477414, "grad_norm": 0.2221577763557434, "learning_rate": 1e-05, "loss": 0.9617, "step": 52045 }, { "epoch": 46.10274579273693, "grad_norm": 0.22191591560840607, "learning_rate": 1e-05, "loss": 0.9161, "step": 52050 }, { "epoch": 46.107174490699734, "grad_norm": 0.2434382140636444, "learning_rate": 1e-05, "loss": 0.9857, "step": 52055 }, { "epoch": 46.111603188662535, "grad_norm": 0.23474010825157166, "learning_rate": 1e-05, "loss": 1.0408, "step": 52060 }, { "epoch": 46.11603188662533, "grad_norm": 0.2551935017108917, "learning_rate": 1e-05, "loss": 0.9983, "step": 52065 }, { "epoch": 46.12046058458813, "grad_norm": 0.220155268907547, "learning_rate": 1e-05, "loss": 0.96, "step": 52070 }, { "epoch": 46.12488928255093, "grad_norm": 0.2255057990550995, "learning_rate": 1e-05, "loss": 0.9337, "step": 52075 }, { "epoch": 46.129317980513726, "grad_norm": 0.26529330015182495, "learning_rate": 1e-05, "loss": 0.9677, "step": 52080 }, { "epoch": 46.13374667847653, "grad_norm": 0.23907260596752167, "learning_rate": 1e-05, "loss": 1.0212, "step": 52085 }, { "epoch": 46.13817537643933, "grad_norm": 0.27552422881126404, "learning_rate": 1e-05, "loss": 1.028, "step": 52090 }, { "epoch": 46.14260407440213, "grad_norm": 0.23915618658065796, "learning_rate": 1e-05, "loss": 0.9762, "step": 52095 }, { "epoch": 46.14703277236492, "grad_norm": 0.24072015285491943, "learning_rate": 1e-05, "loss": 0.9137, "step": 52100 }, { "epoch": 46.151461470327725, "grad_norm": 0.22663548588752747, "learning_rate": 1e-05, "loss": 0.9733, "step": 52105 }, { "epoch": 46.155890168290526, "grad_norm": 0.2206011712551117, "learning_rate": 1e-05, "loss": 0.9214, "step": 52110 }, { "epoch": 46.16031886625332, "grad_norm": 0.23620931804180145, "learning_rate": 1e-05, "loss": 1.0128, "step": 52115 }, { "epoch": 46.16474756421612, "grad_norm": 0.2478964924812317, "learning_rate": 1e-05, "loss": 0.9593, "step": 52120 }, { "epoch": 46.16917626217892, "grad_norm": 0.24203436076641083, "learning_rate": 1e-05, "loss": 1.0389, "step": 52125 }, { "epoch": 46.173604960141716, "grad_norm": 0.20357997715473175, "learning_rate": 1e-05, "loss": 0.9882, "step": 52130 }, { "epoch": 46.17803365810452, "grad_norm": 0.24347221851348877, "learning_rate": 1e-05, "loss": 0.9113, "step": 52135 }, { "epoch": 46.18246235606732, "grad_norm": 0.24830567836761475, "learning_rate": 1e-05, "loss": 0.9855, "step": 52140 }, { "epoch": 46.18689105403011, "grad_norm": 0.25571784377098083, "learning_rate": 1e-05, "loss": 0.9486, "step": 52145 }, { "epoch": 46.191319751992914, "grad_norm": 0.2986680567264557, "learning_rate": 1e-05, "loss": 0.9673, "step": 52150 }, { "epoch": 46.195748449955715, "grad_norm": 0.2642230987548828, "learning_rate": 1e-05, "loss": 1.0307, "step": 52155 }, { "epoch": 46.20017714791851, "grad_norm": 0.22545696794986725, "learning_rate": 1e-05, "loss": 0.9696, "step": 52160 }, { "epoch": 46.20460584588131, "grad_norm": 0.21802228689193726, "learning_rate": 1e-05, "loss": 0.9532, "step": 52165 }, { "epoch": 46.20903454384411, "grad_norm": 0.22780759632587433, "learning_rate": 1e-05, "loss": 0.9841, "step": 52170 }, { "epoch": 46.213463241806906, "grad_norm": 0.21695683896541595, "learning_rate": 1e-05, "loss": 0.9889, "step": 52175 }, { "epoch": 46.21789193976971, "grad_norm": 0.24940991401672363, "learning_rate": 1e-05, "loss": 0.9516, "step": 52180 }, { "epoch": 46.22232063773251, "grad_norm": 0.2487255185842514, "learning_rate": 1e-05, "loss": 0.9405, "step": 52185 }, { "epoch": 46.2267493356953, "grad_norm": 0.20448024570941925, "learning_rate": 1e-05, "loss": 0.9971, "step": 52190 }, { "epoch": 46.231178033658104, "grad_norm": 0.2528068423271179, "learning_rate": 1e-05, "loss": 1.0509, "step": 52195 }, { "epoch": 46.235606731620905, "grad_norm": 0.22307176887989044, "learning_rate": 1e-05, "loss": 0.9658, "step": 52200 }, { "epoch": 46.2400354295837, "grad_norm": 0.2304147630929947, "learning_rate": 1e-05, "loss": 0.9763, "step": 52205 }, { "epoch": 46.2444641275465, "grad_norm": 0.2592250108718872, "learning_rate": 1e-05, "loss": 1.0273, "step": 52210 }, { "epoch": 46.2488928255093, "grad_norm": 0.25643765926361084, "learning_rate": 1e-05, "loss": 0.9887, "step": 52215 }, { "epoch": 46.2533215234721, "grad_norm": 0.265780508518219, "learning_rate": 1e-05, "loss": 0.9836, "step": 52220 }, { "epoch": 46.2577502214349, "grad_norm": 0.24850358068943024, "learning_rate": 1e-05, "loss": 0.9181, "step": 52225 }, { "epoch": 46.2621789193977, "grad_norm": 0.24540919065475464, "learning_rate": 1e-05, "loss": 0.9591, "step": 52230 }, { "epoch": 46.2666076173605, "grad_norm": 0.2508479952812195, "learning_rate": 1e-05, "loss": 0.9945, "step": 52235 }, { "epoch": 46.27103631532329, "grad_norm": 0.22535806894302368, "learning_rate": 1e-05, "loss": 0.9811, "step": 52240 }, { "epoch": 46.275465013286095, "grad_norm": 0.2499355971813202, "learning_rate": 1e-05, "loss": 0.9856, "step": 52245 }, { "epoch": 46.279893711248896, "grad_norm": 0.25124189257621765, "learning_rate": 1e-05, "loss": 0.9427, "step": 52250 }, { "epoch": 46.28432240921169, "grad_norm": 0.2670814096927643, "learning_rate": 1e-05, "loss": 0.9566, "step": 52255 }, { "epoch": 46.28875110717449, "grad_norm": 0.25615066289901733, "learning_rate": 1e-05, "loss": 0.9832, "step": 52260 }, { "epoch": 46.29317980513729, "grad_norm": 0.23560094833374023, "learning_rate": 1e-05, "loss": 0.9494, "step": 52265 }, { "epoch": 46.297608503100086, "grad_norm": 0.25396451354026794, "learning_rate": 1e-05, "loss": 0.9744, "step": 52270 }, { "epoch": 46.30203720106289, "grad_norm": 0.22137683629989624, "learning_rate": 1e-05, "loss": 0.9707, "step": 52275 }, { "epoch": 46.30646589902569, "grad_norm": 0.23736625909805298, "learning_rate": 1e-05, "loss": 1.0328, "step": 52280 }, { "epoch": 46.31089459698848, "grad_norm": 0.2688720226287842, "learning_rate": 1e-05, "loss": 0.9479, "step": 52285 }, { "epoch": 46.315323294951284, "grad_norm": 0.2810586392879486, "learning_rate": 1e-05, "loss": 0.9994, "step": 52290 }, { "epoch": 46.319751992914085, "grad_norm": 0.27852848172187805, "learning_rate": 1e-05, "loss": 0.9533, "step": 52295 }, { "epoch": 46.32418069087688, "grad_norm": 0.28398391604423523, "learning_rate": 1e-05, "loss": 0.9556, "step": 52300 }, { "epoch": 46.32860938883968, "grad_norm": 0.26669609546661377, "learning_rate": 1e-05, "loss": 0.9609, "step": 52305 }, { "epoch": 46.33303808680248, "grad_norm": 0.27653294801712036, "learning_rate": 1e-05, "loss": 0.9929, "step": 52310 }, { "epoch": 46.337466784765276, "grad_norm": 0.25108420848846436, "learning_rate": 1e-05, "loss": 0.9284, "step": 52315 }, { "epoch": 46.34189548272808, "grad_norm": 0.267616868019104, "learning_rate": 1e-05, "loss": 0.9823, "step": 52320 }, { "epoch": 46.34632418069088, "grad_norm": 0.22993402183055878, "learning_rate": 1e-05, "loss": 0.9974, "step": 52325 }, { "epoch": 46.35075287865367, "grad_norm": 0.25327572226524353, "learning_rate": 1e-05, "loss": 1.003, "step": 52330 }, { "epoch": 46.355181576616474, "grad_norm": 0.25147002935409546, "learning_rate": 1e-05, "loss": 0.9556, "step": 52335 }, { "epoch": 46.359610274579275, "grad_norm": 0.3183804750442505, "learning_rate": 1e-05, "loss": 0.956, "step": 52340 }, { "epoch": 46.364038972542076, "grad_norm": 0.27869829535484314, "learning_rate": 1e-05, "loss": 0.976, "step": 52345 }, { "epoch": 46.36846767050487, "grad_norm": 0.2506049573421478, "learning_rate": 1e-05, "loss": 1.0046, "step": 52350 }, { "epoch": 46.37289636846767, "grad_norm": 0.23751741647720337, "learning_rate": 1e-05, "loss": 1.0114, "step": 52355 }, { "epoch": 46.37732506643047, "grad_norm": 0.33607232570648193, "learning_rate": 1e-05, "loss": 0.9738, "step": 52360 }, { "epoch": 46.38175376439327, "grad_norm": 0.23826178908348083, "learning_rate": 1e-05, "loss": 0.9528, "step": 52365 }, { "epoch": 46.38618246235607, "grad_norm": 0.26693665981292725, "learning_rate": 1e-05, "loss": 0.9316, "step": 52370 }, { "epoch": 46.39061116031887, "grad_norm": 0.24751687049865723, "learning_rate": 1e-05, "loss": 0.9569, "step": 52375 }, { "epoch": 46.39503985828166, "grad_norm": 0.2192055881023407, "learning_rate": 1e-05, "loss": 0.9881, "step": 52380 }, { "epoch": 46.399468556244464, "grad_norm": 0.2583507001399994, "learning_rate": 1e-05, "loss": 1.0034, "step": 52385 }, { "epoch": 46.403897254207266, "grad_norm": 0.23104475438594818, "learning_rate": 1e-05, "loss": 0.9115, "step": 52390 }, { "epoch": 46.40832595217006, "grad_norm": 0.2703949511051178, "learning_rate": 1e-05, "loss": 1.0194, "step": 52395 }, { "epoch": 46.41275465013286, "grad_norm": 0.24414384365081787, "learning_rate": 1e-05, "loss": 1.0157, "step": 52400 }, { "epoch": 46.41718334809566, "grad_norm": 0.22799710929393768, "learning_rate": 1e-05, "loss": 1.023, "step": 52405 }, { "epoch": 46.421612046058456, "grad_norm": 0.23787589371204376, "learning_rate": 1e-05, "loss": 0.9568, "step": 52410 }, { "epoch": 46.42604074402126, "grad_norm": 0.2353285402059555, "learning_rate": 1e-05, "loss": 0.9421, "step": 52415 }, { "epoch": 46.43046944198406, "grad_norm": 0.3217039704322815, "learning_rate": 1e-05, "loss": 0.9637, "step": 52420 }, { "epoch": 46.43489813994685, "grad_norm": 0.25284624099731445, "learning_rate": 1e-05, "loss": 0.9211, "step": 52425 }, { "epoch": 46.439326837909654, "grad_norm": 0.2597035765647888, "learning_rate": 1e-05, "loss": 1.0327, "step": 52430 }, { "epoch": 46.443755535872455, "grad_norm": 0.2135528326034546, "learning_rate": 1e-05, "loss": 0.9494, "step": 52435 }, { "epoch": 46.44818423383525, "grad_norm": 0.281564861536026, "learning_rate": 1e-05, "loss": 0.9282, "step": 52440 }, { "epoch": 46.45261293179805, "grad_norm": 0.21969282627105713, "learning_rate": 1e-05, "loss": 0.962, "step": 52445 }, { "epoch": 46.45704162976085, "grad_norm": 0.259161114692688, "learning_rate": 1e-05, "loss": 0.9792, "step": 52450 }, { "epoch": 46.461470327723646, "grad_norm": 0.22296705842018127, "learning_rate": 1e-05, "loss": 0.9913, "step": 52455 }, { "epoch": 46.46589902568645, "grad_norm": 0.22984325885772705, "learning_rate": 1e-05, "loss": 0.9737, "step": 52460 }, { "epoch": 46.47032772364925, "grad_norm": 0.2613532841205597, "learning_rate": 1e-05, "loss": 1.0068, "step": 52465 }, { "epoch": 46.47475642161205, "grad_norm": 0.22698812186717987, "learning_rate": 1e-05, "loss": 0.966, "step": 52470 }, { "epoch": 46.479185119574844, "grad_norm": 0.20282553136348724, "learning_rate": 1e-05, "loss": 0.9556, "step": 52475 }, { "epoch": 46.483613817537645, "grad_norm": 0.22666481137275696, "learning_rate": 1e-05, "loss": 0.9682, "step": 52480 }, { "epoch": 46.488042515500446, "grad_norm": 0.2294263243675232, "learning_rate": 1e-05, "loss": 0.98, "step": 52485 }, { "epoch": 46.49247121346324, "grad_norm": 0.21758776903152466, "learning_rate": 1e-05, "loss": 0.9697, "step": 52490 }, { "epoch": 46.49689991142604, "grad_norm": 0.23589198291301727, "learning_rate": 1e-05, "loss": 1.0076, "step": 52495 }, { "epoch": 46.50132860938884, "grad_norm": 0.21198731660842896, "learning_rate": 1e-05, "loss": 0.9942, "step": 52500 }, { "epoch": 46.50575730735164, "grad_norm": 0.21669331192970276, "learning_rate": 1e-05, "loss": 1.0036, "step": 52505 }, { "epoch": 46.51018600531444, "grad_norm": 0.18793557584285736, "learning_rate": 1e-05, "loss": 0.9781, "step": 52510 }, { "epoch": 46.51461470327724, "grad_norm": 0.2524021863937378, "learning_rate": 1e-05, "loss": 0.9938, "step": 52515 }, { "epoch": 46.51904340124003, "grad_norm": 0.25322335958480835, "learning_rate": 1e-05, "loss": 1.0375, "step": 52520 }, { "epoch": 46.523472099202834, "grad_norm": 0.2473878711462021, "learning_rate": 1e-05, "loss": 0.9862, "step": 52525 }, { "epoch": 46.527900797165636, "grad_norm": 0.2516082227230072, "learning_rate": 1e-05, "loss": 0.9211, "step": 52530 }, { "epoch": 46.53232949512843, "grad_norm": 0.2698797881603241, "learning_rate": 1e-05, "loss": 0.9578, "step": 52535 }, { "epoch": 46.53675819309123, "grad_norm": 0.22685733437538147, "learning_rate": 1e-05, "loss": 0.9938, "step": 52540 }, { "epoch": 46.54118689105403, "grad_norm": 0.23823480308055878, "learning_rate": 1e-05, "loss": 0.9704, "step": 52545 }, { "epoch": 46.545615589016826, "grad_norm": 0.20379240810871124, "learning_rate": 1e-05, "loss": 0.9454, "step": 52550 }, { "epoch": 46.55004428697963, "grad_norm": 0.2341829538345337, "learning_rate": 1e-05, "loss": 0.9881, "step": 52555 }, { "epoch": 46.55447298494243, "grad_norm": 0.23614032566547394, "learning_rate": 1e-05, "loss": 1.0101, "step": 52560 }, { "epoch": 46.55890168290522, "grad_norm": 0.2548588812351227, "learning_rate": 1e-05, "loss": 0.9862, "step": 52565 }, { "epoch": 46.563330380868024, "grad_norm": 0.26242223381996155, "learning_rate": 1e-05, "loss": 1.0033, "step": 52570 }, { "epoch": 46.567759078830825, "grad_norm": 0.30709484219551086, "learning_rate": 1e-05, "loss": 0.9388, "step": 52575 }, { "epoch": 46.57218777679362, "grad_norm": 0.2339414805173874, "learning_rate": 1e-05, "loss": 0.9651, "step": 52580 }, { "epoch": 46.57661647475642, "grad_norm": 0.25077104568481445, "learning_rate": 1e-05, "loss": 0.9463, "step": 52585 }, { "epoch": 46.58104517271922, "grad_norm": 0.23809531331062317, "learning_rate": 1e-05, "loss": 1.004, "step": 52590 }, { "epoch": 46.58547387068202, "grad_norm": 0.26254934072494507, "learning_rate": 1e-05, "loss": 0.9553, "step": 52595 }, { "epoch": 46.58990256864482, "grad_norm": 0.24272571504116058, "learning_rate": 1e-05, "loss": 1.0001, "step": 52600 }, { "epoch": 46.59433126660762, "grad_norm": 0.25622281432151794, "learning_rate": 1e-05, "loss": 1.0159, "step": 52605 }, { "epoch": 46.59875996457042, "grad_norm": 0.2809261083602905, "learning_rate": 1e-05, "loss": 0.9969, "step": 52610 }, { "epoch": 46.60318866253321, "grad_norm": 0.26929810643196106, "learning_rate": 1e-05, "loss": 0.9706, "step": 52615 }, { "epoch": 46.607617360496015, "grad_norm": 0.209640234708786, "learning_rate": 1e-05, "loss": 0.9742, "step": 52620 }, { "epoch": 46.612046058458816, "grad_norm": 0.24667634069919586, "learning_rate": 1e-05, "loss": 0.9539, "step": 52625 }, { "epoch": 46.61647475642161, "grad_norm": 0.24101629853248596, "learning_rate": 1e-05, "loss": 1.005, "step": 52630 }, { "epoch": 46.62090345438441, "grad_norm": 0.2497033178806305, "learning_rate": 1e-05, "loss": 0.9879, "step": 52635 }, { "epoch": 46.62533215234721, "grad_norm": 0.25059953331947327, "learning_rate": 1e-05, "loss": 0.9276, "step": 52640 }, { "epoch": 46.62976085031001, "grad_norm": 0.25847601890563965, "learning_rate": 1e-05, "loss": 1.0056, "step": 52645 }, { "epoch": 46.63418954827281, "grad_norm": 0.2661818861961365, "learning_rate": 1e-05, "loss": 0.9383, "step": 52650 }, { "epoch": 46.63861824623561, "grad_norm": 0.2577047049999237, "learning_rate": 1e-05, "loss": 0.9509, "step": 52655 }, { "epoch": 46.6430469441984, "grad_norm": 0.25426873564720154, "learning_rate": 1e-05, "loss": 1.0249, "step": 52660 }, { "epoch": 46.647475642161204, "grad_norm": 0.22114379703998566, "learning_rate": 1e-05, "loss": 1.0048, "step": 52665 }, { "epoch": 46.651904340124005, "grad_norm": 0.27035418152809143, "learning_rate": 1e-05, "loss": 0.9378, "step": 52670 }, { "epoch": 46.6563330380868, "grad_norm": 0.256034255027771, "learning_rate": 1e-05, "loss": 0.9979, "step": 52675 }, { "epoch": 46.6607617360496, "grad_norm": 0.29731854796409607, "learning_rate": 1e-05, "loss": 0.9876, "step": 52680 }, { "epoch": 46.6651904340124, "grad_norm": 0.23128163814544678, "learning_rate": 1e-05, "loss": 0.9589, "step": 52685 }, { "epoch": 46.669619131975196, "grad_norm": 0.22359953820705414, "learning_rate": 1e-05, "loss": 0.909, "step": 52690 }, { "epoch": 46.674047829938, "grad_norm": 0.21049128472805023, "learning_rate": 1e-05, "loss": 0.9651, "step": 52695 }, { "epoch": 46.6784765279008, "grad_norm": 0.26686444878578186, "learning_rate": 1e-05, "loss": 1.0016, "step": 52700 }, { "epoch": 46.68290522586359, "grad_norm": 0.29327306151390076, "learning_rate": 1e-05, "loss": 0.9621, "step": 52705 }, { "epoch": 46.687333923826394, "grad_norm": 0.264400839805603, "learning_rate": 1e-05, "loss": 0.9365, "step": 52710 }, { "epoch": 46.691762621789195, "grad_norm": 0.28055617213249207, "learning_rate": 1e-05, "loss": 0.9973, "step": 52715 }, { "epoch": 46.696191319751996, "grad_norm": 0.24878482520580292, "learning_rate": 1e-05, "loss": 1.0002, "step": 52720 }, { "epoch": 46.70062001771479, "grad_norm": 0.26178857684135437, "learning_rate": 1e-05, "loss": 0.9868, "step": 52725 }, { "epoch": 46.70504871567759, "grad_norm": 0.25145775079727173, "learning_rate": 1e-05, "loss": 1.029, "step": 52730 }, { "epoch": 46.70947741364039, "grad_norm": 0.21283026039600372, "learning_rate": 1e-05, "loss": 1.0149, "step": 52735 }, { "epoch": 46.71390611160319, "grad_norm": 0.2254999727010727, "learning_rate": 1e-05, "loss": 0.9918, "step": 52740 }, { "epoch": 46.71833480956599, "grad_norm": 0.1945682168006897, "learning_rate": 1e-05, "loss": 1.0056, "step": 52745 }, { "epoch": 46.72276350752879, "grad_norm": 0.2572639286518097, "learning_rate": 1e-05, "loss": 1.0452, "step": 52750 }, { "epoch": 46.72719220549158, "grad_norm": 0.2676490545272827, "learning_rate": 1e-05, "loss": 0.9614, "step": 52755 }, { "epoch": 46.731620903454385, "grad_norm": 0.25357845425605774, "learning_rate": 1e-05, "loss": 0.9596, "step": 52760 }, { "epoch": 46.736049601417186, "grad_norm": 0.2400568425655365, "learning_rate": 1e-05, "loss": 0.9941, "step": 52765 }, { "epoch": 46.74047829937998, "grad_norm": 0.26042479276657104, "learning_rate": 1e-05, "loss": 0.9632, "step": 52770 }, { "epoch": 46.74490699734278, "grad_norm": 0.24639937281608582, "learning_rate": 1e-05, "loss": 0.9821, "step": 52775 }, { "epoch": 46.74933569530558, "grad_norm": 0.2331128567457199, "learning_rate": 1e-05, "loss": 1.0065, "step": 52780 }, { "epoch": 46.753764393268376, "grad_norm": 0.2004622220993042, "learning_rate": 1e-05, "loss": 0.9028, "step": 52785 }, { "epoch": 46.75819309123118, "grad_norm": 0.23124127089977264, "learning_rate": 1e-05, "loss": 0.9562, "step": 52790 }, { "epoch": 46.76262178919398, "grad_norm": 0.22730982303619385, "learning_rate": 1e-05, "loss": 1.022, "step": 52795 }, { "epoch": 46.76705048715677, "grad_norm": 0.2400243580341339, "learning_rate": 1e-05, "loss": 0.9463, "step": 52800 }, { "epoch": 46.771479185119574, "grad_norm": 0.23408199846744537, "learning_rate": 1e-05, "loss": 0.9531, "step": 52805 }, { "epoch": 46.775907883082375, "grad_norm": 0.2206108272075653, "learning_rate": 1e-05, "loss": 0.9544, "step": 52810 }, { "epoch": 46.78033658104517, "grad_norm": 0.2546866238117218, "learning_rate": 1e-05, "loss": 0.9638, "step": 52815 }, { "epoch": 46.78476527900797, "grad_norm": 0.24876323342323303, "learning_rate": 1e-05, "loss": 0.9548, "step": 52820 }, { "epoch": 46.78919397697077, "grad_norm": 0.21203528344631195, "learning_rate": 1e-05, "loss": 0.9742, "step": 52825 }, { "epoch": 46.79362267493357, "grad_norm": 0.2326049953699112, "learning_rate": 1e-05, "loss": 0.9865, "step": 52830 }, { "epoch": 46.79805137289637, "grad_norm": 0.21757188439369202, "learning_rate": 1e-05, "loss": 1.0436, "step": 52835 }, { "epoch": 46.80248007085917, "grad_norm": 0.2163865715265274, "learning_rate": 1e-05, "loss": 0.9463, "step": 52840 }, { "epoch": 46.80690876882197, "grad_norm": 0.2674662470817566, "learning_rate": 1e-05, "loss": 0.9766, "step": 52845 }, { "epoch": 46.811337466784764, "grad_norm": 0.26493677496910095, "learning_rate": 1e-05, "loss": 0.9257, "step": 52850 }, { "epoch": 46.815766164747565, "grad_norm": 0.27579277753829956, "learning_rate": 1e-05, "loss": 1.0069, "step": 52855 }, { "epoch": 46.820194862710366, "grad_norm": 0.24195124208927155, "learning_rate": 1e-05, "loss": 0.9657, "step": 52860 }, { "epoch": 46.82462356067316, "grad_norm": 0.2612171769142151, "learning_rate": 1e-05, "loss": 1.0164, "step": 52865 }, { "epoch": 46.82905225863596, "grad_norm": 0.2395055890083313, "learning_rate": 1e-05, "loss": 1.0019, "step": 52870 }, { "epoch": 46.83348095659876, "grad_norm": 0.23697564005851746, "learning_rate": 1e-05, "loss": 1.0321, "step": 52875 }, { "epoch": 46.83790965456156, "grad_norm": 0.2651281952857971, "learning_rate": 1e-05, "loss": 0.9473, "step": 52880 }, { "epoch": 46.84233835252436, "grad_norm": 0.23796038329601288, "learning_rate": 1e-05, "loss": 1.0297, "step": 52885 }, { "epoch": 46.84676705048716, "grad_norm": 0.23892737925052643, "learning_rate": 1e-05, "loss": 0.9959, "step": 52890 }, { "epoch": 46.85119574844995, "grad_norm": 0.28136616945266724, "learning_rate": 1e-05, "loss": 0.9671, "step": 52895 }, { "epoch": 46.855624446412754, "grad_norm": 0.24340462684631348, "learning_rate": 1e-05, "loss": 0.9753, "step": 52900 }, { "epoch": 46.860053144375556, "grad_norm": 0.2332012802362442, "learning_rate": 1e-05, "loss": 0.9581, "step": 52905 }, { "epoch": 46.86448184233835, "grad_norm": 0.21644599735736847, "learning_rate": 1e-05, "loss": 1.0087, "step": 52910 }, { "epoch": 46.86891054030115, "grad_norm": 0.2537345588207245, "learning_rate": 1e-05, "loss": 0.9803, "step": 52915 }, { "epoch": 46.87333923826395, "grad_norm": 0.2520318627357483, "learning_rate": 1e-05, "loss": 0.9907, "step": 52920 }, { "epoch": 46.877767936226746, "grad_norm": 0.241068035364151, "learning_rate": 1e-05, "loss": 0.994, "step": 52925 }, { "epoch": 46.88219663418955, "grad_norm": 0.21747778356075287, "learning_rate": 1e-05, "loss": 0.9968, "step": 52930 }, { "epoch": 46.88662533215235, "grad_norm": 0.22705544531345367, "learning_rate": 1e-05, "loss": 0.9215, "step": 52935 }, { "epoch": 46.89105403011514, "grad_norm": 0.24327921867370605, "learning_rate": 1e-05, "loss": 0.9728, "step": 52940 }, { "epoch": 46.895482728077944, "grad_norm": 0.21224263310432434, "learning_rate": 1e-05, "loss": 0.9356, "step": 52945 }, { "epoch": 46.899911426040745, "grad_norm": 0.28321564197540283, "learning_rate": 1e-05, "loss": 0.9857, "step": 52950 }, { "epoch": 46.90434012400354, "grad_norm": 0.24056753516197205, "learning_rate": 1e-05, "loss": 0.9627, "step": 52955 }, { "epoch": 46.90876882196634, "grad_norm": 0.3190236985683441, "learning_rate": 1e-05, "loss": 0.9294, "step": 52960 }, { "epoch": 46.91319751992914, "grad_norm": 0.2720867991447449, "learning_rate": 1e-05, "loss": 1.0158, "step": 52965 }, { "epoch": 46.91762621789194, "grad_norm": 0.22937411069869995, "learning_rate": 1e-05, "loss": 0.9674, "step": 52970 }, { "epoch": 46.92205491585474, "grad_norm": 0.2518390715122223, "learning_rate": 1e-05, "loss": 0.9889, "step": 52975 }, { "epoch": 46.92648361381754, "grad_norm": 0.19199016690254211, "learning_rate": 1e-05, "loss": 0.9711, "step": 52980 }, { "epoch": 46.93091231178034, "grad_norm": 0.1845492422580719, "learning_rate": 1e-05, "loss": 0.9838, "step": 52985 }, { "epoch": 46.935341009743134, "grad_norm": 0.2563817799091339, "learning_rate": 1e-05, "loss": 0.958, "step": 52990 }, { "epoch": 46.939769707705935, "grad_norm": 0.235687255859375, "learning_rate": 1e-05, "loss": 1.0119, "step": 52995 }, { "epoch": 46.944198405668736, "grad_norm": 0.25768518447875977, "learning_rate": 1e-05, "loss": 0.908, "step": 53000 }, { "epoch": 46.94862710363153, "grad_norm": 0.25883081555366516, "learning_rate": 1e-05, "loss": 0.9731, "step": 53005 }, { "epoch": 46.95305580159433, "grad_norm": 0.23598439991474152, "learning_rate": 1e-05, "loss": 1.0461, "step": 53010 }, { "epoch": 46.95748449955713, "grad_norm": 0.21958620846271515, "learning_rate": 1e-05, "loss": 0.9895, "step": 53015 }, { "epoch": 46.96191319751993, "grad_norm": 0.22603271901607513, "learning_rate": 1e-05, "loss": 0.8979, "step": 53020 }, { "epoch": 46.96634189548273, "grad_norm": 0.3098353445529938, "learning_rate": 1e-05, "loss": 0.9642, "step": 53025 }, { "epoch": 46.97077059344553, "grad_norm": 0.25202155113220215, "learning_rate": 1e-05, "loss": 1.0113, "step": 53030 }, { "epoch": 46.97519929140832, "grad_norm": 0.24985453486442566, "learning_rate": 1e-05, "loss": 0.9933, "step": 53035 }, { "epoch": 46.979627989371124, "grad_norm": 0.23234573006629944, "learning_rate": 1e-05, "loss": 0.9841, "step": 53040 }, { "epoch": 46.984056687333926, "grad_norm": 0.22871297597885132, "learning_rate": 1e-05, "loss": 0.9235, "step": 53045 }, { "epoch": 46.98848538529672, "grad_norm": 0.25631675124168396, "learning_rate": 1e-05, "loss": 0.9206, "step": 53050 }, { "epoch": 46.99291408325952, "grad_norm": 0.25236403942108154, "learning_rate": 1e-05, "loss": 0.9237, "step": 53055 }, { "epoch": 46.99734278122232, "grad_norm": 0.26691293716430664, "learning_rate": 1e-05, "loss": 0.9442, "step": 53060 }, { "epoch": 47.001771479185116, "grad_norm": 0.26703035831451416, "learning_rate": 1e-05, "loss": 0.9549, "step": 53065 }, { "epoch": 47.00620017714792, "grad_norm": 0.24113349616527557, "learning_rate": 1e-05, "loss": 1.0245, "step": 53070 }, { "epoch": 47.01062887511072, "grad_norm": 0.22206377983093262, "learning_rate": 1e-05, "loss": 0.9828, "step": 53075 }, { "epoch": 47.01505757307352, "grad_norm": 0.24047952890396118, "learning_rate": 1e-05, "loss": 0.9251, "step": 53080 }, { "epoch": 47.019486271036314, "grad_norm": 0.26321879029273987, "learning_rate": 1e-05, "loss": 1.0058, "step": 53085 }, { "epoch": 47.023914968999115, "grad_norm": 0.2279481440782547, "learning_rate": 1e-05, "loss": 0.9691, "step": 53090 }, { "epoch": 47.028343666961916, "grad_norm": 0.3014968931674957, "learning_rate": 1e-05, "loss": 0.9764, "step": 53095 }, { "epoch": 47.03277236492471, "grad_norm": 0.23308083415031433, "learning_rate": 1e-05, "loss": 1.0541, "step": 53100 }, { "epoch": 47.03720106288751, "grad_norm": 0.2552223205566406, "learning_rate": 1e-05, "loss": 1.0253, "step": 53105 }, { "epoch": 47.04162976085031, "grad_norm": 0.2541239559650421, "learning_rate": 1e-05, "loss": 0.939, "step": 53110 }, { "epoch": 47.04605845881311, "grad_norm": 0.2731362283229828, "learning_rate": 1e-05, "loss": 0.965, "step": 53115 }, { "epoch": 47.05048715677591, "grad_norm": 0.2793257236480713, "learning_rate": 1e-05, "loss": 0.9564, "step": 53120 }, { "epoch": 47.05491585473871, "grad_norm": 0.2322128713130951, "learning_rate": 1e-05, "loss": 0.9733, "step": 53125 }, { "epoch": 47.0593445527015, "grad_norm": 0.2558023929595947, "learning_rate": 1e-05, "loss": 0.9908, "step": 53130 }, { "epoch": 47.063773250664305, "grad_norm": 0.3224157989025116, "learning_rate": 1e-05, "loss": 0.9382, "step": 53135 }, { "epoch": 47.068201948627106, "grad_norm": 0.22113892436027527, "learning_rate": 1e-05, "loss": 0.9743, "step": 53140 }, { "epoch": 47.0726306465899, "grad_norm": 0.21309953927993774, "learning_rate": 1e-05, "loss": 0.9793, "step": 53145 }, { "epoch": 47.0770593445527, "grad_norm": 0.24870753288269043, "learning_rate": 1e-05, "loss": 0.985, "step": 53150 }, { "epoch": 47.0814880425155, "grad_norm": 0.2497696727514267, "learning_rate": 1e-05, "loss": 0.9378, "step": 53155 }, { "epoch": 47.0859167404783, "grad_norm": 0.2304968684911728, "learning_rate": 1e-05, "loss": 0.9745, "step": 53160 }, { "epoch": 47.0903454384411, "grad_norm": 0.2342444509267807, "learning_rate": 1e-05, "loss": 0.9149, "step": 53165 }, { "epoch": 47.0947741364039, "grad_norm": 0.28979218006134033, "learning_rate": 1e-05, "loss": 0.9309, "step": 53170 }, { "epoch": 47.09920283436669, "grad_norm": 0.23976381123065948, "learning_rate": 1e-05, "loss": 0.9199, "step": 53175 }, { "epoch": 47.103631532329494, "grad_norm": 0.2094898670911789, "learning_rate": 1e-05, "loss": 0.9993, "step": 53180 }, { "epoch": 47.108060230292296, "grad_norm": 0.21998946368694305, "learning_rate": 1e-05, "loss": 0.9358, "step": 53185 }, { "epoch": 47.11248892825509, "grad_norm": 0.22302959859371185, "learning_rate": 1e-05, "loss": 0.9763, "step": 53190 }, { "epoch": 47.11691762621789, "grad_norm": 0.2214338630437851, "learning_rate": 1e-05, "loss": 0.9558, "step": 53195 }, { "epoch": 47.12134632418069, "grad_norm": 0.23871473968029022, "learning_rate": 1e-05, "loss": 0.9699, "step": 53200 }, { "epoch": 47.12577502214349, "grad_norm": 0.252048522233963, "learning_rate": 1e-05, "loss": 0.961, "step": 53205 }, { "epoch": 47.13020372010629, "grad_norm": 0.3042607605457306, "learning_rate": 1e-05, "loss": 0.9748, "step": 53210 }, { "epoch": 47.13463241806909, "grad_norm": 0.2738129794597626, "learning_rate": 1e-05, "loss": 0.9536, "step": 53215 }, { "epoch": 47.13906111603189, "grad_norm": 0.24043193459510803, "learning_rate": 1e-05, "loss": 0.9863, "step": 53220 }, { "epoch": 47.143489813994684, "grad_norm": 0.22027045488357544, "learning_rate": 1e-05, "loss": 0.9043, "step": 53225 }, { "epoch": 47.147918511957485, "grad_norm": 0.26409274339675903, "learning_rate": 1e-05, "loss": 0.9505, "step": 53230 }, { "epoch": 47.152347209920286, "grad_norm": 0.21213717758655548, "learning_rate": 1e-05, "loss": 1.0403, "step": 53235 }, { "epoch": 47.15677590788308, "grad_norm": 0.207491934299469, "learning_rate": 1e-05, "loss": 0.958, "step": 53240 }, { "epoch": 47.16120460584588, "grad_norm": 0.265442818403244, "learning_rate": 1e-05, "loss": 0.9713, "step": 53245 }, { "epoch": 47.16563330380868, "grad_norm": 0.21938614547252655, "learning_rate": 1e-05, "loss": 0.994, "step": 53250 }, { "epoch": 47.17006200177148, "grad_norm": 0.25768953561782837, "learning_rate": 1e-05, "loss": 0.9311, "step": 53255 }, { "epoch": 47.17449069973428, "grad_norm": 0.22516220808029175, "learning_rate": 1e-05, "loss": 1.0104, "step": 53260 }, { "epoch": 47.17891939769708, "grad_norm": 0.25387775897979736, "learning_rate": 1e-05, "loss": 0.996, "step": 53265 }, { "epoch": 47.18334809565987, "grad_norm": 0.26574185490608215, "learning_rate": 1e-05, "loss": 0.9355, "step": 53270 }, { "epoch": 47.187776793622675, "grad_norm": 0.2780439853668213, "learning_rate": 1e-05, "loss": 0.9788, "step": 53275 }, { "epoch": 47.192205491585476, "grad_norm": 0.2373197376728058, "learning_rate": 1e-05, "loss": 0.9777, "step": 53280 }, { "epoch": 47.19663418954827, "grad_norm": 0.22846949100494385, "learning_rate": 1e-05, "loss": 0.9808, "step": 53285 }, { "epoch": 47.20106288751107, "grad_norm": 0.20439369976520538, "learning_rate": 1e-05, "loss": 0.9653, "step": 53290 }, { "epoch": 47.20549158547387, "grad_norm": 0.22882992029190063, "learning_rate": 1e-05, "loss": 1.0099, "step": 53295 }, { "epoch": 47.20992028343667, "grad_norm": 0.21933390200138092, "learning_rate": 1e-05, "loss": 0.967, "step": 53300 }, { "epoch": 47.21434898139947, "grad_norm": 0.23676227033138275, "learning_rate": 1e-05, "loss": 0.9635, "step": 53305 }, { "epoch": 47.21877767936227, "grad_norm": 0.2338477224111557, "learning_rate": 1e-05, "loss": 0.9655, "step": 53310 }, { "epoch": 47.22320637732506, "grad_norm": 0.21973244845867157, "learning_rate": 1e-05, "loss": 0.9244, "step": 53315 }, { "epoch": 47.227635075287864, "grad_norm": 0.2668595314025879, "learning_rate": 1e-05, "loss": 0.9429, "step": 53320 }, { "epoch": 47.232063773250665, "grad_norm": 0.2607423663139343, "learning_rate": 1e-05, "loss": 1.0101, "step": 53325 }, { "epoch": 47.23649247121347, "grad_norm": 0.22921982407569885, "learning_rate": 1e-05, "loss": 1.0264, "step": 53330 }, { "epoch": 47.24092116917626, "grad_norm": 0.2475089430809021, "learning_rate": 1e-05, "loss": 1.0031, "step": 53335 }, { "epoch": 47.24534986713906, "grad_norm": 0.24275843799114227, "learning_rate": 1e-05, "loss": 1.0163, "step": 53340 }, { "epoch": 47.24977856510186, "grad_norm": 0.24358443915843964, "learning_rate": 1e-05, "loss": 0.9772, "step": 53345 }, { "epoch": 47.25420726306466, "grad_norm": 0.20708854496479034, "learning_rate": 1e-05, "loss": 1.002, "step": 53350 }, { "epoch": 47.25863596102746, "grad_norm": 0.2137162834405899, "learning_rate": 1e-05, "loss": 1.0069, "step": 53355 }, { "epoch": 47.26306465899026, "grad_norm": 0.2265847772359848, "learning_rate": 1e-05, "loss": 0.9666, "step": 53360 }, { "epoch": 47.267493356953054, "grad_norm": 0.20596414804458618, "learning_rate": 1e-05, "loss": 0.9836, "step": 53365 }, { "epoch": 47.271922054915855, "grad_norm": 0.23469996452331543, "learning_rate": 1e-05, "loss": 0.9854, "step": 53370 }, { "epoch": 47.276350752878656, "grad_norm": 0.2332441210746765, "learning_rate": 1e-05, "loss": 0.9635, "step": 53375 }, { "epoch": 47.28077945084145, "grad_norm": 0.2466612458229065, "learning_rate": 1e-05, "loss": 1.0057, "step": 53380 }, { "epoch": 47.28520814880425, "grad_norm": 0.24347557127475739, "learning_rate": 1e-05, "loss": 0.9415, "step": 53385 }, { "epoch": 47.28963684676705, "grad_norm": 0.24266250431537628, "learning_rate": 1e-05, "loss": 0.9598, "step": 53390 }, { "epoch": 47.29406554472985, "grad_norm": 0.2543930411338806, "learning_rate": 1e-05, "loss": 0.957, "step": 53395 }, { "epoch": 47.29849424269265, "grad_norm": 0.24881596863269806, "learning_rate": 1e-05, "loss": 0.9541, "step": 53400 }, { "epoch": 47.30292294065545, "grad_norm": 0.264466792345047, "learning_rate": 1e-05, "loss": 0.9587, "step": 53405 }, { "epoch": 47.30735163861824, "grad_norm": 0.2405591905117035, "learning_rate": 1e-05, "loss": 0.9876, "step": 53410 }, { "epoch": 47.311780336581045, "grad_norm": 0.2447965145111084, "learning_rate": 1e-05, "loss": 1.0131, "step": 53415 }, { "epoch": 47.316209034543846, "grad_norm": 0.21730738878250122, "learning_rate": 1e-05, "loss": 0.9322, "step": 53420 }, { "epoch": 47.32063773250664, "grad_norm": 0.22900474071502686, "learning_rate": 1e-05, "loss": 0.9606, "step": 53425 }, { "epoch": 47.32506643046944, "grad_norm": 0.18706859648227692, "learning_rate": 1e-05, "loss": 0.9585, "step": 53430 }, { "epoch": 47.32949512843224, "grad_norm": 0.23171640932559967, "learning_rate": 1e-05, "loss": 0.9728, "step": 53435 }, { "epoch": 47.333923826395036, "grad_norm": 0.21889935433864594, "learning_rate": 1e-05, "loss": 0.9093, "step": 53440 }, { "epoch": 47.33835252435784, "grad_norm": 0.199715718626976, "learning_rate": 1e-05, "loss": 0.9529, "step": 53445 }, { "epoch": 47.34278122232064, "grad_norm": 0.23362258076667786, "learning_rate": 1e-05, "loss": 0.9619, "step": 53450 }, { "epoch": 47.34720992028344, "grad_norm": 0.25792279839515686, "learning_rate": 1e-05, "loss": 0.9991, "step": 53455 }, { "epoch": 47.351638618246234, "grad_norm": 0.261173814535141, "learning_rate": 1e-05, "loss": 0.9723, "step": 53460 }, { "epoch": 47.356067316209035, "grad_norm": 0.21843306720256805, "learning_rate": 1e-05, "loss": 0.9621, "step": 53465 }, { "epoch": 47.36049601417184, "grad_norm": 0.2818938195705414, "learning_rate": 1e-05, "loss": 0.9405, "step": 53470 }, { "epoch": 47.36492471213463, "grad_norm": 0.27318310737609863, "learning_rate": 1e-05, "loss": 0.9681, "step": 53475 }, { "epoch": 47.36935341009743, "grad_norm": 0.24385161697864532, "learning_rate": 1e-05, "loss": 0.9645, "step": 53480 }, { "epoch": 47.37378210806023, "grad_norm": 0.22357363998889923, "learning_rate": 1e-05, "loss": 0.9445, "step": 53485 }, { "epoch": 47.37821080602303, "grad_norm": 0.23831617832183838, "learning_rate": 1e-05, "loss": 0.9794, "step": 53490 }, { "epoch": 47.38263950398583, "grad_norm": 0.2219279557466507, "learning_rate": 1e-05, "loss": 0.9754, "step": 53495 }, { "epoch": 47.38706820194863, "grad_norm": 0.2879375219345093, "learning_rate": 1e-05, "loss": 0.9923, "step": 53500 }, { "epoch": 47.391496899911424, "grad_norm": 0.23290784657001495, "learning_rate": 1e-05, "loss": 0.9588, "step": 53505 }, { "epoch": 47.395925597874225, "grad_norm": 0.2344203144311905, "learning_rate": 1e-05, "loss": 1.0241, "step": 53510 }, { "epoch": 47.400354295837026, "grad_norm": 0.2480880469083786, "learning_rate": 1e-05, "loss": 0.9789, "step": 53515 }, { "epoch": 47.40478299379982, "grad_norm": 0.2302074283361435, "learning_rate": 1e-05, "loss": 0.9752, "step": 53520 }, { "epoch": 47.40921169176262, "grad_norm": 0.2596975862979889, "learning_rate": 1e-05, "loss": 0.9758, "step": 53525 }, { "epoch": 47.41364038972542, "grad_norm": 0.20955277979373932, "learning_rate": 1e-05, "loss": 0.932, "step": 53530 }, { "epoch": 47.41806908768822, "grad_norm": 0.23876962065696716, "learning_rate": 1e-05, "loss": 0.9718, "step": 53535 }, { "epoch": 47.42249778565102, "grad_norm": 0.23004160821437836, "learning_rate": 1e-05, "loss": 0.9574, "step": 53540 }, { "epoch": 47.42692648361382, "grad_norm": 0.2267003059387207, "learning_rate": 1e-05, "loss": 0.9831, "step": 53545 }, { "epoch": 47.43135518157661, "grad_norm": 0.2454613447189331, "learning_rate": 1e-05, "loss": 0.8968, "step": 53550 }, { "epoch": 47.435783879539414, "grad_norm": 0.2609652876853943, "learning_rate": 1e-05, "loss": 0.9854, "step": 53555 }, { "epoch": 47.440212577502216, "grad_norm": 0.22250857949256897, "learning_rate": 1e-05, "loss": 0.9926, "step": 53560 }, { "epoch": 47.44464127546502, "grad_norm": 0.24546082317829132, "learning_rate": 1e-05, "loss": 0.9942, "step": 53565 }, { "epoch": 47.44906997342781, "grad_norm": 0.25648465752601624, "learning_rate": 1e-05, "loss": 0.9568, "step": 53570 }, { "epoch": 47.45349867139061, "grad_norm": 0.22747467458248138, "learning_rate": 1e-05, "loss": 0.8994, "step": 53575 }, { "epoch": 47.45792736935341, "grad_norm": 0.2028781920671463, "learning_rate": 1e-05, "loss": 0.9321, "step": 53580 }, { "epoch": 47.46235606731621, "grad_norm": 0.21915559470653534, "learning_rate": 1e-05, "loss": 0.9662, "step": 53585 }, { "epoch": 47.46678476527901, "grad_norm": 0.28994935750961304, "learning_rate": 1e-05, "loss": 0.9554, "step": 53590 }, { "epoch": 47.47121346324181, "grad_norm": 0.2160787731409073, "learning_rate": 1e-05, "loss": 0.9377, "step": 53595 }, { "epoch": 47.475642161204604, "grad_norm": 0.2394951432943344, "learning_rate": 1e-05, "loss": 1.0178, "step": 53600 }, { "epoch": 47.480070859167405, "grad_norm": 0.22000344097614288, "learning_rate": 1e-05, "loss": 0.964, "step": 53605 }, { "epoch": 47.484499557130206, "grad_norm": 0.21622705459594727, "learning_rate": 1e-05, "loss": 0.941, "step": 53610 }, { "epoch": 47.488928255093, "grad_norm": 0.2536621391773224, "learning_rate": 1e-05, "loss": 0.9771, "step": 53615 }, { "epoch": 47.4933569530558, "grad_norm": 0.26189449429512024, "learning_rate": 1e-05, "loss": 1.0113, "step": 53620 }, { "epoch": 47.4977856510186, "grad_norm": 0.25883767008781433, "learning_rate": 1e-05, "loss": 0.9463, "step": 53625 }, { "epoch": 47.5022143489814, "grad_norm": 0.2206197828054428, "learning_rate": 1e-05, "loss": 0.9678, "step": 53630 }, { "epoch": 47.5066430469442, "grad_norm": 0.28032901883125305, "learning_rate": 1e-05, "loss": 0.9902, "step": 53635 }, { "epoch": 47.511071744907, "grad_norm": 0.2708328664302826, "learning_rate": 1e-05, "loss": 0.9791, "step": 53640 }, { "epoch": 47.515500442869794, "grad_norm": 0.24346905946731567, "learning_rate": 1e-05, "loss": 0.9471, "step": 53645 }, { "epoch": 47.519929140832595, "grad_norm": 0.23383086919784546, "learning_rate": 1e-05, "loss": 0.9628, "step": 53650 }, { "epoch": 47.524357838795396, "grad_norm": 0.2327410876750946, "learning_rate": 1e-05, "loss": 0.9527, "step": 53655 }, { "epoch": 47.52878653675819, "grad_norm": 0.24351760745048523, "learning_rate": 1e-05, "loss": 0.9258, "step": 53660 }, { "epoch": 47.53321523472099, "grad_norm": 0.2381892055273056, "learning_rate": 1e-05, "loss": 1.0114, "step": 53665 }, { "epoch": 47.53764393268379, "grad_norm": 0.2196206897497177, "learning_rate": 1e-05, "loss": 0.9859, "step": 53670 }, { "epoch": 47.54207263064659, "grad_norm": 0.2274877429008484, "learning_rate": 1e-05, "loss": 0.9877, "step": 53675 }, { "epoch": 47.54650132860939, "grad_norm": 0.24286487698554993, "learning_rate": 1e-05, "loss": 1.0188, "step": 53680 }, { "epoch": 47.55093002657219, "grad_norm": 0.2319401651620865, "learning_rate": 1e-05, "loss": 0.9844, "step": 53685 }, { "epoch": 47.55535872453498, "grad_norm": 0.24155199527740479, "learning_rate": 1e-05, "loss": 0.9923, "step": 53690 }, { "epoch": 47.559787422497784, "grad_norm": 0.20747703313827515, "learning_rate": 1e-05, "loss": 0.9281, "step": 53695 }, { "epoch": 47.564216120460586, "grad_norm": 0.24954165518283844, "learning_rate": 1e-05, "loss": 0.9337, "step": 53700 }, { "epoch": 47.56864481842339, "grad_norm": 0.2240035980939865, "learning_rate": 1e-05, "loss": 0.9673, "step": 53705 }, { "epoch": 47.57307351638618, "grad_norm": 0.24623899161815643, "learning_rate": 1e-05, "loss": 1.0011, "step": 53710 }, { "epoch": 47.57750221434898, "grad_norm": 0.2567157447338104, "learning_rate": 1e-05, "loss": 0.9441, "step": 53715 }, { "epoch": 47.58193091231178, "grad_norm": 0.2522636353969574, "learning_rate": 1e-05, "loss": 0.9837, "step": 53720 }, { "epoch": 47.58635961027458, "grad_norm": 0.27009207010269165, "learning_rate": 1e-05, "loss": 0.9299, "step": 53725 }, { "epoch": 47.59078830823738, "grad_norm": 0.2906951904296875, "learning_rate": 1e-05, "loss": 0.959, "step": 53730 }, { "epoch": 47.59521700620018, "grad_norm": 0.22079971432685852, "learning_rate": 1e-05, "loss": 0.948, "step": 53735 }, { "epoch": 47.599645704162974, "grad_norm": 0.21642599999904633, "learning_rate": 1e-05, "loss": 0.9518, "step": 53740 }, { "epoch": 47.604074402125775, "grad_norm": 0.27416425943374634, "learning_rate": 1e-05, "loss": 1.051, "step": 53745 }, { "epoch": 47.608503100088576, "grad_norm": 0.24693958461284637, "learning_rate": 1e-05, "loss": 0.9598, "step": 53750 }, { "epoch": 47.61293179805137, "grad_norm": 0.2254188358783722, "learning_rate": 1e-05, "loss": 0.9738, "step": 53755 }, { "epoch": 47.61736049601417, "grad_norm": 0.24221350252628326, "learning_rate": 1e-05, "loss": 1.01, "step": 53760 }, { "epoch": 47.62178919397697, "grad_norm": 0.23769111931324005, "learning_rate": 1e-05, "loss": 0.9933, "step": 53765 }, { "epoch": 47.62621789193977, "grad_norm": 0.20381732285022736, "learning_rate": 1e-05, "loss": 1.0168, "step": 53770 }, { "epoch": 47.63064658990257, "grad_norm": 0.23721915483474731, "learning_rate": 1e-05, "loss": 0.9718, "step": 53775 }, { "epoch": 47.63507528786537, "grad_norm": 0.23032695055007935, "learning_rate": 1e-05, "loss": 0.9208, "step": 53780 }, { "epoch": 47.63950398582816, "grad_norm": 0.19805924594402313, "learning_rate": 1e-05, "loss": 1.0015, "step": 53785 }, { "epoch": 47.643932683790965, "grad_norm": 0.29126277565956116, "learning_rate": 1e-05, "loss": 1.0057, "step": 53790 }, { "epoch": 47.648361381753766, "grad_norm": 0.23259305953979492, "learning_rate": 1e-05, "loss": 0.976, "step": 53795 }, { "epoch": 47.65279007971656, "grad_norm": 0.2581517994403839, "learning_rate": 1e-05, "loss": 0.9276, "step": 53800 }, { "epoch": 47.65721877767936, "grad_norm": 0.25572654604911804, "learning_rate": 1e-05, "loss": 1.0109, "step": 53805 }, { "epoch": 47.66164747564216, "grad_norm": 0.21386483311653137, "learning_rate": 1e-05, "loss": 0.9297, "step": 53810 }, { "epoch": 47.666076173604964, "grad_norm": 0.25590816140174866, "learning_rate": 1e-05, "loss": 0.9533, "step": 53815 }, { "epoch": 47.67050487156776, "grad_norm": 0.31813332438468933, "learning_rate": 1e-05, "loss": 0.9261, "step": 53820 }, { "epoch": 47.67493356953056, "grad_norm": 0.24896399676799774, "learning_rate": 1e-05, "loss": 0.9957, "step": 53825 }, { "epoch": 47.67936226749336, "grad_norm": 0.22610758244991302, "learning_rate": 1e-05, "loss": 0.9857, "step": 53830 }, { "epoch": 47.683790965456154, "grad_norm": 0.24754638969898224, "learning_rate": 1e-05, "loss": 0.9442, "step": 53835 }, { "epoch": 47.688219663418955, "grad_norm": 0.2399856001138687, "learning_rate": 1e-05, "loss": 0.9922, "step": 53840 }, { "epoch": 47.69264836138176, "grad_norm": 0.21230283379554749, "learning_rate": 1e-05, "loss": 1.0101, "step": 53845 }, { "epoch": 47.69707705934455, "grad_norm": 0.2711069583892822, "learning_rate": 1e-05, "loss": 0.9166, "step": 53850 }, { "epoch": 47.70150575730735, "grad_norm": 0.2500757873058319, "learning_rate": 1e-05, "loss": 1.0142, "step": 53855 }, { "epoch": 47.70593445527015, "grad_norm": 0.21669919788837433, "learning_rate": 1e-05, "loss": 1.0311, "step": 53860 }, { "epoch": 47.71036315323295, "grad_norm": 0.207510307431221, "learning_rate": 1e-05, "loss": 0.935, "step": 53865 }, { "epoch": 47.71479185119575, "grad_norm": 0.2725810408592224, "learning_rate": 1e-05, "loss": 0.9992, "step": 53870 }, { "epoch": 47.71922054915855, "grad_norm": 0.2077665477991104, "learning_rate": 1e-05, "loss": 0.9641, "step": 53875 }, { "epoch": 47.723649247121344, "grad_norm": 0.2801382541656494, "learning_rate": 1e-05, "loss": 0.9638, "step": 53880 }, { "epoch": 47.728077945084145, "grad_norm": 0.2574128806591034, "learning_rate": 1e-05, "loss": 0.9858, "step": 53885 }, { "epoch": 47.732506643046946, "grad_norm": 0.28305867314338684, "learning_rate": 1e-05, "loss": 1.0048, "step": 53890 }, { "epoch": 47.73693534100974, "grad_norm": 0.27080875635147095, "learning_rate": 1e-05, "loss": 0.9559, "step": 53895 }, { "epoch": 47.74136403897254, "grad_norm": 0.24162890017032623, "learning_rate": 1e-05, "loss": 1.0026, "step": 53900 }, { "epoch": 47.74579273693534, "grad_norm": 0.2586243152618408, "learning_rate": 1e-05, "loss": 1.0177, "step": 53905 }, { "epoch": 47.75022143489814, "grad_norm": 0.26357874274253845, "learning_rate": 1e-05, "loss": 0.9467, "step": 53910 }, { "epoch": 47.75465013286094, "grad_norm": 0.2479870319366455, "learning_rate": 1e-05, "loss": 1.0255, "step": 53915 }, { "epoch": 47.75907883082374, "grad_norm": 0.28403475880622864, "learning_rate": 1e-05, "loss": 0.9896, "step": 53920 }, { "epoch": 47.76350752878653, "grad_norm": 0.2357282191514969, "learning_rate": 1e-05, "loss": 0.9499, "step": 53925 }, { "epoch": 47.767936226749335, "grad_norm": 0.27125847339630127, "learning_rate": 1e-05, "loss": 1.0236, "step": 53930 }, { "epoch": 47.772364924712136, "grad_norm": 0.22664161026477814, "learning_rate": 1e-05, "loss": 0.98, "step": 53935 }, { "epoch": 47.77679362267494, "grad_norm": 0.22646038234233856, "learning_rate": 1e-05, "loss": 0.9803, "step": 53940 }, { "epoch": 47.78122232063773, "grad_norm": 0.22992365062236786, "learning_rate": 1e-05, "loss": 0.9506, "step": 53945 }, { "epoch": 47.78565101860053, "grad_norm": 0.22834976017475128, "learning_rate": 1e-05, "loss": 1.0092, "step": 53950 }, { "epoch": 47.79007971656333, "grad_norm": 0.24656164646148682, "learning_rate": 1e-05, "loss": 0.9644, "step": 53955 }, { "epoch": 47.79450841452613, "grad_norm": 0.25544074177742004, "learning_rate": 1e-05, "loss": 1.005, "step": 53960 }, { "epoch": 47.79893711248893, "grad_norm": 0.2300511598587036, "learning_rate": 1e-05, "loss": 0.9605, "step": 53965 }, { "epoch": 47.80336581045173, "grad_norm": 0.21994829177856445, "learning_rate": 1e-05, "loss": 0.9345, "step": 53970 }, { "epoch": 47.807794508414524, "grad_norm": 0.21942482888698578, "learning_rate": 1e-05, "loss": 0.9937, "step": 53975 }, { "epoch": 47.812223206377325, "grad_norm": 0.24433690309524536, "learning_rate": 1e-05, "loss": 0.9543, "step": 53980 }, { "epoch": 47.81665190434013, "grad_norm": 0.21427181363105774, "learning_rate": 1e-05, "loss": 0.9653, "step": 53985 }, { "epoch": 47.82108060230292, "grad_norm": 0.24657323956489563, "learning_rate": 1e-05, "loss": 1.0113, "step": 53990 }, { "epoch": 47.82550930026572, "grad_norm": 0.24297063052654266, "learning_rate": 1e-05, "loss": 0.9196, "step": 53995 }, { "epoch": 47.82993799822852, "grad_norm": 0.25518327951431274, "learning_rate": 1e-05, "loss": 0.9355, "step": 54000 }, { "epoch": 47.83436669619132, "grad_norm": 0.2253987193107605, "learning_rate": 1e-05, "loss": 0.9828, "step": 54005 }, { "epoch": 47.83879539415412, "grad_norm": 0.2907208204269409, "learning_rate": 1e-05, "loss": 0.991, "step": 54010 }, { "epoch": 47.84322409211692, "grad_norm": 0.2752867639064789, "learning_rate": 1e-05, "loss": 0.9447, "step": 54015 }, { "epoch": 47.847652790079714, "grad_norm": 0.24904656410217285, "learning_rate": 1e-05, "loss": 0.9827, "step": 54020 }, { "epoch": 47.852081488042515, "grad_norm": 0.24027155339717865, "learning_rate": 1e-05, "loss": 0.9578, "step": 54025 }, { "epoch": 47.856510186005316, "grad_norm": 0.2629442512989044, "learning_rate": 1e-05, "loss": 0.9972, "step": 54030 }, { "epoch": 47.86093888396811, "grad_norm": 0.2401168793439865, "learning_rate": 1e-05, "loss": 0.983, "step": 54035 }, { "epoch": 47.86536758193091, "grad_norm": 0.3038504123687744, "learning_rate": 1e-05, "loss": 0.9161, "step": 54040 }, { "epoch": 47.86979627989371, "grad_norm": 0.2369646579027176, "learning_rate": 1e-05, "loss": 0.9675, "step": 54045 }, { "epoch": 47.87422497785651, "grad_norm": 0.2360883504152298, "learning_rate": 1e-05, "loss": 0.9368, "step": 54050 }, { "epoch": 47.87865367581931, "grad_norm": 0.23813392221927643, "learning_rate": 1e-05, "loss": 1.058, "step": 54055 }, { "epoch": 47.88308237378211, "grad_norm": 0.24803613126277924, "learning_rate": 1e-05, "loss": 1.0178, "step": 54060 }, { "epoch": 47.88751107174491, "grad_norm": 0.28278231620788574, "learning_rate": 1e-05, "loss": 0.9968, "step": 54065 }, { "epoch": 47.891939769707704, "grad_norm": 0.2390805184841156, "learning_rate": 1e-05, "loss": 1.0258, "step": 54070 }, { "epoch": 47.896368467670506, "grad_norm": 0.2573857009410858, "learning_rate": 1e-05, "loss": 0.9186, "step": 54075 }, { "epoch": 47.90079716563331, "grad_norm": 0.2704625725746155, "learning_rate": 1e-05, "loss": 0.9066, "step": 54080 }, { "epoch": 47.9052258635961, "grad_norm": 0.2473248392343521, "learning_rate": 1e-05, "loss": 1.03, "step": 54085 }, { "epoch": 47.9096545615589, "grad_norm": 0.2661992907524109, "learning_rate": 1e-05, "loss": 0.981, "step": 54090 }, { "epoch": 47.9140832595217, "grad_norm": 0.2230280339717865, "learning_rate": 1e-05, "loss": 0.9454, "step": 54095 }, { "epoch": 47.9185119574845, "grad_norm": 0.24677376449108124, "learning_rate": 1e-05, "loss": 0.9904, "step": 54100 }, { "epoch": 47.9229406554473, "grad_norm": 0.25990813970565796, "learning_rate": 1e-05, "loss": 0.9851, "step": 54105 }, { "epoch": 47.9273693534101, "grad_norm": 0.24985814094543457, "learning_rate": 1e-05, "loss": 0.942, "step": 54110 }, { "epoch": 47.931798051372894, "grad_norm": 0.24179169535636902, "learning_rate": 1e-05, "loss": 0.972, "step": 54115 }, { "epoch": 47.936226749335695, "grad_norm": 0.2248622179031372, "learning_rate": 1e-05, "loss": 1.0138, "step": 54120 }, { "epoch": 47.9406554472985, "grad_norm": 0.2241470366716385, "learning_rate": 1e-05, "loss": 0.9612, "step": 54125 }, { "epoch": 47.94508414526129, "grad_norm": 0.23480424284934998, "learning_rate": 1e-05, "loss": 0.9706, "step": 54130 }, { "epoch": 47.94951284322409, "grad_norm": 0.2681584060192108, "learning_rate": 1e-05, "loss": 0.9863, "step": 54135 }, { "epoch": 47.95394154118689, "grad_norm": 0.24585366249084473, "learning_rate": 1e-05, "loss": 0.9602, "step": 54140 }, { "epoch": 47.95837023914969, "grad_norm": 0.2310543656349182, "learning_rate": 1e-05, "loss": 0.9868, "step": 54145 }, { "epoch": 47.96279893711249, "grad_norm": 0.24443264305591583, "learning_rate": 1e-05, "loss": 0.9616, "step": 54150 }, { "epoch": 47.96722763507529, "grad_norm": 0.20922762155532837, "learning_rate": 1e-05, "loss": 0.939, "step": 54155 }, { "epoch": 47.971656333038084, "grad_norm": 0.25041335821151733, "learning_rate": 1e-05, "loss": 0.9999, "step": 54160 }, { "epoch": 47.976085031000885, "grad_norm": 0.22820109128952026, "learning_rate": 1e-05, "loss": 0.9991, "step": 54165 }, { "epoch": 47.980513728963686, "grad_norm": 0.21311110258102417, "learning_rate": 1e-05, "loss": 0.9362, "step": 54170 }, { "epoch": 47.98494242692648, "grad_norm": 0.2056189477443695, "learning_rate": 1e-05, "loss": 0.9815, "step": 54175 }, { "epoch": 47.98937112488928, "grad_norm": 0.21524974703788757, "learning_rate": 1e-05, "loss": 1.0085, "step": 54180 }, { "epoch": 47.99379982285208, "grad_norm": 0.2759053111076355, "learning_rate": 1e-05, "loss": 0.9957, "step": 54185 }, { "epoch": 47.998228520814884, "grad_norm": 0.26053863763809204, "learning_rate": 1e-05, "loss": 0.9464, "step": 54190 }, { "epoch": 48.00265721877768, "grad_norm": 0.27095502614974976, "learning_rate": 1e-05, "loss": 0.9967, "step": 54195 }, { "epoch": 48.00708591674048, "grad_norm": 0.21030853688716888, "learning_rate": 1e-05, "loss": 1.0046, "step": 54200 }, { "epoch": 48.01151461470328, "grad_norm": 0.21274219453334808, "learning_rate": 1e-05, "loss": 0.9936, "step": 54205 }, { "epoch": 48.015943312666074, "grad_norm": 0.22843638062477112, "learning_rate": 1e-05, "loss": 0.9487, "step": 54210 }, { "epoch": 48.020372010628876, "grad_norm": 0.21671180427074432, "learning_rate": 1e-05, "loss": 1.0042, "step": 54215 }, { "epoch": 48.02480070859168, "grad_norm": 0.20990025997161865, "learning_rate": 1e-05, "loss": 0.9424, "step": 54220 }, { "epoch": 48.02922940655447, "grad_norm": 0.24164745211601257, "learning_rate": 1e-05, "loss": 0.9981, "step": 54225 }, { "epoch": 48.03365810451727, "grad_norm": 0.22842633724212646, "learning_rate": 1e-05, "loss": 1.0373, "step": 54230 }, { "epoch": 48.03808680248007, "grad_norm": 0.2710988521575928, "learning_rate": 1e-05, "loss": 0.9678, "step": 54235 }, { "epoch": 48.04251550044287, "grad_norm": 0.23926052451133728, "learning_rate": 1e-05, "loss": 0.9655, "step": 54240 }, { "epoch": 48.04694419840567, "grad_norm": 0.25839963555336, "learning_rate": 1e-05, "loss": 0.9309, "step": 54245 }, { "epoch": 48.05137289636847, "grad_norm": 0.23707906901836395, "learning_rate": 1e-05, "loss": 1.0424, "step": 54250 }, { "epoch": 48.055801594331264, "grad_norm": 0.26977527141571045, "learning_rate": 1e-05, "loss": 0.9821, "step": 54255 }, { "epoch": 48.060230292294065, "grad_norm": 0.2278953343629837, "learning_rate": 1e-05, "loss": 0.9817, "step": 54260 }, { "epoch": 48.064658990256866, "grad_norm": 0.24550196528434753, "learning_rate": 1e-05, "loss": 0.971, "step": 54265 }, { "epoch": 48.06908768821966, "grad_norm": 0.23541414737701416, "learning_rate": 1e-05, "loss": 0.9479, "step": 54270 }, { "epoch": 48.07351638618246, "grad_norm": 0.2648276388645172, "learning_rate": 1e-05, "loss": 1.0024, "step": 54275 }, { "epoch": 48.07794508414526, "grad_norm": 0.22903665900230408, "learning_rate": 1e-05, "loss": 0.9965, "step": 54280 }, { "epoch": 48.08237378210806, "grad_norm": 0.26113361120224, "learning_rate": 1e-05, "loss": 0.982, "step": 54285 }, { "epoch": 48.08680248007086, "grad_norm": 0.24845163524150848, "learning_rate": 1e-05, "loss": 0.9443, "step": 54290 }, { "epoch": 48.09123117803366, "grad_norm": 0.2625034749507904, "learning_rate": 1e-05, "loss": 0.9391, "step": 54295 }, { "epoch": 48.09565987599645, "grad_norm": 0.2668391466140747, "learning_rate": 1e-05, "loss": 0.9645, "step": 54300 }, { "epoch": 48.100088573959255, "grad_norm": 0.2723771333694458, "learning_rate": 1e-05, "loss": 0.9772, "step": 54305 }, { "epoch": 48.104517271922056, "grad_norm": 0.2708938717842102, "learning_rate": 1e-05, "loss": 1.007, "step": 54310 }, { "epoch": 48.10894596988486, "grad_norm": 0.2522117495536804, "learning_rate": 1e-05, "loss": 0.9804, "step": 54315 }, { "epoch": 48.11337466784765, "grad_norm": 0.2573252320289612, "learning_rate": 1e-05, "loss": 0.9469, "step": 54320 }, { "epoch": 48.11780336581045, "grad_norm": 0.2446250021457672, "learning_rate": 1e-05, "loss": 1.014, "step": 54325 }, { "epoch": 48.122232063773254, "grad_norm": 0.23871248960494995, "learning_rate": 1e-05, "loss": 0.9478, "step": 54330 }, { "epoch": 48.12666076173605, "grad_norm": 0.23324066400527954, "learning_rate": 1e-05, "loss": 0.9253, "step": 54335 }, { "epoch": 48.13108945969885, "grad_norm": 0.220278799533844, "learning_rate": 1e-05, "loss": 0.9251, "step": 54340 }, { "epoch": 48.13551815766165, "grad_norm": 0.2946358025074005, "learning_rate": 1e-05, "loss": 0.9556, "step": 54345 }, { "epoch": 48.139946855624444, "grad_norm": 0.22677218914031982, "learning_rate": 1e-05, "loss": 0.9799, "step": 54350 }, { "epoch": 48.144375553587246, "grad_norm": 0.24953903257846832, "learning_rate": 1e-05, "loss": 0.9475, "step": 54355 }, { "epoch": 48.14880425155005, "grad_norm": 0.2694138288497925, "learning_rate": 1e-05, "loss": 0.9831, "step": 54360 }, { "epoch": 48.15323294951284, "grad_norm": 0.26679837703704834, "learning_rate": 1e-05, "loss": 0.9685, "step": 54365 }, { "epoch": 48.15766164747564, "grad_norm": 0.2267667055130005, "learning_rate": 1e-05, "loss": 0.9523, "step": 54370 }, { "epoch": 48.16209034543844, "grad_norm": 0.2701794505119324, "learning_rate": 1e-05, "loss": 0.9667, "step": 54375 }, { "epoch": 48.16651904340124, "grad_norm": 0.23114758729934692, "learning_rate": 1e-05, "loss": 0.9867, "step": 54380 }, { "epoch": 48.17094774136404, "grad_norm": 0.2564833164215088, "learning_rate": 1e-05, "loss": 0.9787, "step": 54385 }, { "epoch": 48.17537643932684, "grad_norm": 0.2633117437362671, "learning_rate": 1e-05, "loss": 1.0632, "step": 54390 }, { "epoch": 48.179805137289634, "grad_norm": 0.24890990555286407, "learning_rate": 1e-05, "loss": 0.9849, "step": 54395 }, { "epoch": 48.184233835252435, "grad_norm": 0.2447938770055771, "learning_rate": 1e-05, "loss": 1.0198, "step": 54400 }, { "epoch": 48.188662533215236, "grad_norm": 0.2054462432861328, "learning_rate": 1e-05, "loss": 0.993, "step": 54405 }, { "epoch": 48.19309123117803, "grad_norm": 0.23189909756183624, "learning_rate": 1e-05, "loss": 1.0023, "step": 54410 }, { "epoch": 48.19751992914083, "grad_norm": 0.22702151536941528, "learning_rate": 1e-05, "loss": 0.9697, "step": 54415 }, { "epoch": 48.20194862710363, "grad_norm": 0.22041070461273193, "learning_rate": 1e-05, "loss": 0.9281, "step": 54420 }, { "epoch": 48.20637732506643, "grad_norm": 0.22508110105991364, "learning_rate": 1e-05, "loss": 0.9614, "step": 54425 }, { "epoch": 48.21080602302923, "grad_norm": 0.24688751995563507, "learning_rate": 1e-05, "loss": 0.9378, "step": 54430 }, { "epoch": 48.21523472099203, "grad_norm": 0.2732565999031067, "learning_rate": 1e-05, "loss": 0.9804, "step": 54435 }, { "epoch": 48.21966341895483, "grad_norm": 0.20759116113185883, "learning_rate": 1e-05, "loss": 0.985, "step": 54440 }, { "epoch": 48.224092116917625, "grad_norm": 0.26913273334503174, "learning_rate": 1e-05, "loss": 0.8892, "step": 54445 }, { "epoch": 48.228520814880426, "grad_norm": 0.2323606163263321, "learning_rate": 1e-05, "loss": 0.9908, "step": 54450 }, { "epoch": 48.23294951284323, "grad_norm": 0.2318078726530075, "learning_rate": 1e-05, "loss": 0.9749, "step": 54455 }, { "epoch": 48.23737821080602, "grad_norm": 0.2297491878271103, "learning_rate": 1e-05, "loss": 0.978, "step": 54460 }, { "epoch": 48.24180690876882, "grad_norm": 0.24085623025894165, "learning_rate": 1e-05, "loss": 1.0047, "step": 54465 }, { "epoch": 48.246235606731624, "grad_norm": 0.23873399198055267, "learning_rate": 1e-05, "loss": 0.9592, "step": 54470 }, { "epoch": 48.25066430469442, "grad_norm": 0.2656705975532532, "learning_rate": 1e-05, "loss": 0.9703, "step": 54475 }, { "epoch": 48.25509300265722, "grad_norm": 0.24265353381633759, "learning_rate": 1e-05, "loss": 0.9222, "step": 54480 }, { "epoch": 48.25952170062002, "grad_norm": 0.2673168182373047, "learning_rate": 1e-05, "loss": 0.9785, "step": 54485 }, { "epoch": 48.263950398582814, "grad_norm": 0.28807422518730164, "learning_rate": 1e-05, "loss": 0.9852, "step": 54490 }, { "epoch": 48.268379096545615, "grad_norm": 0.24280890822410583, "learning_rate": 1e-05, "loss": 0.9256, "step": 54495 }, { "epoch": 48.27280779450842, "grad_norm": 0.2847305238246918, "learning_rate": 1e-05, "loss": 1.0037, "step": 54500 }, { "epoch": 48.27723649247121, "grad_norm": 0.23908749222755432, "learning_rate": 1e-05, "loss": 0.964, "step": 54505 }, { "epoch": 48.28166519043401, "grad_norm": 0.20606811344623566, "learning_rate": 1e-05, "loss": 0.9916, "step": 54510 }, { "epoch": 48.28609388839681, "grad_norm": 0.30251064896583557, "learning_rate": 1e-05, "loss": 1.0054, "step": 54515 }, { "epoch": 48.29052258635961, "grad_norm": 0.27221035957336426, "learning_rate": 1e-05, "loss": 0.9618, "step": 54520 }, { "epoch": 48.29495128432241, "grad_norm": 0.22023601830005646, "learning_rate": 1e-05, "loss": 1.0239, "step": 54525 }, { "epoch": 48.29937998228521, "grad_norm": 0.2757318317890167, "learning_rate": 1e-05, "loss": 0.9501, "step": 54530 }, { "epoch": 48.303808680248004, "grad_norm": 0.2227504700422287, "learning_rate": 1e-05, "loss": 0.9924, "step": 54535 }, { "epoch": 48.308237378210805, "grad_norm": 0.19213366508483887, "learning_rate": 1e-05, "loss": 0.9191, "step": 54540 }, { "epoch": 48.312666076173606, "grad_norm": 0.21350793540477753, "learning_rate": 1e-05, "loss": 1.0044, "step": 54545 }, { "epoch": 48.31709477413641, "grad_norm": 0.26234567165374756, "learning_rate": 1e-05, "loss": 0.9745, "step": 54550 }, { "epoch": 48.3215234720992, "grad_norm": 0.2475329041481018, "learning_rate": 1e-05, "loss": 0.9658, "step": 54555 }, { "epoch": 48.325952170062, "grad_norm": 0.2819952070713043, "learning_rate": 1e-05, "loss": 1.0143, "step": 54560 }, { "epoch": 48.330380868024804, "grad_norm": 0.23078052699565887, "learning_rate": 1e-05, "loss": 0.9849, "step": 54565 }, { "epoch": 48.3348095659876, "grad_norm": 0.2415207177400589, "learning_rate": 1e-05, "loss": 0.9815, "step": 54570 }, { "epoch": 48.3392382639504, "grad_norm": 0.21792201697826385, "learning_rate": 1e-05, "loss": 0.9467, "step": 54575 }, { "epoch": 48.3436669619132, "grad_norm": 0.21483545005321503, "learning_rate": 1e-05, "loss": 0.9952, "step": 54580 }, { "epoch": 48.348095659875995, "grad_norm": 0.24992138147354126, "learning_rate": 1e-05, "loss": 0.9597, "step": 54585 }, { "epoch": 48.352524357838796, "grad_norm": 0.21953345835208893, "learning_rate": 1e-05, "loss": 1.0051, "step": 54590 }, { "epoch": 48.3569530558016, "grad_norm": 0.2584550678730011, "learning_rate": 1e-05, "loss": 0.988, "step": 54595 }, { "epoch": 48.36138175376439, "grad_norm": 0.24125786125659943, "learning_rate": 1e-05, "loss": 1.0328, "step": 54600 }, { "epoch": 48.36581045172719, "grad_norm": 0.2452802062034607, "learning_rate": 1e-05, "loss": 0.9579, "step": 54605 }, { "epoch": 48.37023914968999, "grad_norm": 0.22685956954956055, "learning_rate": 1e-05, "loss": 0.9205, "step": 54610 }, { "epoch": 48.37466784765279, "grad_norm": 0.22611494362354279, "learning_rate": 1e-05, "loss": 0.9838, "step": 54615 }, { "epoch": 48.37909654561559, "grad_norm": 0.2419653683900833, "learning_rate": 1e-05, "loss": 0.9858, "step": 54620 }, { "epoch": 48.38352524357839, "grad_norm": 0.22503463923931122, "learning_rate": 1e-05, "loss": 0.9233, "step": 54625 }, { "epoch": 48.387953941541184, "grad_norm": 0.22026920318603516, "learning_rate": 1e-05, "loss": 0.9815, "step": 54630 }, { "epoch": 48.392382639503985, "grad_norm": 0.22886820137500763, "learning_rate": 1e-05, "loss": 0.9702, "step": 54635 }, { "epoch": 48.39681133746679, "grad_norm": 0.26335409283638, "learning_rate": 1e-05, "loss": 1.0063, "step": 54640 }, { "epoch": 48.40124003542958, "grad_norm": 0.24266499280929565, "learning_rate": 1e-05, "loss": 0.9731, "step": 54645 }, { "epoch": 48.40566873339238, "grad_norm": 0.22106784582138062, "learning_rate": 1e-05, "loss": 0.9298, "step": 54650 }, { "epoch": 48.41009743135518, "grad_norm": 0.23873266577720642, "learning_rate": 1e-05, "loss": 0.9677, "step": 54655 }, { "epoch": 48.41452612931798, "grad_norm": 0.263793408870697, "learning_rate": 1e-05, "loss": 1.0136, "step": 54660 }, { "epoch": 48.41895482728078, "grad_norm": 0.20200185477733612, "learning_rate": 1e-05, "loss": 0.9402, "step": 54665 }, { "epoch": 48.42338352524358, "grad_norm": 0.21077078580856323, "learning_rate": 1e-05, "loss": 0.9099, "step": 54670 }, { "epoch": 48.42781222320638, "grad_norm": 0.22018828988075256, "learning_rate": 1e-05, "loss": 0.9327, "step": 54675 }, { "epoch": 48.432240921169175, "grad_norm": 0.2709357440471649, "learning_rate": 1e-05, "loss": 0.9644, "step": 54680 }, { "epoch": 48.436669619131976, "grad_norm": 0.24960030615329742, "learning_rate": 1e-05, "loss": 1.0213, "step": 54685 }, { "epoch": 48.44109831709478, "grad_norm": 0.30026060342788696, "learning_rate": 1e-05, "loss": 0.9715, "step": 54690 }, { "epoch": 48.44552701505757, "grad_norm": 0.2196469008922577, "learning_rate": 1e-05, "loss": 0.9758, "step": 54695 }, { "epoch": 48.44995571302037, "grad_norm": 0.2054162323474884, "learning_rate": 1e-05, "loss": 1.0279, "step": 54700 }, { "epoch": 48.454384410983174, "grad_norm": 0.264117032289505, "learning_rate": 1e-05, "loss": 0.9801, "step": 54705 }, { "epoch": 48.45881310894597, "grad_norm": 0.2796826958656311, "learning_rate": 1e-05, "loss": 0.9606, "step": 54710 }, { "epoch": 48.46324180690877, "grad_norm": 0.26269227266311646, "learning_rate": 1e-05, "loss": 0.9714, "step": 54715 }, { "epoch": 48.46767050487157, "grad_norm": 0.2235548496246338, "learning_rate": 1e-05, "loss": 0.9833, "step": 54720 }, { "epoch": 48.472099202834364, "grad_norm": 0.23668207228183746, "learning_rate": 1e-05, "loss": 1.0468, "step": 54725 }, { "epoch": 48.476527900797166, "grad_norm": 0.24984893202781677, "learning_rate": 1e-05, "loss": 0.9611, "step": 54730 }, { "epoch": 48.48095659875997, "grad_norm": 0.2638162672519684, "learning_rate": 1e-05, "loss": 0.9401, "step": 54735 }, { "epoch": 48.48538529672276, "grad_norm": 0.22550000250339508, "learning_rate": 1e-05, "loss": 0.9606, "step": 54740 }, { "epoch": 48.48981399468556, "grad_norm": 0.2607899308204651, "learning_rate": 1e-05, "loss": 0.9843, "step": 54745 }, { "epoch": 48.49424269264836, "grad_norm": 0.25486019253730774, "learning_rate": 1e-05, "loss": 0.937, "step": 54750 }, { "epoch": 48.49867139061116, "grad_norm": 0.22319072484970093, "learning_rate": 1e-05, "loss": 0.9988, "step": 54755 }, { "epoch": 48.50310008857396, "grad_norm": 0.25386983156204224, "learning_rate": 1e-05, "loss": 0.9863, "step": 54760 }, { "epoch": 48.50752878653676, "grad_norm": 0.20265452563762665, "learning_rate": 1e-05, "loss": 0.9627, "step": 54765 }, { "epoch": 48.511957484499554, "grad_norm": 0.2146308869123459, "learning_rate": 1e-05, "loss": 0.9698, "step": 54770 }, { "epoch": 48.516386182462355, "grad_norm": 0.23876622319221497, "learning_rate": 1e-05, "loss": 1.0354, "step": 54775 }, { "epoch": 48.520814880425156, "grad_norm": 0.2343628853559494, "learning_rate": 1e-05, "loss": 1.0444, "step": 54780 }, { "epoch": 48.52524357838795, "grad_norm": 0.25492337346076965, "learning_rate": 1e-05, "loss": 0.9972, "step": 54785 }, { "epoch": 48.52967227635075, "grad_norm": 0.221262127161026, "learning_rate": 1e-05, "loss": 0.972, "step": 54790 }, { "epoch": 48.53410097431355, "grad_norm": 0.25594913959503174, "learning_rate": 1e-05, "loss": 1.0174, "step": 54795 }, { "epoch": 48.538529672276354, "grad_norm": 0.24185062944889069, "learning_rate": 1e-05, "loss": 0.9605, "step": 54800 }, { "epoch": 48.54295837023915, "grad_norm": 0.23660625517368317, "learning_rate": 1e-05, "loss": 1.0054, "step": 54805 }, { "epoch": 48.54738706820195, "grad_norm": 0.2552359402179718, "learning_rate": 1e-05, "loss": 0.9832, "step": 54810 }, { "epoch": 48.55181576616475, "grad_norm": 0.21541759371757507, "learning_rate": 1e-05, "loss": 0.9633, "step": 54815 }, { "epoch": 48.556244464127545, "grad_norm": 0.26602432131767273, "learning_rate": 1e-05, "loss": 0.9204, "step": 54820 }, { "epoch": 48.560673162090346, "grad_norm": 0.23079116642475128, "learning_rate": 1e-05, "loss": 0.9469, "step": 54825 }, { "epoch": 48.56510186005315, "grad_norm": 0.23874498903751373, "learning_rate": 1e-05, "loss": 1.0279, "step": 54830 }, { "epoch": 48.56953055801594, "grad_norm": 0.26916539669036865, "learning_rate": 1e-05, "loss": 0.9427, "step": 54835 }, { "epoch": 48.57395925597874, "grad_norm": 0.22758601605892181, "learning_rate": 1e-05, "loss": 0.9888, "step": 54840 }, { "epoch": 48.578387953941544, "grad_norm": 0.2415654957294464, "learning_rate": 1e-05, "loss": 0.9857, "step": 54845 }, { "epoch": 48.58281665190434, "grad_norm": 0.27855557203292847, "learning_rate": 1e-05, "loss": 0.9691, "step": 54850 }, { "epoch": 48.58724534986714, "grad_norm": 0.2430223971605301, "learning_rate": 1e-05, "loss": 0.9793, "step": 54855 }, { "epoch": 48.59167404782994, "grad_norm": 0.2339971363544464, "learning_rate": 1e-05, "loss": 1.0437, "step": 54860 }, { "epoch": 48.596102745792734, "grad_norm": 0.2321372628211975, "learning_rate": 1e-05, "loss": 0.9604, "step": 54865 }, { "epoch": 48.600531443755536, "grad_norm": 0.23226068913936615, "learning_rate": 1e-05, "loss": 0.991, "step": 54870 }, { "epoch": 48.60496014171834, "grad_norm": 0.19789043068885803, "learning_rate": 1e-05, "loss": 0.9574, "step": 54875 }, { "epoch": 48.60938883968113, "grad_norm": 0.2241741269826889, "learning_rate": 1e-05, "loss": 1.0115, "step": 54880 }, { "epoch": 48.61381753764393, "grad_norm": 0.22698169946670532, "learning_rate": 1e-05, "loss": 0.9905, "step": 54885 }, { "epoch": 48.61824623560673, "grad_norm": 0.2848401963710785, "learning_rate": 1e-05, "loss": 1.0345, "step": 54890 }, { "epoch": 48.62267493356953, "grad_norm": 0.2099856585264206, "learning_rate": 1e-05, "loss": 0.9359, "step": 54895 }, { "epoch": 48.62710363153233, "grad_norm": 0.24514034390449524, "learning_rate": 1e-05, "loss": 1.0077, "step": 54900 }, { "epoch": 48.63153232949513, "grad_norm": 0.2556520998477936, "learning_rate": 1e-05, "loss": 0.9607, "step": 54905 }, { "epoch": 48.635961027457924, "grad_norm": 0.22490465641021729, "learning_rate": 1e-05, "loss": 0.9864, "step": 54910 }, { "epoch": 48.640389725420725, "grad_norm": 0.2267404943704605, "learning_rate": 1e-05, "loss": 0.9919, "step": 54915 }, { "epoch": 48.644818423383526, "grad_norm": 0.25889432430267334, "learning_rate": 1e-05, "loss": 1.0173, "step": 54920 }, { "epoch": 48.64924712134633, "grad_norm": 0.27019771933555603, "learning_rate": 1e-05, "loss": 1.0759, "step": 54925 }, { "epoch": 48.65367581930912, "grad_norm": 0.22395308315753937, "learning_rate": 1e-05, "loss": 0.9474, "step": 54930 }, { "epoch": 48.65810451727192, "grad_norm": 0.22619661688804626, "learning_rate": 1e-05, "loss": 0.9572, "step": 54935 }, { "epoch": 48.662533215234724, "grad_norm": 0.2858215868473053, "learning_rate": 1e-05, "loss": 0.9328, "step": 54940 }, { "epoch": 48.66696191319752, "grad_norm": 0.2700244188308716, "learning_rate": 1e-05, "loss": 0.9627, "step": 54945 }, { "epoch": 48.67139061116032, "grad_norm": 0.2147485613822937, "learning_rate": 1e-05, "loss": 1.0032, "step": 54950 }, { "epoch": 48.67581930912312, "grad_norm": 0.23243673145771027, "learning_rate": 1e-05, "loss": 1.0111, "step": 54955 }, { "epoch": 48.680248007085915, "grad_norm": 0.2546096444129944, "learning_rate": 1e-05, "loss": 0.9603, "step": 54960 }, { "epoch": 48.684676705048716, "grad_norm": 0.2619113028049469, "learning_rate": 1e-05, "loss": 0.9847, "step": 54965 }, { "epoch": 48.68910540301152, "grad_norm": 0.2887438237667084, "learning_rate": 1e-05, "loss": 0.994, "step": 54970 }, { "epoch": 48.69353410097431, "grad_norm": 0.23999159038066864, "learning_rate": 1e-05, "loss": 1.0149, "step": 54975 }, { "epoch": 48.69796279893711, "grad_norm": 0.2825166583061218, "learning_rate": 1e-05, "loss": 0.9988, "step": 54980 }, { "epoch": 48.702391496899914, "grad_norm": 0.2746540606021881, "learning_rate": 1e-05, "loss": 0.9842, "step": 54985 }, { "epoch": 48.70682019486271, "grad_norm": 0.21760587394237518, "learning_rate": 1e-05, "loss": 0.9719, "step": 54990 }, { "epoch": 48.71124889282551, "grad_norm": 0.25527656078338623, "learning_rate": 1e-05, "loss": 0.969, "step": 54995 }, { "epoch": 48.71567759078831, "grad_norm": 0.21348458528518677, "learning_rate": 1e-05, "loss": 1.0256, "step": 55000 }, { "epoch": 48.720106288751104, "grad_norm": 0.2200413942337036, "learning_rate": 1e-05, "loss": 0.9628, "step": 55005 }, { "epoch": 48.724534986713905, "grad_norm": 0.26661837100982666, "learning_rate": 1e-05, "loss": 0.9733, "step": 55010 }, { "epoch": 48.72896368467671, "grad_norm": 0.2207280397415161, "learning_rate": 1e-05, "loss": 0.9841, "step": 55015 }, { "epoch": 48.7333923826395, "grad_norm": 0.21707503497600555, "learning_rate": 1e-05, "loss": 0.9684, "step": 55020 }, { "epoch": 48.7378210806023, "grad_norm": 0.21548376977443695, "learning_rate": 1e-05, "loss": 1.0106, "step": 55025 }, { "epoch": 48.7422497785651, "grad_norm": 0.3349223732948303, "learning_rate": 1e-05, "loss": 0.9562, "step": 55030 }, { "epoch": 48.7466784765279, "grad_norm": 0.2791121006011963, "learning_rate": 1e-05, "loss": 0.9545, "step": 55035 }, { "epoch": 48.7511071744907, "grad_norm": 0.24870958924293518, "learning_rate": 1e-05, "loss": 0.9966, "step": 55040 }, { "epoch": 48.7555358724535, "grad_norm": 0.2350766658782959, "learning_rate": 1e-05, "loss": 0.9517, "step": 55045 }, { "epoch": 48.7599645704163, "grad_norm": 0.26081809401512146, "learning_rate": 1e-05, "loss": 0.939, "step": 55050 }, { "epoch": 48.764393268379095, "grad_norm": 0.24910062551498413, "learning_rate": 1e-05, "loss": 0.9632, "step": 55055 }, { "epoch": 48.768821966341896, "grad_norm": 0.29383233189582825, "learning_rate": 1e-05, "loss": 0.9542, "step": 55060 }, { "epoch": 48.7732506643047, "grad_norm": 0.24867188930511475, "learning_rate": 1e-05, "loss": 0.9765, "step": 55065 }, { "epoch": 48.77767936226749, "grad_norm": 0.23956997692584991, "learning_rate": 1e-05, "loss": 0.9695, "step": 55070 }, { "epoch": 48.78210806023029, "grad_norm": 0.25531238317489624, "learning_rate": 1e-05, "loss": 0.9768, "step": 55075 }, { "epoch": 48.786536758193094, "grad_norm": 0.23537547886371613, "learning_rate": 1e-05, "loss": 1.0054, "step": 55080 }, { "epoch": 48.79096545615589, "grad_norm": 0.26684388518333435, "learning_rate": 1e-05, "loss": 0.989, "step": 55085 }, { "epoch": 48.79539415411869, "grad_norm": 0.2253996580839157, "learning_rate": 1e-05, "loss": 0.9684, "step": 55090 }, { "epoch": 48.79982285208149, "grad_norm": 0.2288932353258133, "learning_rate": 1e-05, "loss": 0.968, "step": 55095 }, { "epoch": 48.804251550044285, "grad_norm": 0.25369077920913696, "learning_rate": 1e-05, "loss": 0.9637, "step": 55100 }, { "epoch": 48.808680248007086, "grad_norm": 0.2032015323638916, "learning_rate": 1e-05, "loss": 0.956, "step": 55105 }, { "epoch": 48.81310894596989, "grad_norm": 0.2597110867500305, "learning_rate": 1e-05, "loss": 0.9304, "step": 55110 }, { "epoch": 48.81753764393268, "grad_norm": 0.2512738108634949, "learning_rate": 1e-05, "loss": 0.9885, "step": 55115 }, { "epoch": 48.82196634189548, "grad_norm": 0.35800260305404663, "learning_rate": 1e-05, "loss": 0.9594, "step": 55120 }, { "epoch": 48.826395039858284, "grad_norm": 0.2719745934009552, "learning_rate": 1e-05, "loss": 0.9799, "step": 55125 }, { "epoch": 48.83082373782108, "grad_norm": 0.21742922067642212, "learning_rate": 1e-05, "loss": 1.015, "step": 55130 }, { "epoch": 48.83525243578388, "grad_norm": 0.2501205503940582, "learning_rate": 1e-05, "loss": 0.937, "step": 55135 }, { "epoch": 48.83968113374668, "grad_norm": 0.26601022481918335, "learning_rate": 1e-05, "loss": 0.9126, "step": 55140 }, { "epoch": 48.844109831709474, "grad_norm": 0.24707823991775513, "learning_rate": 1e-05, "loss": 0.9373, "step": 55145 }, { "epoch": 48.848538529672275, "grad_norm": 0.2571483850479126, "learning_rate": 1e-05, "loss": 0.9819, "step": 55150 }, { "epoch": 48.85296722763508, "grad_norm": 0.262888103723526, "learning_rate": 1e-05, "loss": 0.9709, "step": 55155 }, { "epoch": 48.85739592559787, "grad_norm": 0.21559688448905945, "learning_rate": 1e-05, "loss": 0.9805, "step": 55160 }, { "epoch": 48.86182462356067, "grad_norm": 0.24982097744941711, "learning_rate": 1e-05, "loss": 0.9844, "step": 55165 }, { "epoch": 48.86625332152347, "grad_norm": 0.24011340737342834, "learning_rate": 1e-05, "loss": 0.9601, "step": 55170 }, { "epoch": 48.870682019486274, "grad_norm": 0.23663382232189178, "learning_rate": 1e-05, "loss": 0.9548, "step": 55175 }, { "epoch": 48.87511071744907, "grad_norm": 0.22923614084720612, "learning_rate": 1e-05, "loss": 0.995, "step": 55180 }, { "epoch": 48.87953941541187, "grad_norm": 0.22929036617279053, "learning_rate": 1e-05, "loss": 0.9928, "step": 55185 }, { "epoch": 48.88396811337467, "grad_norm": 0.2659350335597992, "learning_rate": 1e-05, "loss": 0.9714, "step": 55190 }, { "epoch": 48.888396811337465, "grad_norm": 0.26742982864379883, "learning_rate": 1e-05, "loss": 0.9471, "step": 55195 }, { "epoch": 48.892825509300266, "grad_norm": 0.26196664571762085, "learning_rate": 1e-05, "loss": 0.9457, "step": 55200 }, { "epoch": 48.89725420726307, "grad_norm": 0.1893482208251953, "learning_rate": 1e-05, "loss": 0.9706, "step": 55205 }, { "epoch": 48.90168290522586, "grad_norm": 0.24410848319530487, "learning_rate": 1e-05, "loss": 1.0018, "step": 55210 }, { "epoch": 48.90611160318866, "grad_norm": 0.2510742247104645, "learning_rate": 1e-05, "loss": 1.0251, "step": 55215 }, { "epoch": 48.910540301151464, "grad_norm": 0.2423769235610962, "learning_rate": 1e-05, "loss": 0.9613, "step": 55220 }, { "epoch": 48.91496899911426, "grad_norm": 0.24973231554031372, "learning_rate": 1e-05, "loss": 0.9777, "step": 55225 }, { "epoch": 48.91939769707706, "grad_norm": 0.2149386703968048, "learning_rate": 1e-05, "loss": 0.9869, "step": 55230 }, { "epoch": 48.92382639503986, "grad_norm": 0.2188480794429779, "learning_rate": 1e-05, "loss": 1.0036, "step": 55235 }, { "epoch": 48.928255093002655, "grad_norm": 0.2532457709312439, "learning_rate": 1e-05, "loss": 0.9648, "step": 55240 }, { "epoch": 48.932683790965456, "grad_norm": 0.28841453790664673, "learning_rate": 1e-05, "loss": 0.9658, "step": 55245 }, { "epoch": 48.93711248892826, "grad_norm": 0.23716680705547333, "learning_rate": 1e-05, "loss": 0.9769, "step": 55250 }, { "epoch": 48.94154118689105, "grad_norm": 0.239442378282547, "learning_rate": 1e-05, "loss": 0.9688, "step": 55255 }, { "epoch": 48.94596988485385, "grad_norm": 0.2541291415691376, "learning_rate": 1e-05, "loss": 0.9539, "step": 55260 }, { "epoch": 48.95039858281665, "grad_norm": 0.27392786741256714, "learning_rate": 1e-05, "loss": 0.9556, "step": 55265 }, { "epoch": 48.95482728077945, "grad_norm": 0.24729777872562408, "learning_rate": 1e-05, "loss": 1.0285, "step": 55270 }, { "epoch": 48.95925597874225, "grad_norm": 0.2830755114555359, "learning_rate": 1e-05, "loss": 0.9872, "step": 55275 }, { "epoch": 48.96368467670505, "grad_norm": 0.2020728439092636, "learning_rate": 1e-05, "loss": 1.0052, "step": 55280 }, { "epoch": 48.96811337466785, "grad_norm": 0.2305692881345749, "learning_rate": 1e-05, "loss": 0.9815, "step": 55285 }, { "epoch": 48.972542072630645, "grad_norm": 0.22978775203227997, "learning_rate": 1e-05, "loss": 1.0112, "step": 55290 }, { "epoch": 48.97697077059345, "grad_norm": 0.28941699862480164, "learning_rate": 1e-05, "loss": 0.9262, "step": 55295 }, { "epoch": 48.98139946855625, "grad_norm": 0.2947419285774231, "learning_rate": 1e-05, "loss": 0.9571, "step": 55300 }, { "epoch": 48.98582816651904, "grad_norm": 0.21778425574302673, "learning_rate": 1e-05, "loss": 0.9825, "step": 55305 }, { "epoch": 48.99025686448184, "grad_norm": 0.3094514310359955, "learning_rate": 1e-05, "loss": 0.9889, "step": 55310 }, { "epoch": 48.994685562444644, "grad_norm": 0.22837582230567932, "learning_rate": 1e-05, "loss": 0.9353, "step": 55315 }, { "epoch": 48.99911426040744, "grad_norm": 0.20818109810352325, "learning_rate": 1e-05, "loss": 0.9755, "step": 55320 }, { "epoch": 49.00354295837024, "grad_norm": 0.28077957034111023, "learning_rate": 1e-05, "loss": 0.9192, "step": 55325 }, { "epoch": 49.00797165633304, "grad_norm": 0.2179676741361618, "learning_rate": 1e-05, "loss": 0.9435, "step": 55330 }, { "epoch": 49.012400354295835, "grad_norm": 0.2138155847787857, "learning_rate": 1e-05, "loss": 0.8731, "step": 55335 }, { "epoch": 49.016829052258636, "grad_norm": 0.31002724170684814, "learning_rate": 1e-05, "loss": 0.9983, "step": 55340 }, { "epoch": 49.02125775022144, "grad_norm": 0.23113851249217987, "learning_rate": 1e-05, "loss": 0.9352, "step": 55345 }, { "epoch": 49.02568644818423, "grad_norm": 0.2922385632991791, "learning_rate": 1e-05, "loss": 0.9353, "step": 55350 }, { "epoch": 49.03011514614703, "grad_norm": 0.28313514590263367, "learning_rate": 1e-05, "loss": 0.9652, "step": 55355 }, { "epoch": 49.034543844109834, "grad_norm": 0.2304021269083023, "learning_rate": 1e-05, "loss": 0.9765, "step": 55360 }, { "epoch": 49.03897254207263, "grad_norm": 0.23780737817287445, "learning_rate": 1e-05, "loss": 1.0154, "step": 55365 }, { "epoch": 49.04340124003543, "grad_norm": 0.2541655898094177, "learning_rate": 1e-05, "loss": 0.9798, "step": 55370 }, { "epoch": 49.04782993799823, "grad_norm": 0.23372630774974823, "learning_rate": 1e-05, "loss": 0.9154, "step": 55375 }, { "epoch": 49.052258635961024, "grad_norm": 0.2583569288253784, "learning_rate": 1e-05, "loss": 0.9826, "step": 55380 }, { "epoch": 49.056687333923826, "grad_norm": 0.28064998984336853, "learning_rate": 1e-05, "loss": 0.9555, "step": 55385 }, { "epoch": 49.06111603188663, "grad_norm": 0.23718559741973877, "learning_rate": 1e-05, "loss": 0.9524, "step": 55390 }, { "epoch": 49.06554472984942, "grad_norm": 0.24140843749046326, "learning_rate": 1e-05, "loss": 0.9324, "step": 55395 }, { "epoch": 49.06997342781222, "grad_norm": 0.250434935092926, "learning_rate": 1e-05, "loss": 1.0298, "step": 55400 }, { "epoch": 49.07440212577502, "grad_norm": 0.2934398651123047, "learning_rate": 1e-05, "loss": 0.9707, "step": 55405 }, { "epoch": 49.078830823737825, "grad_norm": 0.21975839138031006, "learning_rate": 1e-05, "loss": 0.9579, "step": 55410 }, { "epoch": 49.08325952170062, "grad_norm": 0.24368907511234283, "learning_rate": 1e-05, "loss": 0.9385, "step": 55415 }, { "epoch": 49.08768821966342, "grad_norm": 0.2522338032722473, "learning_rate": 1e-05, "loss": 0.973, "step": 55420 }, { "epoch": 49.09211691762622, "grad_norm": 0.30634424090385437, "learning_rate": 1e-05, "loss": 0.977, "step": 55425 }, { "epoch": 49.096545615589015, "grad_norm": 0.27043506503105164, "learning_rate": 1e-05, "loss": 0.9732, "step": 55430 }, { "epoch": 49.100974313551816, "grad_norm": 0.24883215129375458, "learning_rate": 1e-05, "loss": 0.9965, "step": 55435 }, { "epoch": 49.10540301151462, "grad_norm": 0.2334386110305786, "learning_rate": 1e-05, "loss": 0.9868, "step": 55440 }, { "epoch": 49.10983170947741, "grad_norm": 0.2280300408601761, "learning_rate": 1e-05, "loss": 0.9399, "step": 55445 }, { "epoch": 49.11426040744021, "grad_norm": 0.22359693050384521, "learning_rate": 1e-05, "loss": 0.9368, "step": 55450 }, { "epoch": 49.118689105403014, "grad_norm": 0.22574204206466675, "learning_rate": 1e-05, "loss": 0.9929, "step": 55455 }, { "epoch": 49.12311780336581, "grad_norm": 0.26959648728370667, "learning_rate": 1e-05, "loss": 0.9534, "step": 55460 }, { "epoch": 49.12754650132861, "grad_norm": 0.2523326873779297, "learning_rate": 1e-05, "loss": 0.9978, "step": 55465 }, { "epoch": 49.13197519929141, "grad_norm": 0.26202499866485596, "learning_rate": 1e-05, "loss": 0.9385, "step": 55470 }, { "epoch": 49.136403897254205, "grad_norm": 0.2383251190185547, "learning_rate": 1e-05, "loss": 0.9659, "step": 55475 }, { "epoch": 49.140832595217006, "grad_norm": 0.22497080266475677, "learning_rate": 1e-05, "loss": 0.9391, "step": 55480 }, { "epoch": 49.14526129317981, "grad_norm": 0.363065242767334, "learning_rate": 1e-05, "loss": 0.9424, "step": 55485 }, { "epoch": 49.1496899911426, "grad_norm": 0.3056315779685974, "learning_rate": 1e-05, "loss": 0.9387, "step": 55490 }, { "epoch": 49.1541186891054, "grad_norm": 0.2373078316450119, "learning_rate": 1e-05, "loss": 0.9897, "step": 55495 }, { "epoch": 49.158547387068204, "grad_norm": 0.3665105104446411, "learning_rate": 1e-05, "loss": 0.9591, "step": 55500 }, { "epoch": 49.162976085031, "grad_norm": 0.2530321776866913, "learning_rate": 1e-05, "loss": 1.0281, "step": 55505 }, { "epoch": 49.1674047829938, "grad_norm": 0.2833568751811981, "learning_rate": 1e-05, "loss": 0.9399, "step": 55510 }, { "epoch": 49.1718334809566, "grad_norm": 0.24073496460914612, "learning_rate": 1e-05, "loss": 0.9772, "step": 55515 }, { "epoch": 49.176262178919394, "grad_norm": 0.23924696445465088, "learning_rate": 1e-05, "loss": 1.0373, "step": 55520 }, { "epoch": 49.180690876882196, "grad_norm": 0.24713070690631866, "learning_rate": 1e-05, "loss": 0.9503, "step": 55525 }, { "epoch": 49.185119574845, "grad_norm": 0.2487911432981491, "learning_rate": 1e-05, "loss": 0.9419, "step": 55530 }, { "epoch": 49.1895482728078, "grad_norm": 0.2104041874408722, "learning_rate": 1e-05, "loss": 0.9846, "step": 55535 }, { "epoch": 49.19397697077059, "grad_norm": 0.2619360387325287, "learning_rate": 1e-05, "loss": 0.9249, "step": 55540 }, { "epoch": 49.19840566873339, "grad_norm": 0.2509959638118744, "learning_rate": 1e-05, "loss": 0.9609, "step": 55545 }, { "epoch": 49.202834366696194, "grad_norm": 0.20325206220149994, "learning_rate": 1e-05, "loss": 0.9652, "step": 55550 }, { "epoch": 49.20726306465899, "grad_norm": 0.2482072114944458, "learning_rate": 1e-05, "loss": 0.9378, "step": 55555 }, { "epoch": 49.21169176262179, "grad_norm": 0.24647383391857147, "learning_rate": 1e-05, "loss": 0.9682, "step": 55560 }, { "epoch": 49.21612046058459, "grad_norm": 0.3046664595603943, "learning_rate": 1e-05, "loss": 1.0072, "step": 55565 }, { "epoch": 49.220549158547385, "grad_norm": 0.25379493832588196, "learning_rate": 1e-05, "loss": 1.0076, "step": 55570 }, { "epoch": 49.224977856510186, "grad_norm": 0.24232938885688782, "learning_rate": 1e-05, "loss": 0.9187, "step": 55575 }, { "epoch": 49.22940655447299, "grad_norm": 0.27989089488983154, "learning_rate": 1e-05, "loss": 0.9711, "step": 55580 }, { "epoch": 49.23383525243578, "grad_norm": 0.24518831074237823, "learning_rate": 1e-05, "loss": 0.9185, "step": 55585 }, { "epoch": 49.23826395039858, "grad_norm": 0.2454182207584381, "learning_rate": 1e-05, "loss": 0.9934, "step": 55590 }, { "epoch": 49.242692648361384, "grad_norm": 0.21499726176261902, "learning_rate": 1e-05, "loss": 0.9393, "step": 55595 }, { "epoch": 49.24712134632418, "grad_norm": 0.2654757797718048, "learning_rate": 1e-05, "loss": 1.0177, "step": 55600 }, { "epoch": 49.25155004428698, "grad_norm": 0.20775367319583893, "learning_rate": 1e-05, "loss": 1.0113, "step": 55605 }, { "epoch": 49.25597874224978, "grad_norm": 0.22467748820781708, "learning_rate": 1e-05, "loss": 0.9515, "step": 55610 }, { "epoch": 49.260407440212575, "grad_norm": 0.27343037724494934, "learning_rate": 1e-05, "loss": 1.0069, "step": 55615 }, { "epoch": 49.264836138175376, "grad_norm": 0.21698635816574097, "learning_rate": 1e-05, "loss": 1.0323, "step": 55620 }, { "epoch": 49.26926483613818, "grad_norm": 0.220206618309021, "learning_rate": 1e-05, "loss": 0.9539, "step": 55625 }, { "epoch": 49.27369353410097, "grad_norm": 0.23211440443992615, "learning_rate": 1e-05, "loss": 1.0313, "step": 55630 }, { "epoch": 49.27812223206377, "grad_norm": 0.20855194330215454, "learning_rate": 1e-05, "loss": 0.992, "step": 55635 }, { "epoch": 49.282550930026574, "grad_norm": 0.2288268357515335, "learning_rate": 1e-05, "loss": 0.9839, "step": 55640 }, { "epoch": 49.28697962798937, "grad_norm": 0.2961913049221039, "learning_rate": 1e-05, "loss": 1.0407, "step": 55645 }, { "epoch": 49.29140832595217, "grad_norm": 0.2488223910331726, "learning_rate": 1e-05, "loss": 0.9828, "step": 55650 }, { "epoch": 49.29583702391497, "grad_norm": 0.2510570287704468, "learning_rate": 1e-05, "loss": 1.0118, "step": 55655 }, { "epoch": 49.30026572187777, "grad_norm": 0.2550215423107147, "learning_rate": 1e-05, "loss": 0.957, "step": 55660 }, { "epoch": 49.304694419840565, "grad_norm": 0.2813078761100769, "learning_rate": 1e-05, "loss": 1.0561, "step": 55665 }, { "epoch": 49.30912311780337, "grad_norm": 0.2419459968805313, "learning_rate": 1e-05, "loss": 0.9541, "step": 55670 }, { "epoch": 49.31355181576617, "grad_norm": 0.22254474461078644, "learning_rate": 1e-05, "loss": 0.9188, "step": 55675 }, { "epoch": 49.31798051372896, "grad_norm": 0.20271334052085876, "learning_rate": 1e-05, "loss": 0.9141, "step": 55680 }, { "epoch": 49.32240921169176, "grad_norm": 0.22568435966968536, "learning_rate": 1e-05, "loss": 0.9757, "step": 55685 }, { "epoch": 49.326837909654564, "grad_norm": 0.2265196591615677, "learning_rate": 1e-05, "loss": 0.9737, "step": 55690 }, { "epoch": 49.33126660761736, "grad_norm": 0.20363369584083557, "learning_rate": 1e-05, "loss": 0.9264, "step": 55695 }, { "epoch": 49.33569530558016, "grad_norm": 0.2713947892189026, "learning_rate": 1e-05, "loss": 0.9336, "step": 55700 }, { "epoch": 49.34012400354296, "grad_norm": 0.21646949648857117, "learning_rate": 1e-05, "loss": 0.9778, "step": 55705 }, { "epoch": 49.344552701505755, "grad_norm": 0.24461214244365692, "learning_rate": 1e-05, "loss": 1.0102, "step": 55710 }, { "epoch": 49.348981399468556, "grad_norm": 0.21809403598308563, "learning_rate": 1e-05, "loss": 0.9636, "step": 55715 }, { "epoch": 49.35341009743136, "grad_norm": 0.23036445677280426, "learning_rate": 1e-05, "loss": 0.9849, "step": 55720 }, { "epoch": 49.35783879539415, "grad_norm": 0.24903655052185059, "learning_rate": 1e-05, "loss": 0.9601, "step": 55725 }, { "epoch": 49.36226749335695, "grad_norm": 0.22437074780464172, "learning_rate": 1e-05, "loss": 0.9973, "step": 55730 }, { "epoch": 49.366696191319754, "grad_norm": 0.2485104650259018, "learning_rate": 1e-05, "loss": 1.0094, "step": 55735 }, { "epoch": 49.37112488928255, "grad_norm": 0.27319252490997314, "learning_rate": 1e-05, "loss": 0.9309, "step": 55740 }, { "epoch": 49.37555358724535, "grad_norm": 0.24037888646125793, "learning_rate": 1e-05, "loss": 0.9928, "step": 55745 }, { "epoch": 49.37998228520815, "grad_norm": 0.2011907398700714, "learning_rate": 1e-05, "loss": 1.0278, "step": 55750 }, { "epoch": 49.384410983170945, "grad_norm": 0.2516098916530609, "learning_rate": 1e-05, "loss": 0.9476, "step": 55755 }, { "epoch": 49.388839681133746, "grad_norm": 0.21974478662014008, "learning_rate": 1e-05, "loss": 1.0609, "step": 55760 }, { "epoch": 49.39326837909655, "grad_norm": 0.22345365583896637, "learning_rate": 1e-05, "loss": 0.9019, "step": 55765 }, { "epoch": 49.39769707705934, "grad_norm": 0.28094354271888733, "learning_rate": 1e-05, "loss": 0.9952, "step": 55770 }, { "epoch": 49.40212577502214, "grad_norm": 0.2135746031999588, "learning_rate": 1e-05, "loss": 0.976, "step": 55775 }, { "epoch": 49.40655447298494, "grad_norm": 0.22173616290092468, "learning_rate": 1e-05, "loss": 0.9308, "step": 55780 }, { "epoch": 49.410983170947745, "grad_norm": 0.19864509999752045, "learning_rate": 1e-05, "loss": 0.9164, "step": 55785 }, { "epoch": 49.41541186891054, "grad_norm": 0.23265066742897034, "learning_rate": 1e-05, "loss": 0.9902, "step": 55790 }, { "epoch": 49.41984056687334, "grad_norm": 0.21120624244213104, "learning_rate": 1e-05, "loss": 0.973, "step": 55795 }, { "epoch": 49.42426926483614, "grad_norm": 0.22201330959796906, "learning_rate": 1e-05, "loss": 0.946, "step": 55800 }, { "epoch": 49.428697962798935, "grad_norm": 0.21634584665298462, "learning_rate": 1e-05, "loss": 0.9751, "step": 55805 }, { "epoch": 49.43312666076174, "grad_norm": 0.23441413044929504, "learning_rate": 1e-05, "loss": 0.9363, "step": 55810 }, { "epoch": 49.43755535872454, "grad_norm": 0.23186996579170227, "learning_rate": 1e-05, "loss": 0.9954, "step": 55815 }, { "epoch": 49.44198405668733, "grad_norm": 0.22654154896736145, "learning_rate": 1e-05, "loss": 0.9099, "step": 55820 }, { "epoch": 49.44641275465013, "grad_norm": 0.2407923936843872, "learning_rate": 1e-05, "loss": 0.9865, "step": 55825 }, { "epoch": 49.450841452612934, "grad_norm": 0.23158203065395355, "learning_rate": 1e-05, "loss": 0.956, "step": 55830 }, { "epoch": 49.45527015057573, "grad_norm": 0.2513697147369385, "learning_rate": 1e-05, "loss": 1.0309, "step": 55835 }, { "epoch": 49.45969884853853, "grad_norm": 0.23609429597854614, "learning_rate": 1e-05, "loss": 0.9993, "step": 55840 }, { "epoch": 49.46412754650133, "grad_norm": 0.27875301241874695, "learning_rate": 1e-05, "loss": 0.9905, "step": 55845 }, { "epoch": 49.468556244464125, "grad_norm": 0.2642456889152527, "learning_rate": 1e-05, "loss": 0.9304, "step": 55850 }, { "epoch": 49.472984942426926, "grad_norm": 0.24122361838817596, "learning_rate": 1e-05, "loss": 0.9487, "step": 55855 }, { "epoch": 49.47741364038973, "grad_norm": 0.22178712487220764, "learning_rate": 1e-05, "loss": 0.9365, "step": 55860 }, { "epoch": 49.48184233835252, "grad_norm": 0.23801103234291077, "learning_rate": 1e-05, "loss": 0.9894, "step": 55865 }, { "epoch": 49.48627103631532, "grad_norm": 0.2524906396865845, "learning_rate": 1e-05, "loss": 0.9576, "step": 55870 }, { "epoch": 49.490699734278124, "grad_norm": 0.22201824188232422, "learning_rate": 1e-05, "loss": 1.0086, "step": 55875 }, { "epoch": 49.49512843224092, "grad_norm": 0.2502390444278717, "learning_rate": 1e-05, "loss": 0.9893, "step": 55880 }, { "epoch": 49.49955713020372, "grad_norm": 0.23540426790714264, "learning_rate": 1e-05, "loss": 0.9962, "step": 55885 }, { "epoch": 49.50398582816652, "grad_norm": 0.23848573863506317, "learning_rate": 1e-05, "loss": 1.0155, "step": 55890 }, { "epoch": 49.508414526129314, "grad_norm": 0.22835133969783783, "learning_rate": 1e-05, "loss": 0.9924, "step": 55895 }, { "epoch": 49.512843224092116, "grad_norm": 0.22900861501693726, "learning_rate": 1e-05, "loss": 1.0224, "step": 55900 }, { "epoch": 49.51727192205492, "grad_norm": 0.27086400985717773, "learning_rate": 1e-05, "loss": 0.9728, "step": 55905 }, { "epoch": 49.52170062001772, "grad_norm": 0.2634629011154175, "learning_rate": 1e-05, "loss": 1.0256, "step": 55910 }, { "epoch": 49.52612931798051, "grad_norm": 0.22731907665729523, "learning_rate": 1e-05, "loss": 0.9885, "step": 55915 }, { "epoch": 49.53055801594331, "grad_norm": 0.26295971870422363, "learning_rate": 1e-05, "loss": 1.0084, "step": 55920 }, { "epoch": 49.534986713906115, "grad_norm": 0.2209186851978302, "learning_rate": 1e-05, "loss": 0.9199, "step": 55925 }, { "epoch": 49.53941541186891, "grad_norm": 0.274909108877182, "learning_rate": 1e-05, "loss": 1.0044, "step": 55930 }, { "epoch": 49.54384410983171, "grad_norm": 0.23730503022670746, "learning_rate": 1e-05, "loss": 0.9438, "step": 55935 }, { "epoch": 49.54827280779451, "grad_norm": 0.25347045063972473, "learning_rate": 1e-05, "loss": 0.9688, "step": 55940 }, { "epoch": 49.552701505757305, "grad_norm": 0.27422139048576355, "learning_rate": 1e-05, "loss": 0.9768, "step": 55945 }, { "epoch": 49.55713020372011, "grad_norm": 0.22927168011665344, "learning_rate": 1e-05, "loss": 1.0513, "step": 55950 }, { "epoch": 49.56155890168291, "grad_norm": 0.2302422821521759, "learning_rate": 1e-05, "loss": 0.9737, "step": 55955 }, { "epoch": 49.5659875996457, "grad_norm": 0.2383836954832077, "learning_rate": 1e-05, "loss": 0.9836, "step": 55960 }, { "epoch": 49.5704162976085, "grad_norm": 0.36718258261680603, "learning_rate": 1e-05, "loss": 0.9933, "step": 55965 }, { "epoch": 49.574844995571304, "grad_norm": 0.23085416853427887, "learning_rate": 1e-05, "loss": 0.9739, "step": 55970 }, { "epoch": 49.5792736935341, "grad_norm": 0.23889276385307312, "learning_rate": 1e-05, "loss": 0.9931, "step": 55975 }, { "epoch": 49.5837023914969, "grad_norm": 0.19973666965961456, "learning_rate": 1e-05, "loss": 0.9571, "step": 55980 }, { "epoch": 49.5881310894597, "grad_norm": 0.2542799115180969, "learning_rate": 1e-05, "loss": 0.9768, "step": 55985 }, { "epoch": 49.592559787422495, "grad_norm": 0.22943666577339172, "learning_rate": 1e-05, "loss": 0.9748, "step": 55990 }, { "epoch": 49.596988485385296, "grad_norm": 0.25890737771987915, "learning_rate": 1e-05, "loss": 0.9947, "step": 55995 }, { "epoch": 49.6014171833481, "grad_norm": 0.2690388560295105, "learning_rate": 1e-05, "loss": 0.9648, "step": 56000 }, { "epoch": 49.60584588131089, "grad_norm": 0.23407095670700073, "learning_rate": 1e-05, "loss": 0.9413, "step": 56005 }, { "epoch": 49.61027457927369, "grad_norm": 0.25506293773651123, "learning_rate": 1e-05, "loss": 0.9516, "step": 56010 }, { "epoch": 49.614703277236494, "grad_norm": 0.2488018125295639, "learning_rate": 1e-05, "loss": 1.0114, "step": 56015 }, { "epoch": 49.619131975199295, "grad_norm": 0.2653346061706543, "learning_rate": 1e-05, "loss": 1.0041, "step": 56020 }, { "epoch": 49.62356067316209, "grad_norm": 0.2499607354402542, "learning_rate": 1e-05, "loss": 0.9671, "step": 56025 }, { "epoch": 49.62798937112489, "grad_norm": 0.32554203271865845, "learning_rate": 1e-05, "loss": 0.9478, "step": 56030 }, { "epoch": 49.63241806908769, "grad_norm": 0.23772965371608734, "learning_rate": 1e-05, "loss": 1.0036, "step": 56035 }, { "epoch": 49.636846767050486, "grad_norm": 0.2505536377429962, "learning_rate": 1e-05, "loss": 1.0324, "step": 56040 }, { "epoch": 49.64127546501329, "grad_norm": 0.2918280363082886, "learning_rate": 1e-05, "loss": 0.9736, "step": 56045 }, { "epoch": 49.64570416297609, "grad_norm": 0.22287245094776154, "learning_rate": 1e-05, "loss": 1.0078, "step": 56050 }, { "epoch": 49.65013286093888, "grad_norm": 0.22217696905136108, "learning_rate": 1e-05, "loss": 0.9693, "step": 56055 }, { "epoch": 49.65456155890168, "grad_norm": 0.24594427645206451, "learning_rate": 1e-05, "loss": 0.9842, "step": 56060 }, { "epoch": 49.658990256864485, "grad_norm": 0.21668019890785217, "learning_rate": 1e-05, "loss": 0.9416, "step": 56065 }, { "epoch": 49.66341895482728, "grad_norm": 0.23349064588546753, "learning_rate": 1e-05, "loss": 1.0179, "step": 56070 }, { "epoch": 49.66784765279008, "grad_norm": 0.2514871656894684, "learning_rate": 1e-05, "loss": 1.0396, "step": 56075 }, { "epoch": 49.67227635075288, "grad_norm": 0.24177692830562592, "learning_rate": 1e-05, "loss": 0.9138, "step": 56080 }, { "epoch": 49.676705048715675, "grad_norm": 0.2102849930524826, "learning_rate": 1e-05, "loss": 0.925, "step": 56085 }, { "epoch": 49.681133746678476, "grad_norm": 0.21542243659496307, "learning_rate": 1e-05, "loss": 0.9354, "step": 56090 }, { "epoch": 49.68556244464128, "grad_norm": 0.2515002191066742, "learning_rate": 1e-05, "loss": 1.0193, "step": 56095 }, { "epoch": 49.68999114260407, "grad_norm": 0.24471041560173035, "learning_rate": 1e-05, "loss": 0.9407, "step": 56100 }, { "epoch": 49.69441984056687, "grad_norm": 0.23970235884189606, "learning_rate": 1e-05, "loss": 0.9778, "step": 56105 }, { "epoch": 49.698848538529674, "grad_norm": 0.2658633887767792, "learning_rate": 1e-05, "loss": 1.0124, "step": 56110 }, { "epoch": 49.70327723649247, "grad_norm": 0.2718665599822998, "learning_rate": 1e-05, "loss": 1.0024, "step": 56115 }, { "epoch": 49.70770593445527, "grad_norm": 0.23668673634529114, "learning_rate": 1e-05, "loss": 0.9261, "step": 56120 }, { "epoch": 49.71213463241807, "grad_norm": 0.2891448438167572, "learning_rate": 1e-05, "loss": 0.9581, "step": 56125 }, { "epoch": 49.716563330380865, "grad_norm": 0.19356776773929596, "learning_rate": 1e-05, "loss": 0.9302, "step": 56130 }, { "epoch": 49.720992028343666, "grad_norm": 0.23055894672870636, "learning_rate": 1e-05, "loss": 0.9231, "step": 56135 }, { "epoch": 49.72542072630647, "grad_norm": 0.21958231925964355, "learning_rate": 1e-05, "loss": 0.9155, "step": 56140 }, { "epoch": 49.72984942426926, "grad_norm": 0.21882109344005585, "learning_rate": 1e-05, "loss": 0.9233, "step": 56145 }, { "epoch": 49.73427812223206, "grad_norm": 0.21798716485500336, "learning_rate": 1e-05, "loss": 0.9789, "step": 56150 }, { "epoch": 49.738706820194864, "grad_norm": 0.23968777060508728, "learning_rate": 1e-05, "loss": 0.9439, "step": 56155 }, { "epoch": 49.743135518157665, "grad_norm": 0.2466309666633606, "learning_rate": 1e-05, "loss": 1.0076, "step": 56160 }, { "epoch": 49.74756421612046, "grad_norm": 0.2300512045621872, "learning_rate": 1e-05, "loss": 1.0137, "step": 56165 }, { "epoch": 49.75199291408326, "grad_norm": 0.2710813283920288, "learning_rate": 1e-05, "loss": 0.9323, "step": 56170 }, { "epoch": 49.75642161204606, "grad_norm": 0.258838027715683, "learning_rate": 1e-05, "loss": 0.9369, "step": 56175 }, { "epoch": 49.760850310008856, "grad_norm": 0.20486044883728027, "learning_rate": 1e-05, "loss": 0.9464, "step": 56180 }, { "epoch": 49.76527900797166, "grad_norm": 0.28276970982551575, "learning_rate": 1e-05, "loss": 0.9499, "step": 56185 }, { "epoch": 49.76970770593446, "grad_norm": 0.23578336834907532, "learning_rate": 1e-05, "loss": 0.9659, "step": 56190 }, { "epoch": 49.77413640389725, "grad_norm": 0.2509406805038452, "learning_rate": 1e-05, "loss": 0.9792, "step": 56195 }, { "epoch": 49.77856510186005, "grad_norm": 0.24274130165576935, "learning_rate": 1e-05, "loss": 0.9696, "step": 56200 }, { "epoch": 49.782993799822854, "grad_norm": 0.24467487633228302, "learning_rate": 1e-05, "loss": 0.9722, "step": 56205 }, { "epoch": 49.78742249778565, "grad_norm": 0.2643640339374542, "learning_rate": 1e-05, "loss": 0.9634, "step": 56210 }, { "epoch": 49.79185119574845, "grad_norm": 0.25002598762512207, "learning_rate": 1e-05, "loss": 1.0051, "step": 56215 }, { "epoch": 49.79627989371125, "grad_norm": 0.2515376806259155, "learning_rate": 1e-05, "loss": 0.9935, "step": 56220 }, { "epoch": 49.800708591674045, "grad_norm": 0.23220328986644745, "learning_rate": 1e-05, "loss": 0.997, "step": 56225 }, { "epoch": 49.805137289636846, "grad_norm": 0.22976285219192505, "learning_rate": 1e-05, "loss": 0.9607, "step": 56230 }, { "epoch": 49.80956598759965, "grad_norm": 0.2217559665441513, "learning_rate": 1e-05, "loss": 0.9461, "step": 56235 }, { "epoch": 49.81399468556244, "grad_norm": 0.249554842710495, "learning_rate": 1e-05, "loss": 0.9622, "step": 56240 }, { "epoch": 49.81842338352524, "grad_norm": 0.2501259446144104, "learning_rate": 1e-05, "loss": 0.9808, "step": 56245 }, { "epoch": 49.822852081488044, "grad_norm": 0.22955256700515747, "learning_rate": 1e-05, "loss": 0.9188, "step": 56250 }, { "epoch": 49.82728077945084, "grad_norm": 0.24748598039150238, "learning_rate": 1e-05, "loss": 1.0176, "step": 56255 }, { "epoch": 49.83170947741364, "grad_norm": 0.23254449665546417, "learning_rate": 1e-05, "loss": 1.0387, "step": 56260 }, { "epoch": 49.83613817537644, "grad_norm": 0.2125118225812912, "learning_rate": 1e-05, "loss": 0.9247, "step": 56265 }, { "epoch": 49.84056687333924, "grad_norm": 0.2508005201816559, "learning_rate": 1e-05, "loss": 0.9764, "step": 56270 }, { "epoch": 49.844995571302036, "grad_norm": 0.24274130165576935, "learning_rate": 1e-05, "loss": 0.9213, "step": 56275 }, { "epoch": 49.84942426926484, "grad_norm": 0.28861716389656067, "learning_rate": 1e-05, "loss": 0.9589, "step": 56280 }, { "epoch": 49.85385296722764, "grad_norm": 0.2913484275341034, "learning_rate": 1e-05, "loss": 0.9537, "step": 56285 }, { "epoch": 49.85828166519043, "grad_norm": 0.2529178559780121, "learning_rate": 1e-05, "loss": 0.9731, "step": 56290 }, { "epoch": 49.862710363153234, "grad_norm": 0.29265162348747253, "learning_rate": 1e-05, "loss": 0.9835, "step": 56295 }, { "epoch": 49.867139061116035, "grad_norm": 0.22390113770961761, "learning_rate": 1e-05, "loss": 0.9878, "step": 56300 }, { "epoch": 49.87156775907883, "grad_norm": 0.2138739824295044, "learning_rate": 1e-05, "loss": 0.9061, "step": 56305 }, { "epoch": 49.87599645704163, "grad_norm": 0.22618485987186432, "learning_rate": 1e-05, "loss": 0.9589, "step": 56310 }, { "epoch": 49.88042515500443, "grad_norm": 0.22366419434547424, "learning_rate": 1e-05, "loss": 0.9338, "step": 56315 }, { "epoch": 49.884853852967225, "grad_norm": 0.25958943367004395, "learning_rate": 1e-05, "loss": 0.9814, "step": 56320 }, { "epoch": 49.88928255093003, "grad_norm": 0.24752451479434967, "learning_rate": 1e-05, "loss": 0.9514, "step": 56325 }, { "epoch": 49.89371124889283, "grad_norm": 0.24582603573799133, "learning_rate": 1e-05, "loss": 0.99, "step": 56330 }, { "epoch": 49.89813994685562, "grad_norm": 0.2090013325214386, "learning_rate": 1e-05, "loss": 0.9462, "step": 56335 }, { "epoch": 49.90256864481842, "grad_norm": 0.216317281126976, "learning_rate": 1e-05, "loss": 0.9372, "step": 56340 }, { "epoch": 49.906997342781224, "grad_norm": 0.20565621554851532, "learning_rate": 1e-05, "loss": 0.9594, "step": 56345 }, { "epoch": 49.91142604074402, "grad_norm": 0.22992201149463654, "learning_rate": 1e-05, "loss": 0.9992, "step": 56350 }, { "epoch": 49.91585473870682, "grad_norm": 0.24856363236904144, "learning_rate": 1e-05, "loss": 0.9717, "step": 56355 }, { "epoch": 49.92028343666962, "grad_norm": 0.24793480336666107, "learning_rate": 1e-05, "loss": 0.9837, "step": 56360 }, { "epoch": 49.924712134632415, "grad_norm": 0.2484944462776184, "learning_rate": 1e-05, "loss": 0.9646, "step": 56365 }, { "epoch": 49.929140832595216, "grad_norm": 0.24044661223888397, "learning_rate": 1e-05, "loss": 0.9724, "step": 56370 }, { "epoch": 49.93356953055802, "grad_norm": 0.2586592733860016, "learning_rate": 1e-05, "loss": 0.9979, "step": 56375 }, { "epoch": 49.93799822852081, "grad_norm": 0.22751368582248688, "learning_rate": 1e-05, "loss": 1.0008, "step": 56380 }, { "epoch": 49.94242692648361, "grad_norm": 0.2624330222606659, "learning_rate": 1e-05, "loss": 1.0404, "step": 56385 }, { "epoch": 49.946855624446414, "grad_norm": 0.23961280286312103, "learning_rate": 1e-05, "loss": 0.9803, "step": 56390 }, { "epoch": 49.951284322409215, "grad_norm": 0.28060203790664673, "learning_rate": 1e-05, "loss": 0.9668, "step": 56395 }, { "epoch": 49.95571302037201, "grad_norm": 0.2486191689968109, "learning_rate": 1e-05, "loss": 1.0099, "step": 56400 }, { "epoch": 49.96014171833481, "grad_norm": 0.22898992896080017, "learning_rate": 1e-05, "loss": 0.9453, "step": 56405 }, { "epoch": 49.96457041629761, "grad_norm": 0.20768101513385773, "learning_rate": 1e-05, "loss": 1.0041, "step": 56410 }, { "epoch": 49.968999114260406, "grad_norm": 0.25775259733200073, "learning_rate": 1e-05, "loss": 0.9888, "step": 56415 }, { "epoch": 49.97342781222321, "grad_norm": 0.2138027846813202, "learning_rate": 1e-05, "loss": 0.9736, "step": 56420 }, { "epoch": 49.97785651018601, "grad_norm": 0.24015085399150848, "learning_rate": 1e-05, "loss": 0.9618, "step": 56425 }, { "epoch": 49.9822852081488, "grad_norm": 0.24268838763237, "learning_rate": 1e-05, "loss": 0.9274, "step": 56430 }, { "epoch": 49.9867139061116, "grad_norm": 0.23679347336292267, "learning_rate": 1e-05, "loss": 0.9925, "step": 56435 }, { "epoch": 49.991142604074405, "grad_norm": 0.2042813003063202, "learning_rate": 1e-05, "loss": 0.9637, "step": 56440 }, { "epoch": 49.9955713020372, "grad_norm": 0.2307308465242386, "learning_rate": 1e-05, "loss": 0.9839, "step": 56445 }, { "epoch": 50.0, "grad_norm": 0.20903560519218445, "learning_rate": 1e-05, "loss": 0.9109, "step": 56450 }, { "epoch": 50.0044286979628, "grad_norm": 0.256826251745224, "learning_rate": 1e-05, "loss": 0.9734, "step": 56455 }, { "epoch": 50.008857395925595, "grad_norm": 0.28415969014167786, "learning_rate": 1e-05, "loss": 0.9556, "step": 56460 }, { "epoch": 50.0132860938884, "grad_norm": 0.24987296760082245, "learning_rate": 1e-05, "loss": 0.9883, "step": 56465 }, { "epoch": 50.0177147918512, "grad_norm": 0.2883746027946472, "learning_rate": 1e-05, "loss": 0.9562, "step": 56470 }, { "epoch": 50.02214348981399, "grad_norm": 0.2436843365430832, "learning_rate": 1e-05, "loss": 0.9994, "step": 56475 }, { "epoch": 50.02657218777679, "grad_norm": 0.25531429052352905, "learning_rate": 1e-05, "loss": 0.9609, "step": 56480 }, { "epoch": 50.031000885739594, "grad_norm": 0.23130466043949127, "learning_rate": 1e-05, "loss": 1.0, "step": 56485 }, { "epoch": 50.03542958370239, "grad_norm": 0.28787776827812195, "learning_rate": 1e-05, "loss": 0.9387, "step": 56490 }, { "epoch": 50.03985828166519, "grad_norm": 0.22041937708854675, "learning_rate": 1e-05, "loss": 0.9747, "step": 56495 }, { "epoch": 50.04428697962799, "grad_norm": 0.22946397960186005, "learning_rate": 1e-05, "loss": 0.9617, "step": 56500 }, { "epoch": 50.048715677590785, "grad_norm": 0.2536393702030182, "learning_rate": 1e-05, "loss": 0.9361, "step": 56505 }, { "epoch": 50.053144375553586, "grad_norm": 0.24152427911758423, "learning_rate": 1e-05, "loss": 0.9273, "step": 56510 }, { "epoch": 50.05757307351639, "grad_norm": 0.2372935265302658, "learning_rate": 1e-05, "loss": 0.9547, "step": 56515 }, { "epoch": 50.06200177147919, "grad_norm": 0.2500895857810974, "learning_rate": 1e-05, "loss": 0.9712, "step": 56520 }, { "epoch": 50.06643046944198, "grad_norm": 0.258232980966568, "learning_rate": 1e-05, "loss": 1.002, "step": 56525 }, { "epoch": 50.070859167404784, "grad_norm": 0.27169135212898254, "learning_rate": 1e-05, "loss": 0.9731, "step": 56530 }, { "epoch": 50.075287865367585, "grad_norm": 0.22759337723255157, "learning_rate": 1e-05, "loss": 0.9256, "step": 56535 }, { "epoch": 50.07971656333038, "grad_norm": 0.2306303232908249, "learning_rate": 1e-05, "loss": 1.0034, "step": 56540 }, { "epoch": 50.08414526129318, "grad_norm": 0.3019965887069702, "learning_rate": 1e-05, "loss": 0.9839, "step": 56545 }, { "epoch": 50.08857395925598, "grad_norm": 0.2953367531299591, "learning_rate": 1e-05, "loss": 0.9194, "step": 56550 }, { "epoch": 50.093002657218776, "grad_norm": 0.22084151208400726, "learning_rate": 1e-05, "loss": 0.9571, "step": 56555 }, { "epoch": 50.09743135518158, "grad_norm": 0.2320505976676941, "learning_rate": 1e-05, "loss": 0.9709, "step": 56560 }, { "epoch": 50.10186005314438, "grad_norm": 0.23217327892780304, "learning_rate": 1e-05, "loss": 1.0213, "step": 56565 }, { "epoch": 50.10628875110717, "grad_norm": 0.21947097778320312, "learning_rate": 1e-05, "loss": 0.9472, "step": 56570 }, { "epoch": 50.11071744906997, "grad_norm": 0.219364732503891, "learning_rate": 1e-05, "loss": 0.9179, "step": 56575 }, { "epoch": 50.115146147032775, "grad_norm": 0.23172561824321747, "learning_rate": 1e-05, "loss": 0.9673, "step": 56580 }, { "epoch": 50.11957484499557, "grad_norm": 0.24873532354831696, "learning_rate": 1e-05, "loss": 0.9758, "step": 56585 }, { "epoch": 50.12400354295837, "grad_norm": 0.2821251451969147, "learning_rate": 1e-05, "loss": 0.9315, "step": 56590 }, { "epoch": 50.12843224092117, "grad_norm": 0.22603997588157654, "learning_rate": 1e-05, "loss": 0.9597, "step": 56595 }, { "epoch": 50.132860938883965, "grad_norm": 0.26354584097862244, "learning_rate": 1e-05, "loss": 0.9426, "step": 56600 }, { "epoch": 50.137289636846766, "grad_norm": 0.22468827664852142, "learning_rate": 1e-05, "loss": 1.0216, "step": 56605 }, { "epoch": 50.14171833480957, "grad_norm": 0.2797124981880188, "learning_rate": 1e-05, "loss": 0.9207, "step": 56610 }, { "epoch": 50.14614703277236, "grad_norm": 0.2565435767173767, "learning_rate": 1e-05, "loss": 0.9639, "step": 56615 }, { "epoch": 50.15057573073516, "grad_norm": 0.25486549735069275, "learning_rate": 1e-05, "loss": 0.971, "step": 56620 }, { "epoch": 50.155004428697964, "grad_norm": 0.22764211893081665, "learning_rate": 1e-05, "loss": 0.9671, "step": 56625 }, { "epoch": 50.15943312666076, "grad_norm": 0.2739883065223694, "learning_rate": 1e-05, "loss": 0.9894, "step": 56630 }, { "epoch": 50.16386182462356, "grad_norm": 0.23255641758441925, "learning_rate": 1e-05, "loss": 0.9592, "step": 56635 }, { "epoch": 50.16829052258636, "grad_norm": 0.28168371319770813, "learning_rate": 1e-05, "loss": 1.0176, "step": 56640 }, { "epoch": 50.17271922054916, "grad_norm": 0.2573017179965973, "learning_rate": 1e-05, "loss": 0.9133, "step": 56645 }, { "epoch": 50.177147918511956, "grad_norm": 0.24071170389652252, "learning_rate": 1e-05, "loss": 0.9966, "step": 56650 }, { "epoch": 50.18157661647476, "grad_norm": 0.27621400356292725, "learning_rate": 1e-05, "loss": 0.9834, "step": 56655 }, { "epoch": 50.18600531443756, "grad_norm": 0.21441973745822906, "learning_rate": 1e-05, "loss": 0.9239, "step": 56660 }, { "epoch": 50.19043401240035, "grad_norm": 0.2881956100463867, "learning_rate": 1e-05, "loss": 0.9442, "step": 56665 }, { "epoch": 50.194862710363154, "grad_norm": 0.24569925665855408, "learning_rate": 1e-05, "loss": 0.9798, "step": 56670 }, { "epoch": 50.199291408325955, "grad_norm": 0.21228636801242828, "learning_rate": 1e-05, "loss": 0.9941, "step": 56675 }, { "epoch": 50.20372010628875, "grad_norm": 0.23929989337921143, "learning_rate": 1e-05, "loss": 0.9726, "step": 56680 }, { "epoch": 50.20814880425155, "grad_norm": 0.24594345688819885, "learning_rate": 1e-05, "loss": 0.9336, "step": 56685 }, { "epoch": 50.21257750221435, "grad_norm": 0.23644809424877167, "learning_rate": 1e-05, "loss": 0.9806, "step": 56690 }, { "epoch": 50.217006200177146, "grad_norm": 0.23983396589756012, "learning_rate": 1e-05, "loss": 0.9711, "step": 56695 }, { "epoch": 50.22143489813995, "grad_norm": 0.2325279265642166, "learning_rate": 1e-05, "loss": 0.9679, "step": 56700 }, { "epoch": 50.22586359610275, "grad_norm": 0.24303388595581055, "learning_rate": 1e-05, "loss": 1.01, "step": 56705 }, { "epoch": 50.23029229406554, "grad_norm": 0.22000473737716675, "learning_rate": 1e-05, "loss": 0.9821, "step": 56710 }, { "epoch": 50.23472099202834, "grad_norm": 0.29801854491233826, "learning_rate": 1e-05, "loss": 0.965, "step": 56715 }, { "epoch": 50.239149689991144, "grad_norm": 0.1950961798429489, "learning_rate": 1e-05, "loss": 0.9095, "step": 56720 }, { "epoch": 50.24357838795394, "grad_norm": 0.2332957237958908, "learning_rate": 1e-05, "loss": 0.9686, "step": 56725 }, { "epoch": 50.24800708591674, "grad_norm": 0.24296385049819946, "learning_rate": 1e-05, "loss": 0.962, "step": 56730 }, { "epoch": 50.25243578387954, "grad_norm": 0.21270343661308289, "learning_rate": 1e-05, "loss": 0.9513, "step": 56735 }, { "epoch": 50.256864481842335, "grad_norm": 0.2131119966506958, "learning_rate": 1e-05, "loss": 0.9859, "step": 56740 }, { "epoch": 50.261293179805136, "grad_norm": 0.244968980550766, "learning_rate": 1e-05, "loss": 0.9908, "step": 56745 }, { "epoch": 50.26572187776794, "grad_norm": 0.23098643124103546, "learning_rate": 1e-05, "loss": 1.0405, "step": 56750 }, { "epoch": 50.27015057573073, "grad_norm": 0.28778502345085144, "learning_rate": 1e-05, "loss": 0.9649, "step": 56755 }, { "epoch": 50.27457927369353, "grad_norm": 0.2344450056552887, "learning_rate": 1e-05, "loss": 0.9694, "step": 56760 }, { "epoch": 50.279007971656334, "grad_norm": 0.2548731565475464, "learning_rate": 1e-05, "loss": 0.9111, "step": 56765 }, { "epoch": 50.283436669619135, "grad_norm": 0.26159363985061646, "learning_rate": 1e-05, "loss": 1.0261, "step": 56770 }, { "epoch": 50.28786536758193, "grad_norm": 0.2169611006975174, "learning_rate": 1e-05, "loss": 1.0249, "step": 56775 }, { "epoch": 50.29229406554473, "grad_norm": 0.2346806526184082, "learning_rate": 1e-05, "loss": 0.9713, "step": 56780 }, { "epoch": 50.29672276350753, "grad_norm": 0.22332145273685455, "learning_rate": 1e-05, "loss": 0.9563, "step": 56785 }, { "epoch": 50.301151461470326, "grad_norm": 0.24709761142730713, "learning_rate": 1e-05, "loss": 0.9978, "step": 56790 }, { "epoch": 50.30558015943313, "grad_norm": 0.25908616185188293, "learning_rate": 1e-05, "loss": 0.9895, "step": 56795 }, { "epoch": 50.31000885739593, "grad_norm": 0.2909523546695709, "learning_rate": 1e-05, "loss": 1.0034, "step": 56800 }, { "epoch": 50.31443755535872, "grad_norm": 0.2912805378437042, "learning_rate": 1e-05, "loss": 0.9285, "step": 56805 }, { "epoch": 50.318866253321524, "grad_norm": 0.26910462975502014, "learning_rate": 1e-05, "loss": 0.9516, "step": 56810 }, { "epoch": 50.323294951284325, "grad_norm": 0.3363611102104187, "learning_rate": 1e-05, "loss": 0.9788, "step": 56815 }, { "epoch": 50.32772364924712, "grad_norm": 0.24463380873203278, "learning_rate": 1e-05, "loss": 0.9438, "step": 56820 }, { "epoch": 50.33215234720992, "grad_norm": 0.19113120436668396, "learning_rate": 1e-05, "loss": 0.9328, "step": 56825 }, { "epoch": 50.33658104517272, "grad_norm": 0.2420957237482071, "learning_rate": 1e-05, "loss": 0.9602, "step": 56830 }, { "epoch": 50.341009743135515, "grad_norm": 0.2199445515871048, "learning_rate": 1e-05, "loss": 0.9692, "step": 56835 }, { "epoch": 50.34543844109832, "grad_norm": 0.2994040250778198, "learning_rate": 1e-05, "loss": 1.0082, "step": 56840 }, { "epoch": 50.34986713906112, "grad_norm": 0.22258982062339783, "learning_rate": 1e-05, "loss": 0.9889, "step": 56845 }, { "epoch": 50.35429583702391, "grad_norm": 0.2610372006893158, "learning_rate": 1e-05, "loss": 0.9771, "step": 56850 }, { "epoch": 50.35872453498671, "grad_norm": 0.28715774416923523, "learning_rate": 1e-05, "loss": 0.9847, "step": 56855 }, { "epoch": 50.363153232949514, "grad_norm": 0.25883904099464417, "learning_rate": 1e-05, "loss": 0.9646, "step": 56860 }, { "epoch": 50.36758193091231, "grad_norm": 0.24693803489208221, "learning_rate": 1e-05, "loss": 0.9966, "step": 56865 }, { "epoch": 50.37201062887511, "grad_norm": 0.21784591674804688, "learning_rate": 1e-05, "loss": 0.9395, "step": 56870 }, { "epoch": 50.37643932683791, "grad_norm": 0.22341829538345337, "learning_rate": 1e-05, "loss": 0.9495, "step": 56875 }, { "epoch": 50.380868024800705, "grad_norm": 0.23047322034835815, "learning_rate": 1e-05, "loss": 0.9912, "step": 56880 }, { "epoch": 50.385296722763506, "grad_norm": 0.20648445188999176, "learning_rate": 1e-05, "loss": 0.9783, "step": 56885 }, { "epoch": 50.38972542072631, "grad_norm": 0.21252861618995667, "learning_rate": 1e-05, "loss": 0.9476, "step": 56890 }, { "epoch": 50.39415411868911, "grad_norm": 0.22594702243804932, "learning_rate": 1e-05, "loss": 0.9596, "step": 56895 }, { "epoch": 50.3985828166519, "grad_norm": 0.23270262777805328, "learning_rate": 1e-05, "loss": 0.9945, "step": 56900 }, { "epoch": 50.403011514614704, "grad_norm": 0.2160016894340515, "learning_rate": 1e-05, "loss": 0.9543, "step": 56905 }, { "epoch": 50.407440212577505, "grad_norm": 0.24679569900035858, "learning_rate": 1e-05, "loss": 0.9685, "step": 56910 }, { "epoch": 50.4118689105403, "grad_norm": 0.2292606085538864, "learning_rate": 1e-05, "loss": 0.9799, "step": 56915 }, { "epoch": 50.4162976085031, "grad_norm": 0.2807747423648834, "learning_rate": 1e-05, "loss": 0.9195, "step": 56920 }, { "epoch": 50.4207263064659, "grad_norm": 0.23891091346740723, "learning_rate": 1e-05, "loss": 0.9892, "step": 56925 }, { "epoch": 50.425155004428696, "grad_norm": 0.24489007890224457, "learning_rate": 1e-05, "loss": 0.9184, "step": 56930 }, { "epoch": 50.4295837023915, "grad_norm": 0.2609945237636566, "learning_rate": 1e-05, "loss": 0.9881, "step": 56935 }, { "epoch": 50.4340124003543, "grad_norm": 0.2422347366809845, "learning_rate": 1e-05, "loss": 0.9858, "step": 56940 }, { "epoch": 50.43844109831709, "grad_norm": 0.2711182236671448, "learning_rate": 1e-05, "loss": 0.9788, "step": 56945 }, { "epoch": 50.44286979627989, "grad_norm": 0.23269061744213104, "learning_rate": 1e-05, "loss": 0.9823, "step": 56950 }, { "epoch": 50.447298494242695, "grad_norm": 0.20936839282512665, "learning_rate": 1e-05, "loss": 0.9774, "step": 56955 }, { "epoch": 50.45172719220549, "grad_norm": 0.2907756567001343, "learning_rate": 1e-05, "loss": 0.9681, "step": 56960 }, { "epoch": 50.45615589016829, "grad_norm": 0.2308727502822876, "learning_rate": 1e-05, "loss": 0.9813, "step": 56965 }, { "epoch": 50.46058458813109, "grad_norm": 0.28089532256126404, "learning_rate": 1e-05, "loss": 1.0139, "step": 56970 }, { "epoch": 50.465013286093885, "grad_norm": 0.25694093108177185, "learning_rate": 1e-05, "loss": 0.9802, "step": 56975 }, { "epoch": 50.46944198405669, "grad_norm": 0.21947243809700012, "learning_rate": 1e-05, "loss": 0.9839, "step": 56980 }, { "epoch": 50.47387068201949, "grad_norm": 0.21457254886627197, "learning_rate": 1e-05, "loss": 0.9343, "step": 56985 }, { "epoch": 50.47829937998228, "grad_norm": 0.2480718046426773, "learning_rate": 1e-05, "loss": 1.0227, "step": 56990 }, { "epoch": 50.48272807794508, "grad_norm": 0.2229345291852951, "learning_rate": 1e-05, "loss": 0.8955, "step": 56995 }, { "epoch": 50.487156775907884, "grad_norm": 0.2067663073539734, "learning_rate": 1e-05, "loss": 0.9929, "step": 57000 }, { "epoch": 50.491585473870686, "grad_norm": 0.2179500013589859, "learning_rate": 1e-05, "loss": 0.9986, "step": 57005 }, { "epoch": 50.49601417183348, "grad_norm": 0.21371547877788544, "learning_rate": 1e-05, "loss": 0.9592, "step": 57010 }, { "epoch": 50.50044286979628, "grad_norm": 0.2740658223628998, "learning_rate": 1e-05, "loss": 0.9763, "step": 57015 }, { "epoch": 50.50487156775908, "grad_norm": 0.2660622298717499, "learning_rate": 1e-05, "loss": 1.0206, "step": 57020 }, { "epoch": 50.509300265721876, "grad_norm": 0.2441960722208023, "learning_rate": 1e-05, "loss": 0.9165, "step": 57025 }, { "epoch": 50.51372896368468, "grad_norm": 0.24387258291244507, "learning_rate": 1e-05, "loss": 1.0255, "step": 57030 }, { "epoch": 50.51815766164748, "grad_norm": 0.24928133189678192, "learning_rate": 1e-05, "loss": 0.924, "step": 57035 }, { "epoch": 50.52258635961027, "grad_norm": 0.2652866840362549, "learning_rate": 1e-05, "loss": 0.9144, "step": 57040 }, { "epoch": 50.527015057573074, "grad_norm": 0.19332289695739746, "learning_rate": 1e-05, "loss": 0.9533, "step": 57045 }, { "epoch": 50.531443755535875, "grad_norm": 0.20250988006591797, "learning_rate": 1e-05, "loss": 0.9769, "step": 57050 }, { "epoch": 50.53587245349867, "grad_norm": 0.23639582097530365, "learning_rate": 1e-05, "loss": 1.0186, "step": 57055 }, { "epoch": 50.54030115146147, "grad_norm": 0.26621049642562866, "learning_rate": 1e-05, "loss": 0.9635, "step": 57060 }, { "epoch": 50.54472984942427, "grad_norm": 0.2117260992527008, "learning_rate": 1e-05, "loss": 0.9829, "step": 57065 }, { "epoch": 50.549158547387066, "grad_norm": 0.1955445408821106, "learning_rate": 1e-05, "loss": 0.997, "step": 57070 }, { "epoch": 50.55358724534987, "grad_norm": 0.2266274094581604, "learning_rate": 1e-05, "loss": 0.9723, "step": 57075 }, { "epoch": 50.55801594331267, "grad_norm": 0.23665519058704376, "learning_rate": 1e-05, "loss": 0.9356, "step": 57080 }, { "epoch": 50.56244464127546, "grad_norm": 0.260951966047287, "learning_rate": 1e-05, "loss": 1.0097, "step": 57085 }, { "epoch": 50.56687333923826, "grad_norm": 0.2310333549976349, "learning_rate": 1e-05, "loss": 0.9732, "step": 57090 }, { "epoch": 50.571302037201065, "grad_norm": 0.26228269934654236, "learning_rate": 1e-05, "loss": 0.9587, "step": 57095 }, { "epoch": 50.57573073516386, "grad_norm": 0.22783657908439636, "learning_rate": 1e-05, "loss": 0.9785, "step": 57100 }, { "epoch": 50.58015943312666, "grad_norm": 0.21104858815670013, "learning_rate": 1e-05, "loss": 0.9523, "step": 57105 }, { "epoch": 50.58458813108946, "grad_norm": 0.21797597408294678, "learning_rate": 1e-05, "loss": 0.9808, "step": 57110 }, { "epoch": 50.589016829052255, "grad_norm": 0.29294660687446594, "learning_rate": 1e-05, "loss": 0.9459, "step": 57115 }, { "epoch": 50.59344552701506, "grad_norm": 0.2762058973312378, "learning_rate": 1e-05, "loss": 1.0029, "step": 57120 }, { "epoch": 50.59787422497786, "grad_norm": 0.3349522054195404, "learning_rate": 1e-05, "loss": 0.9439, "step": 57125 }, { "epoch": 50.60230292294066, "grad_norm": 0.25857076048851013, "learning_rate": 1e-05, "loss": 0.9636, "step": 57130 }, { "epoch": 50.60673162090345, "grad_norm": 0.3034328520298004, "learning_rate": 1e-05, "loss": 1.0072, "step": 57135 }, { "epoch": 50.611160318866254, "grad_norm": 0.21597638726234436, "learning_rate": 1e-05, "loss": 1.0001, "step": 57140 }, { "epoch": 50.615589016829055, "grad_norm": 0.2535270154476166, "learning_rate": 1e-05, "loss": 1.0025, "step": 57145 }, { "epoch": 50.62001771479185, "grad_norm": 0.26540327072143555, "learning_rate": 1e-05, "loss": 1.0073, "step": 57150 }, { "epoch": 50.62444641275465, "grad_norm": 0.2667843997478485, "learning_rate": 1e-05, "loss": 1.0145, "step": 57155 }, { "epoch": 50.62887511071745, "grad_norm": 0.24924424290657043, "learning_rate": 1e-05, "loss": 0.9645, "step": 57160 }, { "epoch": 50.633303808680246, "grad_norm": 0.22557270526885986, "learning_rate": 1e-05, "loss": 0.9585, "step": 57165 }, { "epoch": 50.63773250664305, "grad_norm": 0.2317129224538803, "learning_rate": 1e-05, "loss": 0.975, "step": 57170 }, { "epoch": 50.64216120460585, "grad_norm": 0.22792044281959534, "learning_rate": 1e-05, "loss": 0.9604, "step": 57175 }, { "epoch": 50.64658990256864, "grad_norm": 0.2907612919807434, "learning_rate": 1e-05, "loss": 1.0099, "step": 57180 }, { "epoch": 50.651018600531444, "grad_norm": 0.25826042890548706, "learning_rate": 1e-05, "loss": 0.9919, "step": 57185 }, { "epoch": 50.655447298494245, "grad_norm": 0.23000043630599976, "learning_rate": 1e-05, "loss": 0.9054, "step": 57190 }, { "epoch": 50.65987599645704, "grad_norm": 0.21878407895565033, "learning_rate": 1e-05, "loss": 0.9315, "step": 57195 }, { "epoch": 50.66430469441984, "grad_norm": 0.2538566589355469, "learning_rate": 1e-05, "loss": 0.9317, "step": 57200 }, { "epoch": 50.66873339238264, "grad_norm": 0.2425651103258133, "learning_rate": 1e-05, "loss": 1.0058, "step": 57205 }, { "epoch": 50.673162090345436, "grad_norm": 0.23855812847614288, "learning_rate": 1e-05, "loss": 0.9235, "step": 57210 }, { "epoch": 50.67759078830824, "grad_norm": 0.2407747507095337, "learning_rate": 1e-05, "loss": 0.9395, "step": 57215 }, { "epoch": 50.68201948627104, "grad_norm": 0.23114387691020966, "learning_rate": 1e-05, "loss": 0.9894, "step": 57220 }, { "epoch": 50.68644818423383, "grad_norm": 0.24291351437568665, "learning_rate": 1e-05, "loss": 0.9723, "step": 57225 }, { "epoch": 50.69087688219663, "grad_norm": 0.21679618954658508, "learning_rate": 1e-05, "loss": 0.9197, "step": 57230 }, { "epoch": 50.695305580159435, "grad_norm": 0.2194240540266037, "learning_rate": 1e-05, "loss": 0.9862, "step": 57235 }, { "epoch": 50.69973427812223, "grad_norm": 0.2436395138502121, "learning_rate": 1e-05, "loss": 0.9729, "step": 57240 }, { "epoch": 50.70416297608503, "grad_norm": 0.26533743739128113, "learning_rate": 1e-05, "loss": 1.034, "step": 57245 }, { "epoch": 50.70859167404783, "grad_norm": 0.24759019911289215, "learning_rate": 1e-05, "loss": 0.9779, "step": 57250 }, { "epoch": 50.71302037201063, "grad_norm": 0.2354304939508438, "learning_rate": 1e-05, "loss": 0.997, "step": 57255 }, { "epoch": 50.717449069973426, "grad_norm": 0.23703746497631073, "learning_rate": 1e-05, "loss": 0.9455, "step": 57260 }, { "epoch": 50.72187776793623, "grad_norm": 0.23415827751159668, "learning_rate": 1e-05, "loss": 0.9517, "step": 57265 }, { "epoch": 50.72630646589903, "grad_norm": 0.22479312121868134, "learning_rate": 1e-05, "loss": 0.9959, "step": 57270 }, { "epoch": 50.73073516386182, "grad_norm": 0.2348138391971588, "learning_rate": 1e-05, "loss": 0.9662, "step": 57275 }, { "epoch": 50.735163861824624, "grad_norm": 0.2475343644618988, "learning_rate": 1e-05, "loss": 1.0065, "step": 57280 }, { "epoch": 50.739592559787425, "grad_norm": 0.222883939743042, "learning_rate": 1e-05, "loss": 1.0705, "step": 57285 }, { "epoch": 50.74402125775022, "grad_norm": 0.25917381048202515, "learning_rate": 1e-05, "loss": 0.8941, "step": 57290 }, { "epoch": 50.74844995571302, "grad_norm": 0.21381698548793793, "learning_rate": 1e-05, "loss": 1.0025, "step": 57295 }, { "epoch": 50.75287865367582, "grad_norm": 0.3045254349708557, "learning_rate": 1e-05, "loss": 0.9605, "step": 57300 }, { "epoch": 50.757307351638616, "grad_norm": 0.2285705804824829, "learning_rate": 1e-05, "loss": 0.9928, "step": 57305 }, { "epoch": 50.76173604960142, "grad_norm": 0.28514525294303894, "learning_rate": 1e-05, "loss": 0.9681, "step": 57310 }, { "epoch": 50.76616474756422, "grad_norm": 0.2468743771314621, "learning_rate": 1e-05, "loss": 0.9444, "step": 57315 }, { "epoch": 50.77059344552701, "grad_norm": 0.26387229561805725, "learning_rate": 1e-05, "loss": 0.9328, "step": 57320 }, { "epoch": 50.775022143489814, "grad_norm": 0.29074862599372864, "learning_rate": 1e-05, "loss": 0.9259, "step": 57325 }, { "epoch": 50.779450841452615, "grad_norm": 0.2334967404603958, "learning_rate": 1e-05, "loss": 0.9855, "step": 57330 }, { "epoch": 50.78387953941541, "grad_norm": 0.2385290414094925, "learning_rate": 1e-05, "loss": 0.9747, "step": 57335 }, { "epoch": 50.78830823737821, "grad_norm": 0.24056480824947357, "learning_rate": 1e-05, "loss": 0.966, "step": 57340 }, { "epoch": 50.79273693534101, "grad_norm": 0.25320765376091003, "learning_rate": 1e-05, "loss": 0.956, "step": 57345 }, { "epoch": 50.797165633303806, "grad_norm": 0.2529081404209137, "learning_rate": 1e-05, "loss": 0.9243, "step": 57350 }, { "epoch": 50.80159433126661, "grad_norm": 0.25356802344322205, "learning_rate": 1e-05, "loss": 0.9834, "step": 57355 }, { "epoch": 50.80602302922941, "grad_norm": 0.23200450837612152, "learning_rate": 1e-05, "loss": 0.9838, "step": 57360 }, { "epoch": 50.8104517271922, "grad_norm": 0.2691291868686676, "learning_rate": 1e-05, "loss": 1.0133, "step": 57365 }, { "epoch": 50.814880425155, "grad_norm": 0.22183488309383392, "learning_rate": 1e-05, "loss": 0.9799, "step": 57370 }, { "epoch": 50.819309123117804, "grad_norm": 0.2938096523284912, "learning_rate": 1e-05, "loss": 1.0272, "step": 57375 }, { "epoch": 50.823737821080606, "grad_norm": 0.20272104442119598, "learning_rate": 1e-05, "loss": 0.981, "step": 57380 }, { "epoch": 50.8281665190434, "grad_norm": 0.24421222507953644, "learning_rate": 1e-05, "loss": 0.9575, "step": 57385 }, { "epoch": 50.8325952170062, "grad_norm": 0.25468650460243225, "learning_rate": 1e-05, "loss": 1.0416, "step": 57390 }, { "epoch": 50.837023914969, "grad_norm": 0.21876370906829834, "learning_rate": 1e-05, "loss": 0.9244, "step": 57395 }, { "epoch": 50.841452612931796, "grad_norm": 0.27346208691596985, "learning_rate": 1e-05, "loss": 0.9552, "step": 57400 }, { "epoch": 50.8458813108946, "grad_norm": 0.26185932755470276, "learning_rate": 1e-05, "loss": 0.9746, "step": 57405 }, { "epoch": 50.8503100088574, "grad_norm": 0.28568413853645325, "learning_rate": 1e-05, "loss": 0.9654, "step": 57410 }, { "epoch": 50.85473870682019, "grad_norm": 0.22601120173931122, "learning_rate": 1e-05, "loss": 0.99, "step": 57415 }, { "epoch": 50.859167404782994, "grad_norm": 0.28095149993896484, "learning_rate": 1e-05, "loss": 0.9435, "step": 57420 }, { "epoch": 50.863596102745795, "grad_norm": 0.26968300342559814, "learning_rate": 1e-05, "loss": 0.95, "step": 57425 }, { "epoch": 50.86802480070859, "grad_norm": 0.27069154381752014, "learning_rate": 1e-05, "loss": 0.971, "step": 57430 }, { "epoch": 50.87245349867139, "grad_norm": 0.21626657247543335, "learning_rate": 1e-05, "loss": 0.9582, "step": 57435 }, { "epoch": 50.87688219663419, "grad_norm": 0.2365463227033615, "learning_rate": 1e-05, "loss": 0.9986, "step": 57440 }, { "epoch": 50.881310894596986, "grad_norm": 0.2627531886100769, "learning_rate": 1e-05, "loss": 0.9769, "step": 57445 }, { "epoch": 50.88573959255979, "grad_norm": 0.24699030816555023, "learning_rate": 1e-05, "loss": 0.9467, "step": 57450 }, { "epoch": 50.89016829052259, "grad_norm": 0.24027204513549805, "learning_rate": 1e-05, "loss": 0.9018, "step": 57455 }, { "epoch": 50.89459698848538, "grad_norm": 0.22851277887821198, "learning_rate": 1e-05, "loss": 0.9284, "step": 57460 }, { "epoch": 50.899025686448184, "grad_norm": 0.25088736414909363, "learning_rate": 1e-05, "loss": 0.9744, "step": 57465 }, { "epoch": 50.903454384410985, "grad_norm": 0.22899356484413147, "learning_rate": 1e-05, "loss": 0.959, "step": 57470 }, { "epoch": 50.90788308237378, "grad_norm": 0.22089873254299164, "learning_rate": 1e-05, "loss": 0.9714, "step": 57475 }, { "epoch": 50.91231178033658, "grad_norm": 0.2423372119665146, "learning_rate": 1e-05, "loss": 1.0039, "step": 57480 }, { "epoch": 50.91674047829938, "grad_norm": 0.2189023792743683, "learning_rate": 1e-05, "loss": 1.0067, "step": 57485 }, { "epoch": 50.921169176262175, "grad_norm": 0.2416541427373886, "learning_rate": 1e-05, "loss": 0.9619, "step": 57490 }, { "epoch": 50.92559787422498, "grad_norm": 0.2738267481327057, "learning_rate": 1e-05, "loss": 0.9585, "step": 57495 }, { "epoch": 50.93002657218778, "grad_norm": 0.233888640999794, "learning_rate": 1e-05, "loss": 0.9808, "step": 57500 }, { "epoch": 50.93445527015058, "grad_norm": 0.22839553654193878, "learning_rate": 1e-05, "loss": 1.005, "step": 57505 }, { "epoch": 50.93888396811337, "grad_norm": 0.2798800468444824, "learning_rate": 1e-05, "loss": 0.9462, "step": 57510 }, { "epoch": 50.943312666076174, "grad_norm": 0.20363402366638184, "learning_rate": 1e-05, "loss": 0.9537, "step": 57515 }, { "epoch": 50.947741364038976, "grad_norm": 0.27506139874458313, "learning_rate": 1e-05, "loss": 0.9894, "step": 57520 }, { "epoch": 50.95217006200177, "grad_norm": 0.2358459234237671, "learning_rate": 1e-05, "loss": 1.0208, "step": 57525 }, { "epoch": 50.95659875996457, "grad_norm": 0.22368596494197845, "learning_rate": 1e-05, "loss": 1.0072, "step": 57530 }, { "epoch": 50.96102745792737, "grad_norm": 0.26471343636512756, "learning_rate": 1e-05, "loss": 0.9928, "step": 57535 }, { "epoch": 50.965456155890166, "grad_norm": 0.24759797751903534, "learning_rate": 1e-05, "loss": 0.9689, "step": 57540 }, { "epoch": 50.96988485385297, "grad_norm": 0.21278832852840424, "learning_rate": 1e-05, "loss": 0.9742, "step": 57545 }, { "epoch": 50.97431355181577, "grad_norm": 0.22913242876529694, "learning_rate": 1e-05, "loss": 0.9868, "step": 57550 }, { "epoch": 50.97874224977856, "grad_norm": 0.28342562913894653, "learning_rate": 1e-05, "loss": 0.9382, "step": 57555 }, { "epoch": 50.983170947741364, "grad_norm": 0.2534969747066498, "learning_rate": 1e-05, "loss": 0.9759, "step": 57560 }, { "epoch": 50.987599645704165, "grad_norm": 0.3334161639213562, "learning_rate": 1e-05, "loss": 0.9988, "step": 57565 }, { "epoch": 50.99202834366696, "grad_norm": 0.24183982610702515, "learning_rate": 1e-05, "loss": 1.0285, "step": 57570 }, { "epoch": 50.99645704162976, "grad_norm": 0.23501867055892944, "learning_rate": 1e-05, "loss": 0.9546, "step": 57575 }, { "epoch": 51.00088573959256, "grad_norm": 0.2851235866546631, "learning_rate": 1e-05, "loss": 0.9409, "step": 57580 }, { "epoch": 51.005314437555356, "grad_norm": 0.21883146464824677, "learning_rate": 1e-05, "loss": 1.0316, "step": 57585 }, { "epoch": 51.00974313551816, "grad_norm": 0.2711237370967865, "learning_rate": 1e-05, "loss": 0.9581, "step": 57590 }, { "epoch": 51.01417183348096, "grad_norm": 0.22978302836418152, "learning_rate": 1e-05, "loss": 0.9915, "step": 57595 }, { "epoch": 51.01860053144375, "grad_norm": 0.21813155710697174, "learning_rate": 1e-05, "loss": 1.0094, "step": 57600 }, { "epoch": 51.02302922940655, "grad_norm": 0.2518755793571472, "learning_rate": 1e-05, "loss": 0.976, "step": 57605 }, { "epoch": 51.027457927369355, "grad_norm": 0.22469745576381683, "learning_rate": 1e-05, "loss": 0.9986, "step": 57610 }, { "epoch": 51.03188662533215, "grad_norm": 0.27151429653167725, "learning_rate": 1e-05, "loss": 0.971, "step": 57615 }, { "epoch": 51.03631532329495, "grad_norm": 0.2737997770309448, "learning_rate": 1e-05, "loss": 1.0624, "step": 57620 }, { "epoch": 51.04074402125775, "grad_norm": 0.21020102500915527, "learning_rate": 1e-05, "loss": 0.9175, "step": 57625 }, { "epoch": 51.04517271922055, "grad_norm": 0.2312561571598053, "learning_rate": 1e-05, "loss": 0.9977, "step": 57630 }, { "epoch": 51.04960141718335, "grad_norm": 0.2422817200422287, "learning_rate": 1e-05, "loss": 0.9696, "step": 57635 }, { "epoch": 51.05403011514615, "grad_norm": 0.21907754242420197, "learning_rate": 1e-05, "loss": 0.9747, "step": 57640 }, { "epoch": 51.05845881310895, "grad_norm": 0.2058318704366684, "learning_rate": 1e-05, "loss": 0.9864, "step": 57645 }, { "epoch": 51.06288751107174, "grad_norm": 0.24165132641792297, "learning_rate": 1e-05, "loss": 0.9841, "step": 57650 }, { "epoch": 51.067316209034544, "grad_norm": 0.2245476394891739, "learning_rate": 1e-05, "loss": 0.9428, "step": 57655 }, { "epoch": 51.071744906997345, "grad_norm": 0.21088151633739471, "learning_rate": 1e-05, "loss": 0.9917, "step": 57660 }, { "epoch": 51.07617360496014, "grad_norm": 0.2458590865135193, "learning_rate": 1e-05, "loss": 0.9597, "step": 57665 }, { "epoch": 51.08060230292294, "grad_norm": 0.2767459750175476, "learning_rate": 1e-05, "loss": 1.0048, "step": 57670 }, { "epoch": 51.08503100088574, "grad_norm": 0.22455215454101562, "learning_rate": 1e-05, "loss": 0.9918, "step": 57675 }, { "epoch": 51.089459698848536, "grad_norm": 0.2461453229188919, "learning_rate": 1e-05, "loss": 1.0039, "step": 57680 }, { "epoch": 51.09388839681134, "grad_norm": 0.20260339975357056, "learning_rate": 1e-05, "loss": 0.9885, "step": 57685 }, { "epoch": 51.09831709477414, "grad_norm": 0.2502760589122772, "learning_rate": 1e-05, "loss": 0.9046, "step": 57690 }, { "epoch": 51.10274579273693, "grad_norm": 0.2781901955604553, "learning_rate": 1e-05, "loss": 0.985, "step": 57695 }, { "epoch": 51.107174490699734, "grad_norm": 0.22900964319705963, "learning_rate": 1e-05, "loss": 0.9998, "step": 57700 }, { "epoch": 51.111603188662535, "grad_norm": 0.22861196100711823, "learning_rate": 1e-05, "loss": 0.9454, "step": 57705 }, { "epoch": 51.11603188662533, "grad_norm": 0.30818867683410645, "learning_rate": 1e-05, "loss": 0.9656, "step": 57710 }, { "epoch": 51.12046058458813, "grad_norm": 0.2083790898323059, "learning_rate": 1e-05, "loss": 0.9683, "step": 57715 }, { "epoch": 51.12488928255093, "grad_norm": 0.2377614825963974, "learning_rate": 1e-05, "loss": 0.9665, "step": 57720 }, { "epoch": 51.129317980513726, "grad_norm": 0.2846793830394745, "learning_rate": 1e-05, "loss": 0.9449, "step": 57725 }, { "epoch": 51.13374667847653, "grad_norm": 0.284907728433609, "learning_rate": 1e-05, "loss": 0.9415, "step": 57730 }, { "epoch": 51.13817537643933, "grad_norm": 0.2716125547885895, "learning_rate": 1e-05, "loss": 0.9296, "step": 57735 }, { "epoch": 51.14260407440213, "grad_norm": 0.25886595249176025, "learning_rate": 1e-05, "loss": 0.9766, "step": 57740 }, { "epoch": 51.14703277236492, "grad_norm": 0.29313427209854126, "learning_rate": 1e-05, "loss": 0.9547, "step": 57745 }, { "epoch": 51.151461470327725, "grad_norm": 0.2650250196456909, "learning_rate": 1e-05, "loss": 0.9833, "step": 57750 }, { "epoch": 51.155890168290526, "grad_norm": 0.25280410051345825, "learning_rate": 1e-05, "loss": 0.9556, "step": 57755 }, { "epoch": 51.16031886625332, "grad_norm": 0.2823229730129242, "learning_rate": 1e-05, "loss": 0.9509, "step": 57760 }, { "epoch": 51.16474756421612, "grad_norm": 0.2749771773815155, "learning_rate": 1e-05, "loss": 1.0363, "step": 57765 }, { "epoch": 51.16917626217892, "grad_norm": 0.26156389713287354, "learning_rate": 1e-05, "loss": 0.9718, "step": 57770 }, { "epoch": 51.173604960141716, "grad_norm": 0.23654894530773163, "learning_rate": 1e-05, "loss": 1.0199, "step": 57775 }, { "epoch": 51.17803365810452, "grad_norm": 0.2652944028377533, "learning_rate": 1e-05, "loss": 0.9951, "step": 57780 }, { "epoch": 51.18246235606732, "grad_norm": 0.2423141747713089, "learning_rate": 1e-05, "loss": 0.9352, "step": 57785 }, { "epoch": 51.18689105403011, "grad_norm": 0.25852829217910767, "learning_rate": 1e-05, "loss": 0.9585, "step": 57790 }, { "epoch": 51.191319751992914, "grad_norm": 0.22827711701393127, "learning_rate": 1e-05, "loss": 0.9797, "step": 57795 }, { "epoch": 51.195748449955715, "grad_norm": 0.2187909483909607, "learning_rate": 1e-05, "loss": 0.9933, "step": 57800 }, { "epoch": 51.20017714791851, "grad_norm": 0.2305816113948822, "learning_rate": 1e-05, "loss": 0.9864, "step": 57805 }, { "epoch": 51.20460584588131, "grad_norm": 0.24282309412956238, "learning_rate": 1e-05, "loss": 0.9973, "step": 57810 }, { "epoch": 51.20903454384411, "grad_norm": 0.21139930188655853, "learning_rate": 1e-05, "loss": 0.9923, "step": 57815 }, { "epoch": 51.213463241806906, "grad_norm": 0.2523021101951599, "learning_rate": 1e-05, "loss": 0.9717, "step": 57820 }, { "epoch": 51.21789193976971, "grad_norm": 0.24135370552539825, "learning_rate": 1e-05, "loss": 0.9759, "step": 57825 }, { "epoch": 51.22232063773251, "grad_norm": 0.27971288561820984, "learning_rate": 1e-05, "loss": 0.9963, "step": 57830 }, { "epoch": 51.2267493356953, "grad_norm": 0.222804456949234, "learning_rate": 1e-05, "loss": 0.968, "step": 57835 }, { "epoch": 51.231178033658104, "grad_norm": 0.20857460796833038, "learning_rate": 1e-05, "loss": 0.9956, "step": 57840 }, { "epoch": 51.235606731620905, "grad_norm": 0.228929802775383, "learning_rate": 1e-05, "loss": 0.9704, "step": 57845 }, { "epoch": 51.2400354295837, "grad_norm": 0.2285294383764267, "learning_rate": 1e-05, "loss": 0.9863, "step": 57850 }, { "epoch": 51.2444641275465, "grad_norm": 0.2479756772518158, "learning_rate": 1e-05, "loss": 0.9747, "step": 57855 }, { "epoch": 51.2488928255093, "grad_norm": 0.24195349216461182, "learning_rate": 1e-05, "loss": 1.0197, "step": 57860 }, { "epoch": 51.2533215234721, "grad_norm": 0.24460451304912567, "learning_rate": 1e-05, "loss": 0.9476, "step": 57865 }, { "epoch": 51.2577502214349, "grad_norm": 0.23649516701698303, "learning_rate": 1e-05, "loss": 1.0006, "step": 57870 }, { "epoch": 51.2621789193977, "grad_norm": 0.2504752278327942, "learning_rate": 1e-05, "loss": 0.9777, "step": 57875 }, { "epoch": 51.2666076173605, "grad_norm": 0.30173248052597046, "learning_rate": 1e-05, "loss": 0.9417, "step": 57880 }, { "epoch": 51.27103631532329, "grad_norm": 0.24462582170963287, "learning_rate": 1e-05, "loss": 0.9684, "step": 57885 }, { "epoch": 51.275465013286095, "grad_norm": 0.2838738262653351, "learning_rate": 1e-05, "loss": 0.9163, "step": 57890 }, { "epoch": 51.279893711248896, "grad_norm": 0.2863309383392334, "learning_rate": 1e-05, "loss": 0.9775, "step": 57895 }, { "epoch": 51.28432240921169, "grad_norm": 0.24349147081375122, "learning_rate": 1e-05, "loss": 1.0236, "step": 57900 }, { "epoch": 51.28875110717449, "grad_norm": 0.2216518521308899, "learning_rate": 1e-05, "loss": 0.9445, "step": 57905 }, { "epoch": 51.29317980513729, "grad_norm": 0.20430532097816467, "learning_rate": 1e-05, "loss": 0.9816, "step": 57910 }, { "epoch": 51.297608503100086, "grad_norm": 0.2482028603553772, "learning_rate": 1e-05, "loss": 0.9336, "step": 57915 }, { "epoch": 51.30203720106289, "grad_norm": 0.23034650087356567, "learning_rate": 1e-05, "loss": 1.0021, "step": 57920 }, { "epoch": 51.30646589902569, "grad_norm": 0.25499558448791504, "learning_rate": 1e-05, "loss": 0.9519, "step": 57925 }, { "epoch": 51.31089459698848, "grad_norm": 0.21269191801548004, "learning_rate": 1e-05, "loss": 0.9855, "step": 57930 }, { "epoch": 51.315323294951284, "grad_norm": 0.26415735483169556, "learning_rate": 1e-05, "loss": 1.0168, "step": 57935 }, { "epoch": 51.319751992914085, "grad_norm": 0.23134873807430267, "learning_rate": 1e-05, "loss": 0.9411, "step": 57940 }, { "epoch": 51.32418069087688, "grad_norm": 0.2758760154247284, "learning_rate": 1e-05, "loss": 0.9564, "step": 57945 }, { "epoch": 51.32860938883968, "grad_norm": 0.2471761554479599, "learning_rate": 1e-05, "loss": 0.9056, "step": 57950 }, { "epoch": 51.33303808680248, "grad_norm": 0.2567176818847656, "learning_rate": 1e-05, "loss": 1.0005, "step": 57955 }, { "epoch": 51.337466784765276, "grad_norm": 0.2770969271659851, "learning_rate": 1e-05, "loss": 0.9816, "step": 57960 }, { "epoch": 51.34189548272808, "grad_norm": 0.2107272446155548, "learning_rate": 1e-05, "loss": 0.9684, "step": 57965 }, { "epoch": 51.34632418069088, "grad_norm": 0.24066130816936493, "learning_rate": 1e-05, "loss": 1.0081, "step": 57970 }, { "epoch": 51.35075287865367, "grad_norm": 0.20916897058486938, "learning_rate": 1e-05, "loss": 1.0464, "step": 57975 }, { "epoch": 51.355181576616474, "grad_norm": 0.23597535490989685, "learning_rate": 1e-05, "loss": 0.9672, "step": 57980 }, { "epoch": 51.359610274579275, "grad_norm": 0.2893282473087311, "learning_rate": 1e-05, "loss": 0.9738, "step": 57985 }, { "epoch": 51.364038972542076, "grad_norm": 0.20720933377742767, "learning_rate": 1e-05, "loss": 0.9605, "step": 57990 }, { "epoch": 51.36846767050487, "grad_norm": 0.21616671979427338, "learning_rate": 1e-05, "loss": 0.9614, "step": 57995 }, { "epoch": 51.37289636846767, "grad_norm": 0.22247853875160217, "learning_rate": 1e-05, "loss": 1.0046, "step": 58000 }, { "epoch": 51.37732506643047, "grad_norm": 0.1928655356168747, "learning_rate": 1e-05, "loss": 0.9585, "step": 58005 }, { "epoch": 51.38175376439327, "grad_norm": 0.2537802755832672, "learning_rate": 1e-05, "loss": 0.9498, "step": 58010 }, { "epoch": 51.38618246235607, "grad_norm": 0.24991494417190552, "learning_rate": 1e-05, "loss": 0.9659, "step": 58015 }, { "epoch": 51.39061116031887, "grad_norm": 0.22834308445453644, "learning_rate": 1e-05, "loss": 0.9463, "step": 58020 }, { "epoch": 51.39503985828166, "grad_norm": 0.23812471330165863, "learning_rate": 1e-05, "loss": 0.9237, "step": 58025 }, { "epoch": 51.399468556244464, "grad_norm": 0.21515800058841705, "learning_rate": 1e-05, "loss": 0.9292, "step": 58030 }, { "epoch": 51.403897254207266, "grad_norm": 0.242784321308136, "learning_rate": 1e-05, "loss": 1.0316, "step": 58035 }, { "epoch": 51.40832595217006, "grad_norm": 0.23172993957996368, "learning_rate": 1e-05, "loss": 0.9636, "step": 58040 }, { "epoch": 51.41275465013286, "grad_norm": 0.2517952024936676, "learning_rate": 1e-05, "loss": 1.0032, "step": 58045 }, { "epoch": 51.41718334809566, "grad_norm": 0.23771822452545166, "learning_rate": 1e-05, "loss": 0.9703, "step": 58050 }, { "epoch": 51.421612046058456, "grad_norm": 0.2543613314628601, "learning_rate": 1e-05, "loss": 1.0384, "step": 58055 }, { "epoch": 51.42604074402126, "grad_norm": 0.25572437047958374, "learning_rate": 1e-05, "loss": 0.9605, "step": 58060 }, { "epoch": 51.43046944198406, "grad_norm": 0.2784786820411682, "learning_rate": 1e-05, "loss": 0.958, "step": 58065 }, { "epoch": 51.43489813994685, "grad_norm": 0.22722876071929932, "learning_rate": 1e-05, "loss": 0.9692, "step": 58070 }, { "epoch": 51.439326837909654, "grad_norm": 0.23458446562290192, "learning_rate": 1e-05, "loss": 0.9258, "step": 58075 }, { "epoch": 51.443755535872455, "grad_norm": 0.2506619691848755, "learning_rate": 1e-05, "loss": 1.011, "step": 58080 }, { "epoch": 51.44818423383525, "grad_norm": 0.22852088510990143, "learning_rate": 1e-05, "loss": 0.9728, "step": 58085 }, { "epoch": 51.45261293179805, "grad_norm": 0.23912429809570312, "learning_rate": 1e-05, "loss": 0.9601, "step": 58090 }, { "epoch": 51.45704162976085, "grad_norm": 0.22203591465950012, "learning_rate": 1e-05, "loss": 0.9186, "step": 58095 }, { "epoch": 51.461470327723646, "grad_norm": 0.28027209639549255, "learning_rate": 1e-05, "loss": 0.975, "step": 58100 }, { "epoch": 51.46589902568645, "grad_norm": 0.2391355186700821, "learning_rate": 1e-05, "loss": 0.9703, "step": 58105 }, { "epoch": 51.47032772364925, "grad_norm": 0.23409736156463623, "learning_rate": 1e-05, "loss": 1.0032, "step": 58110 }, { "epoch": 51.47475642161205, "grad_norm": 0.23817473649978638, "learning_rate": 1e-05, "loss": 0.9586, "step": 58115 }, { "epoch": 51.479185119574844, "grad_norm": 0.21340115368366241, "learning_rate": 1e-05, "loss": 0.9406, "step": 58120 }, { "epoch": 51.483613817537645, "grad_norm": 0.21516743302345276, "learning_rate": 1e-05, "loss": 0.9734, "step": 58125 }, { "epoch": 51.488042515500446, "grad_norm": 0.2183932363986969, "learning_rate": 1e-05, "loss": 0.9424, "step": 58130 }, { "epoch": 51.49247121346324, "grad_norm": 0.2642355263233185, "learning_rate": 1e-05, "loss": 0.958, "step": 58135 }, { "epoch": 51.49689991142604, "grad_norm": 0.243107870221138, "learning_rate": 1e-05, "loss": 0.9438, "step": 58140 }, { "epoch": 51.50132860938884, "grad_norm": 0.2183106541633606, "learning_rate": 1e-05, "loss": 1.0178, "step": 58145 }, { "epoch": 51.50575730735164, "grad_norm": 0.22349101305007935, "learning_rate": 1e-05, "loss": 0.9861, "step": 58150 }, { "epoch": 51.51018600531444, "grad_norm": 0.22907379269599915, "learning_rate": 1e-05, "loss": 1.005, "step": 58155 }, { "epoch": 51.51461470327724, "grad_norm": 0.24334098398685455, "learning_rate": 1e-05, "loss": 0.9638, "step": 58160 }, { "epoch": 51.51904340124003, "grad_norm": 0.24807412922382355, "learning_rate": 1e-05, "loss": 0.9978, "step": 58165 }, { "epoch": 51.523472099202834, "grad_norm": 0.21496319770812988, "learning_rate": 1e-05, "loss": 0.9424, "step": 58170 }, { "epoch": 51.527900797165636, "grad_norm": 0.27487337589263916, "learning_rate": 1e-05, "loss": 0.983, "step": 58175 }, { "epoch": 51.53232949512843, "grad_norm": 0.24121910333633423, "learning_rate": 1e-05, "loss": 0.977, "step": 58180 }, { "epoch": 51.53675819309123, "grad_norm": 0.19430440664291382, "learning_rate": 1e-05, "loss": 1.0438, "step": 58185 }, { "epoch": 51.54118689105403, "grad_norm": 0.22432756423950195, "learning_rate": 1e-05, "loss": 0.9995, "step": 58190 }, { "epoch": 51.545615589016826, "grad_norm": 0.25665029883384705, "learning_rate": 1e-05, "loss": 1.0006, "step": 58195 }, { "epoch": 51.55004428697963, "grad_norm": 0.24494412541389465, "learning_rate": 1e-05, "loss": 0.9829, "step": 58200 }, { "epoch": 51.55447298494243, "grad_norm": 0.20735104382038116, "learning_rate": 1e-05, "loss": 0.9595, "step": 58205 }, { "epoch": 51.55890168290522, "grad_norm": 0.23223921656608582, "learning_rate": 1e-05, "loss": 0.9904, "step": 58210 }, { "epoch": 51.563330380868024, "grad_norm": 0.2448100596666336, "learning_rate": 1e-05, "loss": 0.9655, "step": 58215 }, { "epoch": 51.567759078830825, "grad_norm": 0.28599679470062256, "learning_rate": 1e-05, "loss": 0.9404, "step": 58220 }, { "epoch": 51.57218777679362, "grad_norm": 0.2101174145936966, "learning_rate": 1e-05, "loss": 0.9526, "step": 58225 }, { "epoch": 51.57661647475642, "grad_norm": 0.23095417022705078, "learning_rate": 1e-05, "loss": 0.9208, "step": 58230 }, { "epoch": 51.58104517271922, "grad_norm": 0.22340860962867737, "learning_rate": 1e-05, "loss": 1.0015, "step": 58235 }, { "epoch": 51.58547387068202, "grad_norm": 0.22469645738601685, "learning_rate": 1e-05, "loss": 0.9737, "step": 58240 }, { "epoch": 51.58990256864482, "grad_norm": 0.21839606761932373, "learning_rate": 1e-05, "loss": 0.9769, "step": 58245 }, { "epoch": 51.59433126660762, "grad_norm": 0.3325742483139038, "learning_rate": 1e-05, "loss": 0.9877, "step": 58250 }, { "epoch": 51.59875996457042, "grad_norm": 0.2494598925113678, "learning_rate": 1e-05, "loss": 1.0228, "step": 58255 }, { "epoch": 51.60318866253321, "grad_norm": 0.2741064429283142, "learning_rate": 1e-05, "loss": 0.9573, "step": 58260 }, { "epoch": 51.607617360496015, "grad_norm": 0.2602717876434326, "learning_rate": 1e-05, "loss": 0.935, "step": 58265 }, { "epoch": 51.612046058458816, "grad_norm": 0.3043157756328583, "learning_rate": 1e-05, "loss": 0.9503, "step": 58270 }, { "epoch": 51.61647475642161, "grad_norm": 0.30509912967681885, "learning_rate": 1e-05, "loss": 0.9842, "step": 58275 }, { "epoch": 51.62090345438441, "grad_norm": 0.28390470147132874, "learning_rate": 1e-05, "loss": 0.9238, "step": 58280 }, { "epoch": 51.62533215234721, "grad_norm": 0.22268792986869812, "learning_rate": 1e-05, "loss": 0.9577, "step": 58285 }, { "epoch": 51.62976085031001, "grad_norm": 0.23918232321739197, "learning_rate": 1e-05, "loss": 0.9304, "step": 58290 }, { "epoch": 51.63418954827281, "grad_norm": 0.22927765548229218, "learning_rate": 1e-05, "loss": 0.9702, "step": 58295 }, { "epoch": 51.63861824623561, "grad_norm": 0.2979111671447754, "learning_rate": 1e-05, "loss": 0.9731, "step": 58300 }, { "epoch": 51.6430469441984, "grad_norm": 0.2556305527687073, "learning_rate": 1e-05, "loss": 1.0072, "step": 58305 }, { "epoch": 51.647475642161204, "grad_norm": 0.23678626120090485, "learning_rate": 1e-05, "loss": 1.0268, "step": 58310 }, { "epoch": 51.651904340124005, "grad_norm": 0.2691119909286499, "learning_rate": 1e-05, "loss": 0.9077, "step": 58315 }, { "epoch": 51.6563330380868, "grad_norm": 0.2591462731361389, "learning_rate": 1e-05, "loss": 1.0157, "step": 58320 }, { "epoch": 51.6607617360496, "grad_norm": 0.2804064154624939, "learning_rate": 1e-05, "loss": 0.9614, "step": 58325 }, { "epoch": 51.6651904340124, "grad_norm": 0.24454502761363983, "learning_rate": 1e-05, "loss": 0.9617, "step": 58330 }, { "epoch": 51.669619131975196, "grad_norm": 0.24861130118370056, "learning_rate": 1e-05, "loss": 0.9869, "step": 58335 }, { "epoch": 51.674047829938, "grad_norm": 0.26016995310783386, "learning_rate": 1e-05, "loss": 0.9937, "step": 58340 }, { "epoch": 51.6784765279008, "grad_norm": 0.2355981022119522, "learning_rate": 1e-05, "loss": 0.8913, "step": 58345 }, { "epoch": 51.68290522586359, "grad_norm": 0.24048180878162384, "learning_rate": 1e-05, "loss": 0.9757, "step": 58350 }, { "epoch": 51.687333923826394, "grad_norm": 0.22731292247772217, "learning_rate": 1e-05, "loss": 0.9539, "step": 58355 }, { "epoch": 51.691762621789195, "grad_norm": 0.2594451606273651, "learning_rate": 1e-05, "loss": 0.9679, "step": 58360 }, { "epoch": 51.696191319751996, "grad_norm": 0.21449124813079834, "learning_rate": 1e-05, "loss": 0.9646, "step": 58365 }, { "epoch": 51.70062001771479, "grad_norm": 0.24885571002960205, "learning_rate": 1e-05, "loss": 0.9631, "step": 58370 }, { "epoch": 51.70504871567759, "grad_norm": 0.24724626541137695, "learning_rate": 1e-05, "loss": 0.9628, "step": 58375 }, { "epoch": 51.70947741364039, "grad_norm": 0.2644142210483551, "learning_rate": 1e-05, "loss": 1.0189, "step": 58380 }, { "epoch": 51.71390611160319, "grad_norm": 0.21985891461372375, "learning_rate": 1e-05, "loss": 0.9896, "step": 58385 }, { "epoch": 51.71833480956599, "grad_norm": 0.2015070915222168, "learning_rate": 1e-05, "loss": 0.9821, "step": 58390 }, { "epoch": 51.72276350752879, "grad_norm": 0.2373541295528412, "learning_rate": 1e-05, "loss": 0.9488, "step": 58395 }, { "epoch": 51.72719220549158, "grad_norm": 0.20183350145816803, "learning_rate": 1e-05, "loss": 0.9655, "step": 58400 }, { "epoch": 51.731620903454385, "grad_norm": 0.21155519783496857, "learning_rate": 1e-05, "loss": 0.969, "step": 58405 }, { "epoch": 51.736049601417186, "grad_norm": 0.26506441831588745, "learning_rate": 1e-05, "loss": 0.9503, "step": 58410 }, { "epoch": 51.74047829937998, "grad_norm": 0.24553550779819489, "learning_rate": 1e-05, "loss": 0.9961, "step": 58415 }, { "epoch": 51.74490699734278, "grad_norm": 0.20289985835552216, "learning_rate": 1e-05, "loss": 1.016, "step": 58420 }, { "epoch": 51.74933569530558, "grad_norm": 0.19446690380573273, "learning_rate": 1e-05, "loss": 1.0352, "step": 58425 }, { "epoch": 51.753764393268376, "grad_norm": 0.26361045241355896, "learning_rate": 1e-05, "loss": 0.9809, "step": 58430 }, { "epoch": 51.75819309123118, "grad_norm": 0.28387776017189026, "learning_rate": 1e-05, "loss": 0.9442, "step": 58435 }, { "epoch": 51.76262178919398, "grad_norm": 0.2082238644361496, "learning_rate": 1e-05, "loss": 0.9242, "step": 58440 }, { "epoch": 51.76705048715677, "grad_norm": 0.22151020169258118, "learning_rate": 1e-05, "loss": 0.9247, "step": 58445 }, { "epoch": 51.771479185119574, "grad_norm": 0.21319808065891266, "learning_rate": 1e-05, "loss": 0.9265, "step": 58450 }, { "epoch": 51.775907883082375, "grad_norm": 0.1932530701160431, "learning_rate": 1e-05, "loss": 0.9285, "step": 58455 }, { "epoch": 51.78033658104517, "grad_norm": 0.22282566130161285, "learning_rate": 1e-05, "loss": 0.9891, "step": 58460 }, { "epoch": 51.78476527900797, "grad_norm": 0.2406737357378006, "learning_rate": 1e-05, "loss": 0.9211, "step": 58465 }, { "epoch": 51.78919397697077, "grad_norm": 0.255719393491745, "learning_rate": 1e-05, "loss": 0.9835, "step": 58470 }, { "epoch": 51.79362267493357, "grad_norm": 0.2293304055929184, "learning_rate": 1e-05, "loss": 1.0174, "step": 58475 }, { "epoch": 51.79805137289637, "grad_norm": 0.2705524265766144, "learning_rate": 1e-05, "loss": 1.0306, "step": 58480 }, { "epoch": 51.80248007085917, "grad_norm": 0.20759350061416626, "learning_rate": 1e-05, "loss": 0.9692, "step": 58485 }, { "epoch": 51.80690876882197, "grad_norm": 0.22513075172901154, "learning_rate": 1e-05, "loss": 1.0285, "step": 58490 }, { "epoch": 51.811337466784764, "grad_norm": 0.2505941390991211, "learning_rate": 1e-05, "loss": 0.9401, "step": 58495 }, { "epoch": 51.815766164747565, "grad_norm": 0.2880514860153198, "learning_rate": 1e-05, "loss": 0.9217, "step": 58500 }, { "epoch": 51.820194862710366, "grad_norm": 0.2535289525985718, "learning_rate": 1e-05, "loss": 1.0084, "step": 58505 }, { "epoch": 51.82462356067316, "grad_norm": 0.2720993757247925, "learning_rate": 1e-05, "loss": 0.9772, "step": 58510 }, { "epoch": 51.82905225863596, "grad_norm": 0.23074521124362946, "learning_rate": 1e-05, "loss": 0.984, "step": 58515 }, { "epoch": 51.83348095659876, "grad_norm": 0.24251681566238403, "learning_rate": 1e-05, "loss": 1.0156, "step": 58520 }, { "epoch": 51.83790965456156, "grad_norm": 0.24032023549079895, "learning_rate": 1e-05, "loss": 0.9657, "step": 58525 }, { "epoch": 51.84233835252436, "grad_norm": 0.22962750494480133, "learning_rate": 1e-05, "loss": 0.9704, "step": 58530 }, { "epoch": 51.84676705048716, "grad_norm": 0.2727820575237274, "learning_rate": 1e-05, "loss": 1.0029, "step": 58535 }, { "epoch": 51.85119574844995, "grad_norm": 0.27132317423820496, "learning_rate": 1e-05, "loss": 0.8938, "step": 58540 }, { "epoch": 51.855624446412754, "grad_norm": 0.27526429295539856, "learning_rate": 1e-05, "loss": 0.9191, "step": 58545 }, { "epoch": 51.860053144375556, "grad_norm": 0.27985382080078125, "learning_rate": 1e-05, "loss": 1.0087, "step": 58550 }, { "epoch": 51.86448184233835, "grad_norm": 0.2495328187942505, "learning_rate": 1e-05, "loss": 0.9927, "step": 58555 }, { "epoch": 51.86891054030115, "grad_norm": 0.24981878697872162, "learning_rate": 1e-05, "loss": 0.9354, "step": 58560 }, { "epoch": 51.87333923826395, "grad_norm": 0.2278611958026886, "learning_rate": 1e-05, "loss": 0.9923, "step": 58565 }, { "epoch": 51.877767936226746, "grad_norm": 0.2569010853767395, "learning_rate": 1e-05, "loss": 0.9829, "step": 58570 }, { "epoch": 51.88219663418955, "grad_norm": 0.22449199855327606, "learning_rate": 1e-05, "loss": 0.9925, "step": 58575 }, { "epoch": 51.88662533215235, "grad_norm": 0.27370843291282654, "learning_rate": 1e-05, "loss": 0.993, "step": 58580 }, { "epoch": 51.89105403011514, "grad_norm": 0.24513772130012512, "learning_rate": 1e-05, "loss": 0.9841, "step": 58585 }, { "epoch": 51.895482728077944, "grad_norm": 0.2431306540966034, "learning_rate": 1e-05, "loss": 0.9682, "step": 58590 }, { "epoch": 51.899911426040745, "grad_norm": 0.20609644055366516, "learning_rate": 1e-05, "loss": 0.981, "step": 58595 }, { "epoch": 51.90434012400354, "grad_norm": 0.24189017713069916, "learning_rate": 1e-05, "loss": 0.9811, "step": 58600 }, { "epoch": 51.90876882196634, "grad_norm": 0.29227396845817566, "learning_rate": 1e-05, "loss": 0.888, "step": 58605 }, { "epoch": 51.91319751992914, "grad_norm": 0.31686240434646606, "learning_rate": 1e-05, "loss": 0.9795, "step": 58610 }, { "epoch": 51.91762621789194, "grad_norm": 0.228986456990242, "learning_rate": 1e-05, "loss": 0.9816, "step": 58615 }, { "epoch": 51.92205491585474, "grad_norm": 0.24290288984775543, "learning_rate": 1e-05, "loss": 0.9546, "step": 58620 }, { "epoch": 51.92648361381754, "grad_norm": 0.20453818142414093, "learning_rate": 1e-05, "loss": 0.9831, "step": 58625 }, { "epoch": 51.93091231178034, "grad_norm": 0.20300108194351196, "learning_rate": 1e-05, "loss": 0.9469, "step": 58630 }, { "epoch": 51.935341009743134, "grad_norm": 0.2920350134372711, "learning_rate": 1e-05, "loss": 1.042, "step": 58635 }, { "epoch": 51.939769707705935, "grad_norm": 0.2604334354400635, "learning_rate": 1e-05, "loss": 0.9436, "step": 58640 }, { "epoch": 51.944198405668736, "grad_norm": 0.20409448444843292, "learning_rate": 1e-05, "loss": 0.945, "step": 58645 }, { "epoch": 51.94862710363153, "grad_norm": 0.24128533899784088, "learning_rate": 1e-05, "loss": 0.9517, "step": 58650 }, { "epoch": 51.95305580159433, "grad_norm": 0.2413007766008377, "learning_rate": 1e-05, "loss": 1.0052, "step": 58655 }, { "epoch": 51.95748449955713, "grad_norm": 0.25519412755966187, "learning_rate": 1e-05, "loss": 0.9581, "step": 58660 }, { "epoch": 51.96191319751993, "grad_norm": 0.2560592293739319, "learning_rate": 1e-05, "loss": 1.0094, "step": 58665 }, { "epoch": 51.96634189548273, "grad_norm": 0.2350081354379654, "learning_rate": 1e-05, "loss": 0.9251, "step": 58670 }, { "epoch": 51.97077059344553, "grad_norm": 0.2191975861787796, "learning_rate": 1e-05, "loss": 0.9469, "step": 58675 }, { "epoch": 51.97519929140832, "grad_norm": 0.260464608669281, "learning_rate": 1e-05, "loss": 0.9143, "step": 58680 }, { "epoch": 51.979627989371124, "grad_norm": 0.24714504182338715, "learning_rate": 1e-05, "loss": 0.9315, "step": 58685 }, { "epoch": 51.984056687333926, "grad_norm": 0.26472267508506775, "learning_rate": 1e-05, "loss": 0.9839, "step": 58690 }, { "epoch": 51.98848538529672, "grad_norm": 0.21740540862083435, "learning_rate": 1e-05, "loss": 0.9719, "step": 58695 }, { "epoch": 51.99291408325952, "grad_norm": 0.23431916534900665, "learning_rate": 1e-05, "loss": 0.9595, "step": 58700 }, { "epoch": 51.99734278122232, "grad_norm": 0.26568934321403503, "learning_rate": 1e-05, "loss": 0.9688, "step": 58705 }, { "epoch": 52.001771479185116, "grad_norm": 0.24277284741401672, "learning_rate": 1e-05, "loss": 1.0266, "step": 58710 }, { "epoch": 52.00620017714792, "grad_norm": 0.19721931219100952, "learning_rate": 1e-05, "loss": 1.015, "step": 58715 }, { "epoch": 52.01062887511072, "grad_norm": 0.2184523493051529, "learning_rate": 1e-05, "loss": 0.9707, "step": 58720 }, { "epoch": 52.01505757307352, "grad_norm": 0.2964482009410858, "learning_rate": 1e-05, "loss": 0.9821, "step": 58725 }, { "epoch": 52.019486271036314, "grad_norm": 0.23082707822322845, "learning_rate": 1e-05, "loss": 0.9771, "step": 58730 }, { "epoch": 52.023914968999115, "grad_norm": 0.20773455500602722, "learning_rate": 1e-05, "loss": 0.9524, "step": 58735 }, { "epoch": 52.028343666961916, "grad_norm": 0.20699819922447205, "learning_rate": 1e-05, "loss": 0.9633, "step": 58740 }, { "epoch": 52.03277236492471, "grad_norm": 0.23711200058460236, "learning_rate": 1e-05, "loss": 0.9304, "step": 58745 }, { "epoch": 52.03720106288751, "grad_norm": 0.2855316698551178, "learning_rate": 1e-05, "loss": 0.9669, "step": 58750 }, { "epoch": 52.04162976085031, "grad_norm": 0.2499338984489441, "learning_rate": 1e-05, "loss": 0.968, "step": 58755 }, { "epoch": 52.04605845881311, "grad_norm": 0.23982737958431244, "learning_rate": 1e-05, "loss": 0.9742, "step": 58760 }, { "epoch": 52.05048715677591, "grad_norm": 0.2482318878173828, "learning_rate": 1e-05, "loss": 0.9386, "step": 58765 }, { "epoch": 52.05491585473871, "grad_norm": 0.2540883719921112, "learning_rate": 1e-05, "loss": 0.9969, "step": 58770 }, { "epoch": 52.0593445527015, "grad_norm": 0.26081958413124084, "learning_rate": 1e-05, "loss": 0.9517, "step": 58775 }, { "epoch": 52.063773250664305, "grad_norm": 0.2717559337615967, "learning_rate": 1e-05, "loss": 1.0005, "step": 58780 }, { "epoch": 52.068201948627106, "grad_norm": 0.2518419027328491, "learning_rate": 1e-05, "loss": 0.9995, "step": 58785 }, { "epoch": 52.0726306465899, "grad_norm": 0.2804437577724457, "learning_rate": 1e-05, "loss": 0.9422, "step": 58790 }, { "epoch": 52.0770593445527, "grad_norm": 0.2536376416683197, "learning_rate": 1e-05, "loss": 1.0313, "step": 58795 }, { "epoch": 52.0814880425155, "grad_norm": 0.25808805227279663, "learning_rate": 1e-05, "loss": 1.0198, "step": 58800 }, { "epoch": 52.0859167404783, "grad_norm": 0.24577517807483673, "learning_rate": 1e-05, "loss": 0.9449, "step": 58805 }, { "epoch": 52.0903454384411, "grad_norm": 0.2653447091579437, "learning_rate": 1e-05, "loss": 0.9958, "step": 58810 }, { "epoch": 52.0947741364039, "grad_norm": 0.2296014428138733, "learning_rate": 1e-05, "loss": 0.9503, "step": 58815 }, { "epoch": 52.09920283436669, "grad_norm": 0.24378883838653564, "learning_rate": 1e-05, "loss": 0.9394, "step": 58820 }, { "epoch": 52.103631532329494, "grad_norm": 0.23796670138835907, "learning_rate": 1e-05, "loss": 0.9207, "step": 58825 }, { "epoch": 52.108060230292296, "grad_norm": 0.24171461164951324, "learning_rate": 1e-05, "loss": 0.9385, "step": 58830 }, { "epoch": 52.11248892825509, "grad_norm": 0.24397942423820496, "learning_rate": 1e-05, "loss": 0.9313, "step": 58835 }, { "epoch": 52.11691762621789, "grad_norm": 0.2165588140487671, "learning_rate": 1e-05, "loss": 0.9564, "step": 58840 }, { "epoch": 52.12134632418069, "grad_norm": 0.2648729979991913, "learning_rate": 1e-05, "loss": 0.9671, "step": 58845 }, { "epoch": 52.12577502214349, "grad_norm": 0.2547149658203125, "learning_rate": 1e-05, "loss": 0.9771, "step": 58850 }, { "epoch": 52.13020372010629, "grad_norm": 0.22421130537986755, "learning_rate": 1e-05, "loss": 0.9137, "step": 58855 }, { "epoch": 52.13463241806909, "grad_norm": 0.2462160885334015, "learning_rate": 1e-05, "loss": 1.0011, "step": 58860 }, { "epoch": 52.13906111603189, "grad_norm": 0.2373134046792984, "learning_rate": 1e-05, "loss": 0.9427, "step": 58865 }, { "epoch": 52.143489813994684, "grad_norm": 0.22970210015773773, "learning_rate": 1e-05, "loss": 0.9657, "step": 58870 }, { "epoch": 52.147918511957485, "grad_norm": 0.2565675377845764, "learning_rate": 1e-05, "loss": 0.9481, "step": 58875 }, { "epoch": 52.152347209920286, "grad_norm": 0.2633257806301117, "learning_rate": 1e-05, "loss": 0.9786, "step": 58880 }, { "epoch": 52.15677590788308, "grad_norm": 0.22295983135700226, "learning_rate": 1e-05, "loss": 0.9844, "step": 58885 }, { "epoch": 52.16120460584588, "grad_norm": 0.22809961438179016, "learning_rate": 1e-05, "loss": 0.9818, "step": 58890 }, { "epoch": 52.16563330380868, "grad_norm": 0.20048491656780243, "learning_rate": 1e-05, "loss": 0.9307, "step": 58895 }, { "epoch": 52.17006200177148, "grad_norm": 0.23810194432735443, "learning_rate": 1e-05, "loss": 1.0004, "step": 58900 }, { "epoch": 52.17449069973428, "grad_norm": 0.2021421492099762, "learning_rate": 1e-05, "loss": 0.9376, "step": 58905 }, { "epoch": 52.17891939769708, "grad_norm": 0.2306343913078308, "learning_rate": 1e-05, "loss": 1.02, "step": 58910 }, { "epoch": 52.18334809565987, "grad_norm": 0.21147963404655457, "learning_rate": 1e-05, "loss": 1.0458, "step": 58915 }, { "epoch": 52.187776793622675, "grad_norm": 0.2717270851135254, "learning_rate": 1e-05, "loss": 0.944, "step": 58920 }, { "epoch": 52.192205491585476, "grad_norm": 0.27564477920532227, "learning_rate": 1e-05, "loss": 0.9791, "step": 58925 }, { "epoch": 52.19663418954827, "grad_norm": 0.2741462290287018, "learning_rate": 1e-05, "loss": 1.0386, "step": 58930 }, { "epoch": 52.20106288751107, "grad_norm": 0.23094050586223602, "learning_rate": 1e-05, "loss": 0.9617, "step": 58935 }, { "epoch": 52.20549158547387, "grad_norm": 0.21067003905773163, "learning_rate": 1e-05, "loss": 1.0139, "step": 58940 }, { "epoch": 52.20992028343667, "grad_norm": 0.2271854728460312, "learning_rate": 1e-05, "loss": 0.9734, "step": 58945 }, { "epoch": 52.21434898139947, "grad_norm": 0.23402239382266998, "learning_rate": 1e-05, "loss": 1.0043, "step": 58950 }, { "epoch": 52.21877767936227, "grad_norm": 0.2510242760181427, "learning_rate": 1e-05, "loss": 0.989, "step": 58955 }, { "epoch": 52.22320637732506, "grad_norm": 0.2374059110879898, "learning_rate": 1e-05, "loss": 0.9227, "step": 58960 }, { "epoch": 52.227635075287864, "grad_norm": 0.2022409588098526, "learning_rate": 1e-05, "loss": 0.9685, "step": 58965 }, { "epoch": 52.232063773250665, "grad_norm": 0.2773072421550751, "learning_rate": 1e-05, "loss": 0.9454, "step": 58970 }, { "epoch": 52.23649247121347, "grad_norm": 0.27207493782043457, "learning_rate": 1e-05, "loss": 0.9957, "step": 58975 }, { "epoch": 52.24092116917626, "grad_norm": 0.21166494488716125, "learning_rate": 1e-05, "loss": 0.924, "step": 58980 }, { "epoch": 52.24534986713906, "grad_norm": 0.24492958188056946, "learning_rate": 1e-05, "loss": 0.968, "step": 58985 }, { "epoch": 52.24977856510186, "grad_norm": 0.22873732447624207, "learning_rate": 1e-05, "loss": 0.9229, "step": 58990 }, { "epoch": 52.25420726306466, "grad_norm": 0.21485452353954315, "learning_rate": 1e-05, "loss": 0.9249, "step": 58995 }, { "epoch": 52.25863596102746, "grad_norm": 0.2711961269378662, "learning_rate": 1e-05, "loss": 0.9696, "step": 59000 }, { "epoch": 52.26306465899026, "grad_norm": 0.21365612745285034, "learning_rate": 1e-05, "loss": 0.9666, "step": 59005 }, { "epoch": 52.267493356953054, "grad_norm": 0.23931793868541718, "learning_rate": 1e-05, "loss": 0.9764, "step": 59010 }, { "epoch": 52.271922054915855, "grad_norm": 0.24862323701381683, "learning_rate": 1e-05, "loss": 0.9663, "step": 59015 }, { "epoch": 52.276350752878656, "grad_norm": 0.261353999376297, "learning_rate": 1e-05, "loss": 0.9711, "step": 59020 }, { "epoch": 52.28077945084145, "grad_norm": 0.2403019219636917, "learning_rate": 1e-05, "loss": 0.9638, "step": 59025 }, { "epoch": 52.28520814880425, "grad_norm": 0.26119378209114075, "learning_rate": 1e-05, "loss": 0.9597, "step": 59030 }, { "epoch": 52.28963684676705, "grad_norm": 0.2288169264793396, "learning_rate": 1e-05, "loss": 0.9714, "step": 59035 }, { "epoch": 52.29406554472985, "grad_norm": 0.25187814235687256, "learning_rate": 1e-05, "loss": 1.0168, "step": 59040 }, { "epoch": 52.29849424269265, "grad_norm": 0.21173524856567383, "learning_rate": 1e-05, "loss": 0.8987, "step": 59045 }, { "epoch": 52.30292294065545, "grad_norm": 0.22896890342235565, "learning_rate": 1e-05, "loss": 0.9733, "step": 59050 }, { "epoch": 52.30735163861824, "grad_norm": 0.2732599079608917, "learning_rate": 1e-05, "loss": 0.9709, "step": 59055 }, { "epoch": 52.311780336581045, "grad_norm": 0.21000850200653076, "learning_rate": 1e-05, "loss": 0.9829, "step": 59060 }, { "epoch": 52.316209034543846, "grad_norm": 0.2537667751312256, "learning_rate": 1e-05, "loss": 0.9571, "step": 59065 }, { "epoch": 52.32063773250664, "grad_norm": 0.22175449132919312, "learning_rate": 1e-05, "loss": 1.0291, "step": 59070 }, { "epoch": 52.32506643046944, "grad_norm": 0.22910772264003754, "learning_rate": 1e-05, "loss": 0.9834, "step": 59075 }, { "epoch": 52.32949512843224, "grad_norm": 0.24023257195949554, "learning_rate": 1e-05, "loss": 0.9865, "step": 59080 }, { "epoch": 52.333923826395036, "grad_norm": 0.23657000064849854, "learning_rate": 1e-05, "loss": 0.9866, "step": 59085 }, { "epoch": 52.33835252435784, "grad_norm": 0.27525779604911804, "learning_rate": 1e-05, "loss": 0.9614, "step": 59090 }, { "epoch": 52.34278122232064, "grad_norm": 0.24265964329242706, "learning_rate": 1e-05, "loss": 1.0475, "step": 59095 }, { "epoch": 52.34720992028344, "grad_norm": 0.2675337493419647, "learning_rate": 1e-05, "loss": 1.0282, "step": 59100 }, { "epoch": 52.351638618246234, "grad_norm": 0.2790255546569824, "learning_rate": 1e-05, "loss": 1.0115, "step": 59105 }, { "epoch": 52.356067316209035, "grad_norm": 0.3175674080848694, "learning_rate": 1e-05, "loss": 0.9513, "step": 59110 }, { "epoch": 52.36049601417184, "grad_norm": 0.25975850224494934, "learning_rate": 1e-05, "loss": 0.9039, "step": 59115 }, { "epoch": 52.36492471213463, "grad_norm": 0.26649001240730286, "learning_rate": 1e-05, "loss": 1.0075, "step": 59120 }, { "epoch": 52.36935341009743, "grad_norm": 0.2806892991065979, "learning_rate": 1e-05, "loss": 0.9433, "step": 59125 }, { "epoch": 52.37378210806023, "grad_norm": 0.26368820667266846, "learning_rate": 1e-05, "loss": 1.001, "step": 59130 }, { "epoch": 52.37821080602303, "grad_norm": 0.23274797201156616, "learning_rate": 1e-05, "loss": 0.9846, "step": 59135 }, { "epoch": 52.38263950398583, "grad_norm": 0.22270150482654572, "learning_rate": 1e-05, "loss": 0.973, "step": 59140 }, { "epoch": 52.38706820194863, "grad_norm": 0.28753140568733215, "learning_rate": 1e-05, "loss": 0.9556, "step": 59145 }, { "epoch": 52.391496899911424, "grad_norm": 0.2770533561706543, "learning_rate": 1e-05, "loss": 0.9791, "step": 59150 }, { "epoch": 52.395925597874225, "grad_norm": 0.26124048233032227, "learning_rate": 1e-05, "loss": 0.9654, "step": 59155 }, { "epoch": 52.400354295837026, "grad_norm": 0.23832140862941742, "learning_rate": 1e-05, "loss": 0.9952, "step": 59160 }, { "epoch": 52.40478299379982, "grad_norm": 0.2152429223060608, "learning_rate": 1e-05, "loss": 0.9849, "step": 59165 }, { "epoch": 52.40921169176262, "grad_norm": 0.2796470820903778, "learning_rate": 1e-05, "loss": 0.93, "step": 59170 }, { "epoch": 52.41364038972542, "grad_norm": 0.22918136417865753, "learning_rate": 1e-05, "loss": 0.9881, "step": 59175 }, { "epoch": 52.41806908768822, "grad_norm": 0.2409706562757492, "learning_rate": 1e-05, "loss": 0.973, "step": 59180 }, { "epoch": 52.42249778565102, "grad_norm": 0.21738368272781372, "learning_rate": 1e-05, "loss": 0.9685, "step": 59185 }, { "epoch": 52.42692648361382, "grad_norm": 0.239488422870636, "learning_rate": 1e-05, "loss": 0.9696, "step": 59190 }, { "epoch": 52.43135518157661, "grad_norm": 0.2247210144996643, "learning_rate": 1e-05, "loss": 0.9377, "step": 59195 }, { "epoch": 52.435783879539414, "grad_norm": 0.22405464947223663, "learning_rate": 1e-05, "loss": 0.9466, "step": 59200 }, { "epoch": 52.440212577502216, "grad_norm": 0.2425481379032135, "learning_rate": 1e-05, "loss": 0.9734, "step": 59205 }, { "epoch": 52.44464127546502, "grad_norm": 0.23909392952919006, "learning_rate": 1e-05, "loss": 0.9631, "step": 59210 }, { "epoch": 52.44906997342781, "grad_norm": 0.23658297955989838, "learning_rate": 1e-05, "loss": 0.9674, "step": 59215 }, { "epoch": 52.45349867139061, "grad_norm": 0.23252840340137482, "learning_rate": 1e-05, "loss": 0.9691, "step": 59220 }, { "epoch": 52.45792736935341, "grad_norm": 0.2774755358695984, "learning_rate": 1e-05, "loss": 0.9824, "step": 59225 }, { "epoch": 52.46235606731621, "grad_norm": 0.2380921095609665, "learning_rate": 1e-05, "loss": 0.9753, "step": 59230 }, { "epoch": 52.46678476527901, "grad_norm": 0.25685325264930725, "learning_rate": 1e-05, "loss": 0.964, "step": 59235 }, { "epoch": 52.47121346324181, "grad_norm": 0.2130991518497467, "learning_rate": 1e-05, "loss": 0.983, "step": 59240 }, { "epoch": 52.475642161204604, "grad_norm": 0.24722576141357422, "learning_rate": 1e-05, "loss": 0.9937, "step": 59245 }, { "epoch": 52.480070859167405, "grad_norm": 0.21808280050754547, "learning_rate": 1e-05, "loss": 0.95, "step": 59250 }, { "epoch": 52.484499557130206, "grad_norm": 0.21170203387737274, "learning_rate": 1e-05, "loss": 1.0057, "step": 59255 }, { "epoch": 52.488928255093, "grad_norm": 0.32557445764541626, "learning_rate": 1e-05, "loss": 1.0337, "step": 59260 }, { "epoch": 52.4933569530558, "grad_norm": 0.24916352331638336, "learning_rate": 1e-05, "loss": 1.0162, "step": 59265 }, { "epoch": 52.4977856510186, "grad_norm": 0.2145821452140808, "learning_rate": 1e-05, "loss": 0.9056, "step": 59270 }, { "epoch": 52.5022143489814, "grad_norm": 0.22541280090808868, "learning_rate": 1e-05, "loss": 1.0019, "step": 59275 }, { "epoch": 52.5066430469442, "grad_norm": 0.2691725492477417, "learning_rate": 1e-05, "loss": 1.008, "step": 59280 }, { "epoch": 52.511071744907, "grad_norm": 0.2309570461511612, "learning_rate": 1e-05, "loss": 0.9231, "step": 59285 }, { "epoch": 52.515500442869794, "grad_norm": 0.2116842120885849, "learning_rate": 1e-05, "loss": 0.9579, "step": 59290 }, { "epoch": 52.519929140832595, "grad_norm": 0.2270590364933014, "learning_rate": 1e-05, "loss": 0.9473, "step": 59295 }, { "epoch": 52.524357838795396, "grad_norm": 0.22602643072605133, "learning_rate": 1e-05, "loss": 0.9708, "step": 59300 }, { "epoch": 52.52878653675819, "grad_norm": 0.23234984278678894, "learning_rate": 1e-05, "loss": 0.9874, "step": 59305 }, { "epoch": 52.53321523472099, "grad_norm": 0.27178335189819336, "learning_rate": 1e-05, "loss": 0.9894, "step": 59310 }, { "epoch": 52.53764393268379, "grad_norm": 0.20679497718811035, "learning_rate": 1e-05, "loss": 0.978, "step": 59315 }, { "epoch": 52.54207263064659, "grad_norm": 0.21883054077625275, "learning_rate": 1e-05, "loss": 0.9254, "step": 59320 }, { "epoch": 52.54650132860939, "grad_norm": 0.25714194774627686, "learning_rate": 1e-05, "loss": 0.9885, "step": 59325 }, { "epoch": 52.55093002657219, "grad_norm": 0.25423967838287354, "learning_rate": 1e-05, "loss": 0.9951, "step": 59330 }, { "epoch": 52.55535872453498, "grad_norm": 0.25764816999435425, "learning_rate": 1e-05, "loss": 0.9563, "step": 59335 }, { "epoch": 52.559787422497784, "grad_norm": 0.28911277651786804, "learning_rate": 1e-05, "loss": 1.0271, "step": 59340 }, { "epoch": 52.564216120460586, "grad_norm": 0.3020361661911011, "learning_rate": 1e-05, "loss": 0.9759, "step": 59345 }, { "epoch": 52.56864481842339, "grad_norm": 0.19778360426425934, "learning_rate": 1e-05, "loss": 0.9523, "step": 59350 }, { "epoch": 52.57307351638618, "grad_norm": 0.21709156036376953, "learning_rate": 1e-05, "loss": 1.024, "step": 59355 }, { "epoch": 52.57750221434898, "grad_norm": 0.25256097316741943, "learning_rate": 1e-05, "loss": 0.998, "step": 59360 }, { "epoch": 52.58193091231178, "grad_norm": 0.24452269077301025, "learning_rate": 1e-05, "loss": 0.9713, "step": 59365 }, { "epoch": 52.58635961027458, "grad_norm": 0.3096843361854553, "learning_rate": 1e-05, "loss": 0.9361, "step": 59370 }, { "epoch": 52.59078830823738, "grad_norm": 0.2754860520362854, "learning_rate": 1e-05, "loss": 0.9325, "step": 59375 }, { "epoch": 52.59521700620018, "grad_norm": 0.22189363837242126, "learning_rate": 1e-05, "loss": 0.9534, "step": 59380 }, { "epoch": 52.599645704162974, "grad_norm": 0.2509881854057312, "learning_rate": 1e-05, "loss": 0.9892, "step": 59385 }, { "epoch": 52.604074402125775, "grad_norm": 0.2636805474758148, "learning_rate": 1e-05, "loss": 0.9892, "step": 59390 }, { "epoch": 52.608503100088576, "grad_norm": 0.2145776003599167, "learning_rate": 1e-05, "loss": 0.996, "step": 59395 }, { "epoch": 52.61293179805137, "grad_norm": 0.25512391328811646, "learning_rate": 1e-05, "loss": 0.9556, "step": 59400 }, { "epoch": 52.61736049601417, "grad_norm": 0.24466027319431305, "learning_rate": 1e-05, "loss": 0.9953, "step": 59405 }, { "epoch": 52.62178919397697, "grad_norm": 0.2131464183330536, "learning_rate": 1e-05, "loss": 0.954, "step": 59410 }, { "epoch": 52.62621789193977, "grad_norm": 0.2435818314552307, "learning_rate": 1e-05, "loss": 0.9686, "step": 59415 }, { "epoch": 52.63064658990257, "grad_norm": 0.22133409976959229, "learning_rate": 1e-05, "loss": 1.0011, "step": 59420 }, { "epoch": 52.63507528786537, "grad_norm": 0.2613498568534851, "learning_rate": 1e-05, "loss": 0.9993, "step": 59425 }, { "epoch": 52.63950398582816, "grad_norm": 0.2328488826751709, "learning_rate": 1e-05, "loss": 0.9354, "step": 59430 }, { "epoch": 52.643932683790965, "grad_norm": 0.26889708638191223, "learning_rate": 1e-05, "loss": 0.9937, "step": 59435 }, { "epoch": 52.648361381753766, "grad_norm": 0.2561200261116028, "learning_rate": 1e-05, "loss": 1.0143, "step": 59440 }, { "epoch": 52.65279007971656, "grad_norm": 0.23625174164772034, "learning_rate": 1e-05, "loss": 0.9898, "step": 59445 }, { "epoch": 52.65721877767936, "grad_norm": 0.24090686440467834, "learning_rate": 1e-05, "loss": 0.9596, "step": 59450 }, { "epoch": 52.66164747564216, "grad_norm": 0.23700059950351715, "learning_rate": 1e-05, "loss": 0.9497, "step": 59455 }, { "epoch": 52.666076173604964, "grad_norm": 0.24252142012119293, "learning_rate": 1e-05, "loss": 0.9964, "step": 59460 }, { "epoch": 52.67050487156776, "grad_norm": 0.246984601020813, "learning_rate": 1e-05, "loss": 0.9698, "step": 59465 }, { "epoch": 52.67493356953056, "grad_norm": 0.21925924718379974, "learning_rate": 1e-05, "loss": 0.958, "step": 59470 }, { "epoch": 52.67936226749336, "grad_norm": 0.25783872604370117, "learning_rate": 1e-05, "loss": 0.968, "step": 59475 }, { "epoch": 52.683790965456154, "grad_norm": 0.2226601541042328, "learning_rate": 1e-05, "loss": 0.9688, "step": 59480 }, { "epoch": 52.688219663418955, "grad_norm": 0.27122756838798523, "learning_rate": 1e-05, "loss": 0.917, "step": 59485 }, { "epoch": 52.69264836138176, "grad_norm": 0.2551897168159485, "learning_rate": 1e-05, "loss": 0.947, "step": 59490 }, { "epoch": 52.69707705934455, "grad_norm": 0.23085863888263702, "learning_rate": 1e-05, "loss": 0.975, "step": 59495 }, { "epoch": 52.70150575730735, "grad_norm": 0.26000431180000305, "learning_rate": 1e-05, "loss": 1.0328, "step": 59500 }, { "epoch": 52.70593445527015, "grad_norm": 0.2330954223871231, "learning_rate": 1e-05, "loss": 0.9689, "step": 59505 }, { "epoch": 52.71036315323295, "grad_norm": 0.2411959320306778, "learning_rate": 1e-05, "loss": 0.993, "step": 59510 }, { "epoch": 52.71479185119575, "grad_norm": 0.24666187167167664, "learning_rate": 1e-05, "loss": 0.9716, "step": 59515 }, { "epoch": 52.71922054915855, "grad_norm": 0.25113222002983093, "learning_rate": 1e-05, "loss": 1.0056, "step": 59520 }, { "epoch": 52.723649247121344, "grad_norm": 0.2843528091907501, "learning_rate": 1e-05, "loss": 0.9638, "step": 59525 }, { "epoch": 52.728077945084145, "grad_norm": 0.24040670692920685, "learning_rate": 1e-05, "loss": 0.9647, "step": 59530 }, { "epoch": 52.732506643046946, "grad_norm": 0.241177499294281, "learning_rate": 1e-05, "loss": 0.9264, "step": 59535 }, { "epoch": 52.73693534100974, "grad_norm": 0.23298807442188263, "learning_rate": 1e-05, "loss": 0.9667, "step": 59540 }, { "epoch": 52.74136403897254, "grad_norm": 0.23927631974220276, "learning_rate": 1e-05, "loss": 1.0273, "step": 59545 }, { "epoch": 52.74579273693534, "grad_norm": 0.3062893748283386, "learning_rate": 1e-05, "loss": 0.9673, "step": 59550 }, { "epoch": 52.75022143489814, "grad_norm": 0.26091793179512024, "learning_rate": 1e-05, "loss": 0.9638, "step": 59555 }, { "epoch": 52.75465013286094, "grad_norm": 0.25685369968414307, "learning_rate": 1e-05, "loss": 0.9377, "step": 59560 }, { "epoch": 52.75907883082374, "grad_norm": 0.23045146465301514, "learning_rate": 1e-05, "loss": 0.9741, "step": 59565 }, { "epoch": 52.76350752878653, "grad_norm": 0.2705872058868408, "learning_rate": 1e-05, "loss": 0.9149, "step": 59570 }, { "epoch": 52.767936226749335, "grad_norm": 0.2371685951948166, "learning_rate": 1e-05, "loss": 0.9383, "step": 59575 }, { "epoch": 52.772364924712136, "grad_norm": 0.2210664302110672, "learning_rate": 1e-05, "loss": 0.9291, "step": 59580 }, { "epoch": 52.77679362267494, "grad_norm": 0.21319898962974548, "learning_rate": 1e-05, "loss": 0.9726, "step": 59585 }, { "epoch": 52.78122232063773, "grad_norm": 0.23021219670772552, "learning_rate": 1e-05, "loss": 0.918, "step": 59590 }, { "epoch": 52.78565101860053, "grad_norm": 0.2274142950773239, "learning_rate": 1e-05, "loss": 0.943, "step": 59595 }, { "epoch": 52.79007971656333, "grad_norm": 0.23048655688762665, "learning_rate": 1e-05, "loss": 0.9753, "step": 59600 }, { "epoch": 52.79450841452613, "grad_norm": 0.23316003382205963, "learning_rate": 1e-05, "loss": 0.9681, "step": 59605 }, { "epoch": 52.79893711248893, "grad_norm": 0.24283450841903687, "learning_rate": 1e-05, "loss": 0.9085, "step": 59610 }, { "epoch": 52.80336581045173, "grad_norm": 0.2414061725139618, "learning_rate": 1e-05, "loss": 0.8966, "step": 59615 }, { "epoch": 52.807794508414524, "grad_norm": 0.2697695791721344, "learning_rate": 1e-05, "loss": 0.9804, "step": 59620 }, { "epoch": 52.812223206377325, "grad_norm": 0.29279714822769165, "learning_rate": 1e-05, "loss": 0.9551, "step": 59625 }, { "epoch": 52.81665190434013, "grad_norm": 0.2392735630273819, "learning_rate": 1e-05, "loss": 0.9494, "step": 59630 }, { "epoch": 52.82108060230292, "grad_norm": 0.28301066160202026, "learning_rate": 1e-05, "loss": 0.9487, "step": 59635 }, { "epoch": 52.82550930026572, "grad_norm": 0.25010064244270325, "learning_rate": 1e-05, "loss": 0.9687, "step": 59640 }, { "epoch": 52.82993799822852, "grad_norm": 0.23396393656730652, "learning_rate": 1e-05, "loss": 0.9816, "step": 59645 }, { "epoch": 52.83436669619132, "grad_norm": 0.277478963136673, "learning_rate": 1e-05, "loss": 0.978, "step": 59650 }, { "epoch": 52.83879539415412, "grad_norm": 0.2712346613407135, "learning_rate": 1e-05, "loss": 0.9799, "step": 59655 }, { "epoch": 52.84322409211692, "grad_norm": 0.21306876838207245, "learning_rate": 1e-05, "loss": 0.9457, "step": 59660 }, { "epoch": 52.847652790079714, "grad_norm": 0.23523080348968506, "learning_rate": 1e-05, "loss": 0.931, "step": 59665 }, { "epoch": 52.852081488042515, "grad_norm": 0.22288550436496735, "learning_rate": 1e-05, "loss": 0.9828, "step": 59670 }, { "epoch": 52.856510186005316, "grad_norm": 0.22876958549022675, "learning_rate": 1e-05, "loss": 0.9383, "step": 59675 }, { "epoch": 52.86093888396811, "grad_norm": 0.24014483392238617, "learning_rate": 1e-05, "loss": 0.9236, "step": 59680 }, { "epoch": 52.86536758193091, "grad_norm": 0.2395474910736084, "learning_rate": 1e-05, "loss": 0.9482, "step": 59685 }, { "epoch": 52.86979627989371, "grad_norm": 0.23993068933486938, "learning_rate": 1e-05, "loss": 0.9557, "step": 59690 }, { "epoch": 52.87422497785651, "grad_norm": 0.20460699498653412, "learning_rate": 1e-05, "loss": 0.9526, "step": 59695 }, { "epoch": 52.87865367581931, "grad_norm": 0.22421720623970032, "learning_rate": 1e-05, "loss": 0.9673, "step": 59700 }, { "epoch": 52.88308237378211, "grad_norm": 0.21821144223213196, "learning_rate": 1e-05, "loss": 0.9956, "step": 59705 }, { "epoch": 52.88751107174491, "grad_norm": 0.21350087225437164, "learning_rate": 1e-05, "loss": 0.9795, "step": 59710 }, { "epoch": 52.891939769707704, "grad_norm": 0.22925147414207458, "learning_rate": 1e-05, "loss": 0.9863, "step": 59715 }, { "epoch": 52.896368467670506, "grad_norm": 0.2710317373275757, "learning_rate": 1e-05, "loss": 1.0173, "step": 59720 }, { "epoch": 52.90079716563331, "grad_norm": 0.23127196729183197, "learning_rate": 1e-05, "loss": 0.9595, "step": 59725 }, { "epoch": 52.9052258635961, "grad_norm": 0.24701432883739471, "learning_rate": 1e-05, "loss": 0.9311, "step": 59730 }, { "epoch": 52.9096545615589, "grad_norm": 0.24243749678134918, "learning_rate": 1e-05, "loss": 0.9941, "step": 59735 }, { "epoch": 52.9140832595217, "grad_norm": 0.2752993106842041, "learning_rate": 1e-05, "loss": 0.9463, "step": 59740 }, { "epoch": 52.9185119574845, "grad_norm": 0.269914448261261, "learning_rate": 1e-05, "loss": 0.9764, "step": 59745 }, { "epoch": 52.9229406554473, "grad_norm": 0.22686247527599335, "learning_rate": 1e-05, "loss": 0.958, "step": 59750 }, { "epoch": 52.9273693534101, "grad_norm": 0.2274935394525528, "learning_rate": 1e-05, "loss": 0.9873, "step": 59755 }, { "epoch": 52.931798051372894, "grad_norm": 0.2664961516857147, "learning_rate": 1e-05, "loss": 0.9447, "step": 59760 }, { "epoch": 52.936226749335695, "grad_norm": 0.24603243172168732, "learning_rate": 1e-05, "loss": 0.9559, "step": 59765 }, { "epoch": 52.9406554472985, "grad_norm": 0.2533240020275116, "learning_rate": 1e-05, "loss": 0.9682, "step": 59770 }, { "epoch": 52.94508414526129, "grad_norm": 0.2096920609474182, "learning_rate": 1e-05, "loss": 0.9322, "step": 59775 }, { "epoch": 52.94951284322409, "grad_norm": 0.21883811056613922, "learning_rate": 1e-05, "loss": 1.0038, "step": 59780 }, { "epoch": 52.95394154118689, "grad_norm": 0.23622551560401917, "learning_rate": 1e-05, "loss": 0.9863, "step": 59785 }, { "epoch": 52.95837023914969, "grad_norm": 0.2807080149650574, "learning_rate": 1e-05, "loss": 1.006, "step": 59790 }, { "epoch": 52.96279893711249, "grad_norm": 0.22725415229797363, "learning_rate": 1e-05, "loss": 0.9206, "step": 59795 }, { "epoch": 52.96722763507529, "grad_norm": 0.2504546046257019, "learning_rate": 1e-05, "loss": 0.9794, "step": 59800 }, { "epoch": 52.971656333038084, "grad_norm": 0.2076190710067749, "learning_rate": 1e-05, "loss": 0.9872, "step": 59805 }, { "epoch": 52.976085031000885, "grad_norm": 0.24882617592811584, "learning_rate": 1e-05, "loss": 0.9659, "step": 59810 }, { "epoch": 52.980513728963686, "grad_norm": 0.24668893218040466, "learning_rate": 1e-05, "loss": 0.9918, "step": 59815 }, { "epoch": 52.98494242692648, "grad_norm": 0.23608694970607758, "learning_rate": 1e-05, "loss": 0.9596, "step": 59820 }, { "epoch": 52.98937112488928, "grad_norm": 0.23497037589550018, "learning_rate": 1e-05, "loss": 1.0197, "step": 59825 }, { "epoch": 52.99379982285208, "grad_norm": 0.2623255252838135, "learning_rate": 1e-05, "loss": 0.9442, "step": 59830 }, { "epoch": 52.998228520814884, "grad_norm": 0.2677062749862671, "learning_rate": 1e-05, "loss": 0.9851, "step": 59835 }, { "epoch": 53.00265721877768, "grad_norm": 0.2218473106622696, "learning_rate": 1e-05, "loss": 0.9252, "step": 59840 }, { "epoch": 53.00708591674048, "grad_norm": 0.22999630868434906, "learning_rate": 1e-05, "loss": 0.9691, "step": 59845 }, { "epoch": 53.01151461470328, "grad_norm": 0.22826425731182098, "learning_rate": 1e-05, "loss": 0.9824, "step": 59850 }, { "epoch": 53.015943312666074, "grad_norm": 0.2073640376329422, "learning_rate": 1e-05, "loss": 1.0223, "step": 59855 }, { "epoch": 53.020372010628876, "grad_norm": 0.25264009833335876, "learning_rate": 1e-05, "loss": 0.9659, "step": 59860 }, { "epoch": 53.02480070859168, "grad_norm": 0.2786754369735718, "learning_rate": 1e-05, "loss": 1.0095, "step": 59865 }, { "epoch": 53.02922940655447, "grad_norm": 0.22319111227989197, "learning_rate": 1e-05, "loss": 0.9682, "step": 59870 }, { "epoch": 53.03365810451727, "grad_norm": 0.2614092528820038, "learning_rate": 1e-05, "loss": 0.9543, "step": 59875 }, { "epoch": 53.03808680248007, "grad_norm": 0.25744086503982544, "learning_rate": 1e-05, "loss": 0.9536, "step": 59880 }, { "epoch": 53.04251550044287, "grad_norm": 0.23858335614204407, "learning_rate": 1e-05, "loss": 0.9595, "step": 59885 }, { "epoch": 53.04694419840567, "grad_norm": 0.22827693819999695, "learning_rate": 1e-05, "loss": 0.9789, "step": 59890 }, { "epoch": 53.05137289636847, "grad_norm": 0.24994118511676788, "learning_rate": 1e-05, "loss": 1.0126, "step": 59895 }, { "epoch": 53.055801594331264, "grad_norm": 0.21131977438926697, "learning_rate": 1e-05, "loss": 0.9366, "step": 59900 }, { "epoch": 53.060230292294065, "grad_norm": 0.24249157309532166, "learning_rate": 1e-05, "loss": 1.0076, "step": 59905 }, { "epoch": 53.064658990256866, "grad_norm": 0.24080723524093628, "learning_rate": 1e-05, "loss": 0.9553, "step": 59910 }, { "epoch": 53.06908768821966, "grad_norm": 0.21769703924655914, "learning_rate": 1e-05, "loss": 1.0237, "step": 59915 }, { "epoch": 53.07351638618246, "grad_norm": 0.26333436369895935, "learning_rate": 1e-05, "loss": 1.0053, "step": 59920 }, { "epoch": 53.07794508414526, "grad_norm": 0.229929119348526, "learning_rate": 1e-05, "loss": 0.9253, "step": 59925 }, { "epoch": 53.08237378210806, "grad_norm": 0.2134549915790558, "learning_rate": 1e-05, "loss": 1.0387, "step": 59930 }, { "epoch": 53.08680248007086, "grad_norm": 0.22480005025863647, "learning_rate": 1e-05, "loss": 1.0026, "step": 59935 }, { "epoch": 53.09123117803366, "grad_norm": 0.19069701433181763, "learning_rate": 1e-05, "loss": 0.9989, "step": 59940 }, { "epoch": 53.09565987599645, "grad_norm": 0.23737424612045288, "learning_rate": 1e-05, "loss": 0.9896, "step": 59945 }, { "epoch": 53.100088573959255, "grad_norm": 0.2166704684495926, "learning_rate": 1e-05, "loss": 0.9958, "step": 59950 }, { "epoch": 53.104517271922056, "grad_norm": 0.23745734989643097, "learning_rate": 1e-05, "loss": 1.0161, "step": 59955 }, { "epoch": 53.10894596988486, "grad_norm": 0.23875974118709564, "learning_rate": 1e-05, "loss": 0.9137, "step": 59960 }, { "epoch": 53.11337466784765, "grad_norm": 0.22952696681022644, "learning_rate": 1e-05, "loss": 0.9649, "step": 59965 }, { "epoch": 53.11780336581045, "grad_norm": 0.2571762800216675, "learning_rate": 1e-05, "loss": 0.9535, "step": 59970 }, { "epoch": 53.122232063773254, "grad_norm": 0.23149101436138153, "learning_rate": 1e-05, "loss": 0.9226, "step": 59975 }, { "epoch": 53.12666076173605, "grad_norm": 0.2507995069026947, "learning_rate": 1e-05, "loss": 0.9804, "step": 59980 }, { "epoch": 53.13108945969885, "grad_norm": 0.22844278812408447, "learning_rate": 1e-05, "loss": 1.009, "step": 59985 }, { "epoch": 53.13551815766165, "grad_norm": 0.27623775601387024, "learning_rate": 1e-05, "loss": 0.9873, "step": 59990 }, { "epoch": 53.139946855624444, "grad_norm": 0.234405055642128, "learning_rate": 1e-05, "loss": 0.9945, "step": 59995 }, { "epoch": 53.144375553587246, "grad_norm": 0.23527589440345764, "learning_rate": 1e-05, "loss": 0.9326, "step": 60000 }, { "epoch": 53.14880425155005, "grad_norm": 0.22647306323051453, "learning_rate": 1e-05, "loss": 0.9559, "step": 60005 }, { "epoch": 53.15323294951284, "grad_norm": 0.25990360975265503, "learning_rate": 1e-05, "loss": 0.9663, "step": 60010 }, { "epoch": 53.15766164747564, "grad_norm": 0.2278909683227539, "learning_rate": 1e-05, "loss": 0.9301, "step": 60015 }, { "epoch": 53.16209034543844, "grad_norm": 0.22557573020458221, "learning_rate": 1e-05, "loss": 0.9494, "step": 60020 }, { "epoch": 53.16651904340124, "grad_norm": 0.21799437701702118, "learning_rate": 1e-05, "loss": 0.9626, "step": 60025 }, { "epoch": 53.17094774136404, "grad_norm": 0.22027651965618134, "learning_rate": 1e-05, "loss": 0.9435, "step": 60030 }, { "epoch": 53.17537643932684, "grad_norm": 0.2320096790790558, "learning_rate": 1e-05, "loss": 0.9565, "step": 60035 }, { "epoch": 53.179805137289634, "grad_norm": 0.23430219292640686, "learning_rate": 1e-05, "loss": 0.968, "step": 60040 }, { "epoch": 53.184233835252435, "grad_norm": 0.2303348183631897, "learning_rate": 1e-05, "loss": 0.9539, "step": 60045 }, { "epoch": 53.188662533215236, "grad_norm": 0.20613956451416016, "learning_rate": 1e-05, "loss": 0.9875, "step": 60050 }, { "epoch": 53.19309123117803, "grad_norm": 0.26425671577453613, "learning_rate": 1e-05, "loss": 0.9431, "step": 60055 }, { "epoch": 53.19751992914083, "grad_norm": 0.23431171476840973, "learning_rate": 1e-05, "loss": 0.9657, "step": 60060 }, { "epoch": 53.20194862710363, "grad_norm": 0.2612230181694031, "learning_rate": 1e-05, "loss": 0.9714, "step": 60065 }, { "epoch": 53.20637732506643, "grad_norm": 0.2589799463748932, "learning_rate": 1e-05, "loss": 0.9461, "step": 60070 }, { "epoch": 53.21080602302923, "grad_norm": 0.2895714342594147, "learning_rate": 1e-05, "loss": 0.9636, "step": 60075 }, { "epoch": 53.21523472099203, "grad_norm": 0.26066553592681885, "learning_rate": 1e-05, "loss": 0.9299, "step": 60080 }, { "epoch": 53.21966341895483, "grad_norm": 0.25754573941230774, "learning_rate": 1e-05, "loss": 0.9417, "step": 60085 }, { "epoch": 53.224092116917625, "grad_norm": 0.27685636281967163, "learning_rate": 1e-05, "loss": 1.0261, "step": 60090 }, { "epoch": 53.228520814880426, "grad_norm": 0.23839853703975677, "learning_rate": 1e-05, "loss": 0.9863, "step": 60095 }, { "epoch": 53.23294951284323, "grad_norm": 0.22421537339687347, "learning_rate": 1e-05, "loss": 1.0027, "step": 60100 }, { "epoch": 53.23737821080602, "grad_norm": 0.21918919682502747, "learning_rate": 1e-05, "loss": 0.9167, "step": 60105 }, { "epoch": 53.24180690876882, "grad_norm": 0.2267196774482727, "learning_rate": 1e-05, "loss": 0.9692, "step": 60110 }, { "epoch": 53.246235606731624, "grad_norm": 0.21427926421165466, "learning_rate": 1e-05, "loss": 0.9624, "step": 60115 }, { "epoch": 53.25066430469442, "grad_norm": 0.2027479112148285, "learning_rate": 1e-05, "loss": 0.9942, "step": 60120 }, { "epoch": 53.25509300265722, "grad_norm": 0.22369693219661713, "learning_rate": 1e-05, "loss": 0.9668, "step": 60125 }, { "epoch": 53.25952170062002, "grad_norm": 0.22161203622817993, "learning_rate": 1e-05, "loss": 0.9609, "step": 60130 }, { "epoch": 53.263950398582814, "grad_norm": 0.30367374420166016, "learning_rate": 1e-05, "loss": 0.9735, "step": 60135 }, { "epoch": 53.268379096545615, "grad_norm": 0.22306816279888153, "learning_rate": 1e-05, "loss": 0.9385, "step": 60140 }, { "epoch": 53.27280779450842, "grad_norm": 0.23401185870170593, "learning_rate": 1e-05, "loss": 1.0001, "step": 60145 }, { "epoch": 53.27723649247121, "grad_norm": 0.2387910783290863, "learning_rate": 1e-05, "loss": 0.9892, "step": 60150 }, { "epoch": 53.28166519043401, "grad_norm": 0.2351236492395401, "learning_rate": 1e-05, "loss": 1.0008, "step": 60155 }, { "epoch": 53.28609388839681, "grad_norm": 0.23330087959766388, "learning_rate": 1e-05, "loss": 0.9903, "step": 60160 }, { "epoch": 53.29052258635961, "grad_norm": 0.2882341146469116, "learning_rate": 1e-05, "loss": 0.9355, "step": 60165 }, { "epoch": 53.29495128432241, "grad_norm": 0.23369009792804718, "learning_rate": 1e-05, "loss": 0.9516, "step": 60170 }, { "epoch": 53.29937998228521, "grad_norm": 0.24204441905021667, "learning_rate": 1e-05, "loss": 0.9194, "step": 60175 }, { "epoch": 53.303808680248004, "grad_norm": 0.2050991952419281, "learning_rate": 1e-05, "loss": 1.0372, "step": 60180 }, { "epoch": 53.308237378210805, "grad_norm": 0.2358250766992569, "learning_rate": 1e-05, "loss": 1.0303, "step": 60185 }, { "epoch": 53.312666076173606, "grad_norm": 0.27120256423950195, "learning_rate": 1e-05, "loss": 0.9289, "step": 60190 }, { "epoch": 53.31709477413641, "grad_norm": 0.2972188889980316, "learning_rate": 1e-05, "loss": 1.0248, "step": 60195 }, { "epoch": 53.3215234720992, "grad_norm": 0.24034792184829712, "learning_rate": 1e-05, "loss": 0.9617, "step": 60200 }, { "epoch": 53.325952170062, "grad_norm": 0.19845075905323029, "learning_rate": 1e-05, "loss": 0.9514, "step": 60205 }, { "epoch": 53.330380868024804, "grad_norm": 0.23408180475234985, "learning_rate": 1e-05, "loss": 0.9937, "step": 60210 }, { "epoch": 53.3348095659876, "grad_norm": 0.20962047576904297, "learning_rate": 1e-05, "loss": 0.9789, "step": 60215 }, { "epoch": 53.3392382639504, "grad_norm": 0.25279760360717773, "learning_rate": 1e-05, "loss": 1.0162, "step": 60220 }, { "epoch": 53.3436669619132, "grad_norm": 0.26563024520874023, "learning_rate": 1e-05, "loss": 0.9703, "step": 60225 }, { "epoch": 53.348095659875995, "grad_norm": 0.27955740690231323, "learning_rate": 1e-05, "loss": 0.9142, "step": 60230 }, { "epoch": 53.352524357838796, "grad_norm": 0.26782798767089844, "learning_rate": 1e-05, "loss": 0.9064, "step": 60235 }, { "epoch": 53.3569530558016, "grad_norm": 0.2246531993150711, "learning_rate": 1e-05, "loss": 0.982, "step": 60240 }, { "epoch": 53.36138175376439, "grad_norm": 0.2505984902381897, "learning_rate": 1e-05, "loss": 0.9635, "step": 60245 }, { "epoch": 53.36581045172719, "grad_norm": 0.21696841716766357, "learning_rate": 1e-05, "loss": 0.8989, "step": 60250 }, { "epoch": 53.37023914968999, "grad_norm": 0.2331947237253189, "learning_rate": 1e-05, "loss": 0.9692, "step": 60255 }, { "epoch": 53.37466784765279, "grad_norm": 0.24772530794143677, "learning_rate": 1e-05, "loss": 0.9932, "step": 60260 }, { "epoch": 53.37909654561559, "grad_norm": 0.23740357160568237, "learning_rate": 1e-05, "loss": 0.9641, "step": 60265 }, { "epoch": 53.38352524357839, "grad_norm": 0.21484477818012238, "learning_rate": 1e-05, "loss": 0.9581, "step": 60270 }, { "epoch": 53.387953941541184, "grad_norm": 0.26013872027397156, "learning_rate": 1e-05, "loss": 0.9516, "step": 60275 }, { "epoch": 53.392382639503985, "grad_norm": 0.23608742654323578, "learning_rate": 1e-05, "loss": 0.9933, "step": 60280 }, { "epoch": 53.39681133746679, "grad_norm": 0.24695591628551483, "learning_rate": 1e-05, "loss": 0.9735, "step": 60285 }, { "epoch": 53.40124003542958, "grad_norm": 0.22766916453838348, "learning_rate": 1e-05, "loss": 0.9666, "step": 60290 }, { "epoch": 53.40566873339238, "grad_norm": 0.26797938346862793, "learning_rate": 1e-05, "loss": 0.9817, "step": 60295 }, { "epoch": 53.41009743135518, "grad_norm": 0.21601247787475586, "learning_rate": 1e-05, "loss": 0.9669, "step": 60300 }, { "epoch": 53.41452612931798, "grad_norm": 0.2364301234483719, "learning_rate": 1e-05, "loss": 0.9484, "step": 60305 }, { "epoch": 53.41895482728078, "grad_norm": 0.22183459997177124, "learning_rate": 1e-05, "loss": 0.9637, "step": 60310 }, { "epoch": 53.42338352524358, "grad_norm": 0.2335205227136612, "learning_rate": 1e-05, "loss": 0.9617, "step": 60315 }, { "epoch": 53.42781222320638, "grad_norm": 0.2170056849718094, "learning_rate": 1e-05, "loss": 0.9996, "step": 60320 }, { "epoch": 53.432240921169175, "grad_norm": 0.23669470846652985, "learning_rate": 1e-05, "loss": 1.0328, "step": 60325 }, { "epoch": 53.436669619131976, "grad_norm": 0.27642059326171875, "learning_rate": 1e-05, "loss": 0.9249, "step": 60330 }, { "epoch": 53.44109831709478, "grad_norm": 0.21286150813102722, "learning_rate": 1e-05, "loss": 0.9524, "step": 60335 }, { "epoch": 53.44552701505757, "grad_norm": 0.22230182588100433, "learning_rate": 1e-05, "loss": 0.9978, "step": 60340 }, { "epoch": 53.44995571302037, "grad_norm": 0.24709220230579376, "learning_rate": 1e-05, "loss": 0.9892, "step": 60345 }, { "epoch": 53.454384410983174, "grad_norm": 0.22660088539123535, "learning_rate": 1e-05, "loss": 0.9568, "step": 60350 }, { "epoch": 53.45881310894597, "grad_norm": 0.24670866131782532, "learning_rate": 1e-05, "loss": 0.9234, "step": 60355 }, { "epoch": 53.46324180690877, "grad_norm": 0.239283949136734, "learning_rate": 1e-05, "loss": 0.979, "step": 60360 }, { "epoch": 53.46767050487157, "grad_norm": 0.243422731757164, "learning_rate": 1e-05, "loss": 1.0216, "step": 60365 }, { "epoch": 53.472099202834364, "grad_norm": 0.2267739623785019, "learning_rate": 1e-05, "loss": 0.9235, "step": 60370 }, { "epoch": 53.476527900797166, "grad_norm": 0.23952731490135193, "learning_rate": 1e-05, "loss": 0.9669, "step": 60375 }, { "epoch": 53.48095659875997, "grad_norm": 0.24978460371494293, "learning_rate": 1e-05, "loss": 0.9792, "step": 60380 }, { "epoch": 53.48538529672276, "grad_norm": 0.20844578742980957, "learning_rate": 1e-05, "loss": 0.9457, "step": 60385 }, { "epoch": 53.48981399468556, "grad_norm": 0.26046037673950195, "learning_rate": 1e-05, "loss": 1.0156, "step": 60390 }, { "epoch": 53.49424269264836, "grad_norm": 0.2372545599937439, "learning_rate": 1e-05, "loss": 0.9682, "step": 60395 }, { "epoch": 53.49867139061116, "grad_norm": 0.21985235810279846, "learning_rate": 1e-05, "loss": 0.9567, "step": 60400 }, { "epoch": 53.50310008857396, "grad_norm": 0.24295611679553986, "learning_rate": 1e-05, "loss": 0.9382, "step": 60405 }, { "epoch": 53.50752878653676, "grad_norm": 0.23884452879428864, "learning_rate": 1e-05, "loss": 0.9631, "step": 60410 }, { "epoch": 53.511957484499554, "grad_norm": 0.21319353580474854, "learning_rate": 1e-05, "loss": 0.978, "step": 60415 }, { "epoch": 53.516386182462355, "grad_norm": 0.2133975625038147, "learning_rate": 1e-05, "loss": 0.9939, "step": 60420 }, { "epoch": 53.520814880425156, "grad_norm": 0.2694464325904846, "learning_rate": 1e-05, "loss": 0.9645, "step": 60425 }, { "epoch": 53.52524357838795, "grad_norm": 0.24523159861564636, "learning_rate": 1e-05, "loss": 0.9173, "step": 60430 }, { "epoch": 53.52967227635075, "grad_norm": 0.24195018410682678, "learning_rate": 1e-05, "loss": 0.9947, "step": 60435 }, { "epoch": 53.53410097431355, "grad_norm": 0.22272612154483795, "learning_rate": 1e-05, "loss": 0.9909, "step": 60440 }, { "epoch": 53.538529672276354, "grad_norm": 0.22450736165046692, "learning_rate": 1e-05, "loss": 0.9785, "step": 60445 }, { "epoch": 53.54295837023915, "grad_norm": 0.26652538776397705, "learning_rate": 1e-05, "loss": 0.9779, "step": 60450 }, { "epoch": 53.54738706820195, "grad_norm": 0.2680169939994812, "learning_rate": 1e-05, "loss": 0.9749, "step": 60455 }, { "epoch": 53.55181576616475, "grad_norm": 0.21304230391979218, "learning_rate": 1e-05, "loss": 0.9487, "step": 60460 }, { "epoch": 53.556244464127545, "grad_norm": 0.2085220366716385, "learning_rate": 1e-05, "loss": 0.953, "step": 60465 }, { "epoch": 53.560673162090346, "grad_norm": 0.21584953367710114, "learning_rate": 1e-05, "loss": 0.9663, "step": 60470 }, { "epoch": 53.56510186005315, "grad_norm": 0.2310914248228073, "learning_rate": 1e-05, "loss": 1.0239, "step": 60475 }, { "epoch": 53.56953055801594, "grad_norm": 0.21490155160427094, "learning_rate": 1e-05, "loss": 1.019, "step": 60480 }, { "epoch": 53.57395925597874, "grad_norm": 0.2255762219429016, "learning_rate": 1e-05, "loss": 1.0001, "step": 60485 }, { "epoch": 53.578387953941544, "grad_norm": 0.21962149441242218, "learning_rate": 1e-05, "loss": 1.0087, "step": 60490 }, { "epoch": 53.58281665190434, "grad_norm": 0.2350894659757614, "learning_rate": 1e-05, "loss": 0.9873, "step": 60495 }, { "epoch": 53.58724534986714, "grad_norm": 0.2455650269985199, "learning_rate": 1e-05, "loss": 0.9881, "step": 60500 }, { "epoch": 53.59167404782994, "grad_norm": 0.29344671964645386, "learning_rate": 1e-05, "loss": 0.9878, "step": 60505 }, { "epoch": 53.596102745792734, "grad_norm": 0.2824232280254364, "learning_rate": 1e-05, "loss": 0.932, "step": 60510 }, { "epoch": 53.600531443755536, "grad_norm": 0.20357902348041534, "learning_rate": 1e-05, "loss": 0.9847, "step": 60515 }, { "epoch": 53.60496014171834, "grad_norm": 0.23408834636211395, "learning_rate": 1e-05, "loss": 0.9727, "step": 60520 }, { "epoch": 53.60938883968113, "grad_norm": 0.23880112171173096, "learning_rate": 1e-05, "loss": 0.9575, "step": 60525 }, { "epoch": 53.61381753764393, "grad_norm": 0.24110502004623413, "learning_rate": 1e-05, "loss": 1.0044, "step": 60530 }, { "epoch": 53.61824623560673, "grad_norm": 0.2096918523311615, "learning_rate": 1e-05, "loss": 1.0031, "step": 60535 }, { "epoch": 53.62267493356953, "grad_norm": 0.2138698846101761, "learning_rate": 1e-05, "loss": 0.9405, "step": 60540 }, { "epoch": 53.62710363153233, "grad_norm": 0.23307427763938904, "learning_rate": 1e-05, "loss": 0.9512, "step": 60545 }, { "epoch": 53.63153232949513, "grad_norm": 0.26484254002571106, "learning_rate": 1e-05, "loss": 0.9709, "step": 60550 }, { "epoch": 53.635961027457924, "grad_norm": 0.23737332224845886, "learning_rate": 1e-05, "loss": 0.9863, "step": 60555 }, { "epoch": 53.640389725420725, "grad_norm": 0.20891714096069336, "learning_rate": 1e-05, "loss": 0.9611, "step": 60560 }, { "epoch": 53.644818423383526, "grad_norm": 0.23625357449054718, "learning_rate": 1e-05, "loss": 1.0014, "step": 60565 }, { "epoch": 53.64924712134633, "grad_norm": 0.22194480895996094, "learning_rate": 1e-05, "loss": 0.9776, "step": 60570 }, { "epoch": 53.65367581930912, "grad_norm": 0.23048807680606842, "learning_rate": 1e-05, "loss": 0.9723, "step": 60575 }, { "epoch": 53.65810451727192, "grad_norm": 0.23959128558635712, "learning_rate": 1e-05, "loss": 0.929, "step": 60580 }, { "epoch": 53.662533215234724, "grad_norm": 0.23040126264095306, "learning_rate": 1e-05, "loss": 0.98, "step": 60585 }, { "epoch": 53.66696191319752, "grad_norm": 0.2538955509662628, "learning_rate": 1e-05, "loss": 0.9412, "step": 60590 }, { "epoch": 53.67139061116032, "grad_norm": 0.2324790209531784, "learning_rate": 1e-05, "loss": 0.9742, "step": 60595 }, { "epoch": 53.67581930912312, "grad_norm": 0.19082011282444, "learning_rate": 1e-05, "loss": 1.0353, "step": 60600 }, { "epoch": 53.680248007085915, "grad_norm": 0.22211992740631104, "learning_rate": 1e-05, "loss": 0.977, "step": 60605 }, { "epoch": 53.684676705048716, "grad_norm": 0.22224803268909454, "learning_rate": 1e-05, "loss": 1.015, "step": 60610 }, { "epoch": 53.68910540301152, "grad_norm": 0.2658023536205292, "learning_rate": 1e-05, "loss": 0.9698, "step": 60615 }, { "epoch": 53.69353410097431, "grad_norm": 0.22065530717372894, "learning_rate": 1e-05, "loss": 0.951, "step": 60620 }, { "epoch": 53.69796279893711, "grad_norm": 0.2552250027656555, "learning_rate": 1e-05, "loss": 0.9515, "step": 60625 }, { "epoch": 53.702391496899914, "grad_norm": 0.23224309086799622, "learning_rate": 1e-05, "loss": 0.9517, "step": 60630 }, { "epoch": 53.70682019486271, "grad_norm": 0.2216968834400177, "learning_rate": 1e-05, "loss": 0.9621, "step": 60635 }, { "epoch": 53.71124889282551, "grad_norm": 0.25914743542671204, "learning_rate": 1e-05, "loss": 0.9714, "step": 60640 }, { "epoch": 53.71567759078831, "grad_norm": 0.27898186445236206, "learning_rate": 1e-05, "loss": 1.0263, "step": 60645 }, { "epoch": 53.720106288751104, "grad_norm": 0.2800960838794708, "learning_rate": 1e-05, "loss": 0.9848, "step": 60650 }, { "epoch": 53.724534986713905, "grad_norm": 0.230106383562088, "learning_rate": 1e-05, "loss": 0.9505, "step": 60655 }, { "epoch": 53.72896368467671, "grad_norm": 0.23093128204345703, "learning_rate": 1e-05, "loss": 1.0294, "step": 60660 }, { "epoch": 53.7333923826395, "grad_norm": 0.2400493025779724, "learning_rate": 1e-05, "loss": 0.9271, "step": 60665 }, { "epoch": 53.7378210806023, "grad_norm": 0.2102871537208557, "learning_rate": 1e-05, "loss": 0.994, "step": 60670 }, { "epoch": 53.7422497785651, "grad_norm": 0.2573719620704651, "learning_rate": 1e-05, "loss": 0.9199, "step": 60675 }, { "epoch": 53.7466784765279, "grad_norm": 0.24573856592178345, "learning_rate": 1e-05, "loss": 0.9081, "step": 60680 }, { "epoch": 53.7511071744907, "grad_norm": 0.24206885695457458, "learning_rate": 1e-05, "loss": 0.9544, "step": 60685 }, { "epoch": 53.7555358724535, "grad_norm": 0.2640511691570282, "learning_rate": 1e-05, "loss": 0.9913, "step": 60690 }, { "epoch": 53.7599645704163, "grad_norm": 0.2531708776950836, "learning_rate": 1e-05, "loss": 0.9378, "step": 60695 }, { "epoch": 53.764393268379095, "grad_norm": 0.24144858121871948, "learning_rate": 1e-05, "loss": 0.9903, "step": 60700 }, { "epoch": 53.768821966341896, "grad_norm": 0.2880392372608185, "learning_rate": 1e-05, "loss": 0.9875, "step": 60705 }, { "epoch": 53.7732506643047, "grad_norm": 0.2411138117313385, "learning_rate": 1e-05, "loss": 0.9212, "step": 60710 }, { "epoch": 53.77767936226749, "grad_norm": 0.3010050356388092, "learning_rate": 1e-05, "loss": 0.9402, "step": 60715 }, { "epoch": 53.78210806023029, "grad_norm": 0.21168889105319977, "learning_rate": 1e-05, "loss": 0.9489, "step": 60720 }, { "epoch": 53.786536758193094, "grad_norm": 0.24816155433654785, "learning_rate": 1e-05, "loss": 0.9455, "step": 60725 }, { "epoch": 53.79096545615589, "grad_norm": 0.30651214718818665, "learning_rate": 1e-05, "loss": 0.9605, "step": 60730 }, { "epoch": 53.79539415411869, "grad_norm": 0.27157482504844666, "learning_rate": 1e-05, "loss": 0.9528, "step": 60735 }, { "epoch": 53.79982285208149, "grad_norm": 0.2219117134809494, "learning_rate": 1e-05, "loss": 0.9507, "step": 60740 }, { "epoch": 53.804251550044285, "grad_norm": 0.26084062457084656, "learning_rate": 1e-05, "loss": 0.9563, "step": 60745 }, { "epoch": 53.808680248007086, "grad_norm": 0.2546119689941406, "learning_rate": 1e-05, "loss": 1.0296, "step": 60750 }, { "epoch": 53.81310894596989, "grad_norm": 0.23620547354221344, "learning_rate": 1e-05, "loss": 0.9611, "step": 60755 }, { "epoch": 53.81753764393268, "grad_norm": 0.277325838804245, "learning_rate": 1e-05, "loss": 0.984, "step": 60760 }, { "epoch": 53.82196634189548, "grad_norm": 0.25658735632896423, "learning_rate": 1e-05, "loss": 0.9188, "step": 60765 }, { "epoch": 53.826395039858284, "grad_norm": 0.24100135266780853, "learning_rate": 1e-05, "loss": 0.9786, "step": 60770 }, { "epoch": 53.83082373782108, "grad_norm": 0.2314073145389557, "learning_rate": 1e-05, "loss": 0.9721, "step": 60775 }, { "epoch": 53.83525243578388, "grad_norm": 0.25841328501701355, "learning_rate": 1e-05, "loss": 0.9945, "step": 60780 }, { "epoch": 53.83968113374668, "grad_norm": 0.24181579053401947, "learning_rate": 1e-05, "loss": 0.934, "step": 60785 }, { "epoch": 53.844109831709474, "grad_norm": 0.21728689968585968, "learning_rate": 1e-05, "loss": 0.963, "step": 60790 }, { "epoch": 53.848538529672275, "grad_norm": 0.22766830027103424, "learning_rate": 1e-05, "loss": 0.9249, "step": 60795 }, { "epoch": 53.85296722763508, "grad_norm": 0.22813519835472107, "learning_rate": 1e-05, "loss": 1.0012, "step": 60800 }, { "epoch": 53.85739592559787, "grad_norm": 0.2287793606519699, "learning_rate": 1e-05, "loss": 1.0034, "step": 60805 }, { "epoch": 53.86182462356067, "grad_norm": 0.2280627191066742, "learning_rate": 1e-05, "loss": 0.9751, "step": 60810 }, { "epoch": 53.86625332152347, "grad_norm": 0.23534254729747772, "learning_rate": 1e-05, "loss": 0.987, "step": 60815 }, { "epoch": 53.870682019486274, "grad_norm": 0.20020708441734314, "learning_rate": 1e-05, "loss": 0.9908, "step": 60820 }, { "epoch": 53.87511071744907, "grad_norm": 0.2288442701101303, "learning_rate": 1e-05, "loss": 0.9937, "step": 60825 }, { "epoch": 53.87953941541187, "grad_norm": 0.2380603700876236, "learning_rate": 1e-05, "loss": 0.954, "step": 60830 }, { "epoch": 53.88396811337467, "grad_norm": 0.2640037536621094, "learning_rate": 1e-05, "loss": 0.9705, "step": 60835 }, { "epoch": 53.888396811337465, "grad_norm": 0.2480287402868271, "learning_rate": 1e-05, "loss": 1.0181, "step": 60840 }, { "epoch": 53.892825509300266, "grad_norm": 0.21241746842861176, "learning_rate": 1e-05, "loss": 0.935, "step": 60845 }, { "epoch": 53.89725420726307, "grad_norm": 0.22319309413433075, "learning_rate": 1e-05, "loss": 0.9704, "step": 60850 }, { "epoch": 53.90168290522586, "grad_norm": 0.21784153580665588, "learning_rate": 1e-05, "loss": 0.939, "step": 60855 }, { "epoch": 53.90611160318866, "grad_norm": 0.23020143806934357, "learning_rate": 1e-05, "loss": 0.9871, "step": 60860 }, { "epoch": 53.910540301151464, "grad_norm": 0.24832095205783844, "learning_rate": 1e-05, "loss": 1.0043, "step": 60865 }, { "epoch": 53.91496899911426, "grad_norm": 0.19770771265029907, "learning_rate": 1e-05, "loss": 0.9511, "step": 60870 }, { "epoch": 53.91939769707706, "grad_norm": 0.25580593943595886, "learning_rate": 1e-05, "loss": 0.9768, "step": 60875 }, { "epoch": 53.92382639503986, "grad_norm": 0.2854255735874176, "learning_rate": 1e-05, "loss": 0.9855, "step": 60880 }, { "epoch": 53.928255093002655, "grad_norm": 0.24428914487361908, "learning_rate": 1e-05, "loss": 0.9672, "step": 60885 }, { "epoch": 53.932683790965456, "grad_norm": 0.2942395508289337, "learning_rate": 1e-05, "loss": 1.0376, "step": 60890 }, { "epoch": 53.93711248892826, "grad_norm": 0.28321489691734314, "learning_rate": 1e-05, "loss": 0.9694, "step": 60895 }, { "epoch": 53.94154118689105, "grad_norm": 0.24364061653614044, "learning_rate": 1e-05, "loss": 0.9447, "step": 60900 }, { "epoch": 53.94596988485385, "grad_norm": 0.22843430936336517, "learning_rate": 1e-05, "loss": 0.9592, "step": 60905 }, { "epoch": 53.95039858281665, "grad_norm": 0.2668422758579254, "learning_rate": 1e-05, "loss": 0.968, "step": 60910 }, { "epoch": 53.95482728077945, "grad_norm": 0.2423567920923233, "learning_rate": 1e-05, "loss": 0.9082, "step": 60915 }, { "epoch": 53.95925597874225, "grad_norm": 0.21341903507709503, "learning_rate": 1e-05, "loss": 1.0029, "step": 60920 }, { "epoch": 53.96368467670505, "grad_norm": 0.20688697695732117, "learning_rate": 1e-05, "loss": 0.9819, "step": 60925 }, { "epoch": 53.96811337466785, "grad_norm": 0.2544124722480774, "learning_rate": 1e-05, "loss": 0.9276, "step": 60930 }, { "epoch": 53.972542072630645, "grad_norm": 0.2535913586616516, "learning_rate": 1e-05, "loss": 0.9414, "step": 60935 }, { "epoch": 53.97697077059345, "grad_norm": 0.2406681627035141, "learning_rate": 1e-05, "loss": 0.9725, "step": 60940 }, { "epoch": 53.98139946855625, "grad_norm": 0.18434345722198486, "learning_rate": 1e-05, "loss": 0.9084, "step": 60945 }, { "epoch": 53.98582816651904, "grad_norm": 0.23859114944934845, "learning_rate": 1e-05, "loss": 0.9657, "step": 60950 }, { "epoch": 53.99025686448184, "grad_norm": 0.26423484086990356, "learning_rate": 1e-05, "loss": 0.938, "step": 60955 }, { "epoch": 53.994685562444644, "grad_norm": 0.2631922662258148, "learning_rate": 1e-05, "loss": 0.9301, "step": 60960 }, { "epoch": 53.99911426040744, "grad_norm": 0.24404844641685486, "learning_rate": 1e-05, "loss": 0.9588, "step": 60965 }, { "epoch": 54.00354295837024, "grad_norm": 0.22955921292304993, "learning_rate": 1e-05, "loss": 0.9285, "step": 60970 }, { "epoch": 54.00797165633304, "grad_norm": 0.22842659056186676, "learning_rate": 1e-05, "loss": 0.9815, "step": 60975 }, { "epoch": 54.012400354295835, "grad_norm": 0.25447097420692444, "learning_rate": 1e-05, "loss": 1.0037, "step": 60980 }, { "epoch": 54.016829052258636, "grad_norm": 0.28639480471611023, "learning_rate": 1e-05, "loss": 0.944, "step": 60985 }, { "epoch": 54.02125775022144, "grad_norm": 0.2449595332145691, "learning_rate": 1e-05, "loss": 0.9874, "step": 60990 }, { "epoch": 54.02568644818423, "grad_norm": 0.29549911618232727, "learning_rate": 1e-05, "loss": 0.9509, "step": 60995 }, { "epoch": 54.03011514614703, "grad_norm": 0.26542067527770996, "learning_rate": 1e-05, "loss": 1.0195, "step": 61000 }, { "epoch": 54.034543844109834, "grad_norm": 0.25493472814559937, "learning_rate": 1e-05, "loss": 0.9809, "step": 61005 }, { "epoch": 54.03897254207263, "grad_norm": 0.24698977172374725, "learning_rate": 1e-05, "loss": 0.9709, "step": 61010 }, { "epoch": 54.04340124003543, "grad_norm": 0.2290639728307724, "learning_rate": 1e-05, "loss": 0.9673, "step": 61015 }, { "epoch": 54.04782993799823, "grad_norm": 0.2510537803173065, "learning_rate": 1e-05, "loss": 0.9422, "step": 61020 }, { "epoch": 54.052258635961024, "grad_norm": 0.24928176403045654, "learning_rate": 1e-05, "loss": 0.9496, "step": 61025 }, { "epoch": 54.056687333923826, "grad_norm": 0.2595762312412262, "learning_rate": 1e-05, "loss": 1.003, "step": 61030 }, { "epoch": 54.06111603188663, "grad_norm": 0.22515444457530975, "learning_rate": 1e-05, "loss": 0.9889, "step": 61035 }, { "epoch": 54.06554472984942, "grad_norm": 0.23509569466114044, "learning_rate": 1e-05, "loss": 0.9121, "step": 61040 }, { "epoch": 54.06997342781222, "grad_norm": 0.2688516676425934, "learning_rate": 1e-05, "loss": 0.9292, "step": 61045 }, { "epoch": 54.07440212577502, "grad_norm": 0.21259772777557373, "learning_rate": 1e-05, "loss": 0.9947, "step": 61050 }, { "epoch": 54.078830823737825, "grad_norm": 0.22343476116657257, "learning_rate": 1e-05, "loss": 0.9887, "step": 61055 }, { "epoch": 54.08325952170062, "grad_norm": 0.210588738322258, "learning_rate": 1e-05, "loss": 0.9647, "step": 61060 }, { "epoch": 54.08768821966342, "grad_norm": 0.2256932258605957, "learning_rate": 1e-05, "loss": 0.9893, "step": 61065 }, { "epoch": 54.09211691762622, "grad_norm": 0.29084935784339905, "learning_rate": 1e-05, "loss": 0.9959, "step": 61070 }, { "epoch": 54.096545615589015, "grad_norm": 0.28121477365493774, "learning_rate": 1e-05, "loss": 1.0035, "step": 61075 }, { "epoch": 54.100974313551816, "grad_norm": 0.3363478481769562, "learning_rate": 1e-05, "loss": 1.0254, "step": 61080 }, { "epoch": 54.10540301151462, "grad_norm": 0.2818223237991333, "learning_rate": 1e-05, "loss": 0.9742, "step": 61085 }, { "epoch": 54.10983170947741, "grad_norm": 0.21597203612327576, "learning_rate": 1e-05, "loss": 0.9076, "step": 61090 }, { "epoch": 54.11426040744021, "grad_norm": 0.2366877943277359, "learning_rate": 1e-05, "loss": 0.9963, "step": 61095 }, { "epoch": 54.118689105403014, "grad_norm": 0.22500531375408173, "learning_rate": 1e-05, "loss": 0.9684, "step": 61100 }, { "epoch": 54.12311780336581, "grad_norm": 0.2209455519914627, "learning_rate": 1e-05, "loss": 0.9551, "step": 61105 }, { "epoch": 54.12754650132861, "grad_norm": 0.23634248971939087, "learning_rate": 1e-05, "loss": 0.9671, "step": 61110 }, { "epoch": 54.13197519929141, "grad_norm": 0.25187307596206665, "learning_rate": 1e-05, "loss": 0.9767, "step": 61115 }, { "epoch": 54.136403897254205, "grad_norm": 0.273495614528656, "learning_rate": 1e-05, "loss": 0.9899, "step": 61120 }, { "epoch": 54.140832595217006, "grad_norm": 0.3204086124897003, "learning_rate": 1e-05, "loss": 0.9697, "step": 61125 }, { "epoch": 54.14526129317981, "grad_norm": 0.3518579304218292, "learning_rate": 1e-05, "loss": 0.9582, "step": 61130 }, { "epoch": 54.1496899911426, "grad_norm": 0.3865751028060913, "learning_rate": 1e-05, "loss": 0.9612, "step": 61135 }, { "epoch": 54.1541186891054, "grad_norm": 0.22252608835697174, "learning_rate": 1e-05, "loss": 0.9854, "step": 61140 }, { "epoch": 54.158547387068204, "grad_norm": 0.2905064523220062, "learning_rate": 1e-05, "loss": 0.9654, "step": 61145 }, { "epoch": 54.162976085031, "grad_norm": 0.22203530371189117, "learning_rate": 1e-05, "loss": 0.9968, "step": 61150 }, { "epoch": 54.1674047829938, "grad_norm": 0.2612822353839874, "learning_rate": 1e-05, "loss": 1.0321, "step": 61155 }, { "epoch": 54.1718334809566, "grad_norm": 0.23783627152442932, "learning_rate": 1e-05, "loss": 0.9321, "step": 61160 }, { "epoch": 54.176262178919394, "grad_norm": 0.2373826652765274, "learning_rate": 1e-05, "loss": 0.9691, "step": 61165 }, { "epoch": 54.180690876882196, "grad_norm": 0.2468205988407135, "learning_rate": 1e-05, "loss": 0.9776, "step": 61170 }, { "epoch": 54.185119574845, "grad_norm": 0.2272808998823166, "learning_rate": 1e-05, "loss": 0.9742, "step": 61175 }, { "epoch": 54.1895482728078, "grad_norm": 0.27046507596969604, "learning_rate": 1e-05, "loss": 0.9948, "step": 61180 }, { "epoch": 54.19397697077059, "grad_norm": 0.23064535856246948, "learning_rate": 1e-05, "loss": 0.9697, "step": 61185 }, { "epoch": 54.19840566873339, "grad_norm": 0.22172294557094574, "learning_rate": 1e-05, "loss": 0.9622, "step": 61190 }, { "epoch": 54.202834366696194, "grad_norm": 0.226271390914917, "learning_rate": 1e-05, "loss": 0.9065, "step": 61195 }, { "epoch": 54.20726306465899, "grad_norm": 0.22542870044708252, "learning_rate": 1e-05, "loss": 1.0119, "step": 61200 }, { "epoch": 54.21169176262179, "grad_norm": 0.22811514139175415, "learning_rate": 1e-05, "loss": 0.992, "step": 61205 }, { "epoch": 54.21612046058459, "grad_norm": 0.26387643814086914, "learning_rate": 1e-05, "loss": 0.967, "step": 61210 }, { "epoch": 54.220549158547385, "grad_norm": 0.23597018420696259, "learning_rate": 1e-05, "loss": 0.9543, "step": 61215 }, { "epoch": 54.224977856510186, "grad_norm": 0.2678360939025879, "learning_rate": 1e-05, "loss": 0.9737, "step": 61220 }, { "epoch": 54.22940655447299, "grad_norm": 0.2404780387878418, "learning_rate": 1e-05, "loss": 0.9351, "step": 61225 }, { "epoch": 54.23383525243578, "grad_norm": 0.2540590167045593, "learning_rate": 1e-05, "loss": 0.9646, "step": 61230 }, { "epoch": 54.23826395039858, "grad_norm": 0.24822445213794708, "learning_rate": 1e-05, "loss": 0.9713, "step": 61235 }, { "epoch": 54.242692648361384, "grad_norm": 0.23118668794631958, "learning_rate": 1e-05, "loss": 0.9948, "step": 61240 }, { "epoch": 54.24712134632418, "grad_norm": 0.24964898824691772, "learning_rate": 1e-05, "loss": 1.0007, "step": 61245 }, { "epoch": 54.25155004428698, "grad_norm": 0.22792859375476837, "learning_rate": 1e-05, "loss": 0.9881, "step": 61250 }, { "epoch": 54.25597874224978, "grad_norm": 0.22979103028774261, "learning_rate": 1e-05, "loss": 0.9782, "step": 61255 }, { "epoch": 54.260407440212575, "grad_norm": 0.2890489399433136, "learning_rate": 1e-05, "loss": 0.9211, "step": 61260 }, { "epoch": 54.264836138175376, "grad_norm": 0.2525692582130432, "learning_rate": 1e-05, "loss": 1.0305, "step": 61265 }, { "epoch": 54.26926483613818, "grad_norm": 0.24704942107200623, "learning_rate": 1e-05, "loss": 0.9546, "step": 61270 }, { "epoch": 54.27369353410097, "grad_norm": 0.2595321834087372, "learning_rate": 1e-05, "loss": 0.9927, "step": 61275 }, { "epoch": 54.27812223206377, "grad_norm": 0.22018831968307495, "learning_rate": 1e-05, "loss": 0.9939, "step": 61280 }, { "epoch": 54.282550930026574, "grad_norm": 0.2448827475309372, "learning_rate": 1e-05, "loss": 0.9174, "step": 61285 }, { "epoch": 54.28697962798937, "grad_norm": 0.2333334982395172, "learning_rate": 1e-05, "loss": 0.9601, "step": 61290 }, { "epoch": 54.29140832595217, "grad_norm": 0.22767828404903412, "learning_rate": 1e-05, "loss": 0.9334, "step": 61295 }, { "epoch": 54.29583702391497, "grad_norm": 0.23467841744422913, "learning_rate": 1e-05, "loss": 0.9641, "step": 61300 }, { "epoch": 54.30026572187777, "grad_norm": 0.2241162359714508, "learning_rate": 1e-05, "loss": 1.0135, "step": 61305 }, { "epoch": 54.304694419840565, "grad_norm": 0.233202263712883, "learning_rate": 1e-05, "loss": 0.9912, "step": 61310 }, { "epoch": 54.30912311780337, "grad_norm": 0.21783636510372162, "learning_rate": 1e-05, "loss": 0.9361, "step": 61315 }, { "epoch": 54.31355181576617, "grad_norm": 0.2490084171295166, "learning_rate": 1e-05, "loss": 0.9487, "step": 61320 }, { "epoch": 54.31798051372896, "grad_norm": 0.21604113280773163, "learning_rate": 1e-05, "loss": 0.9943, "step": 61325 }, { "epoch": 54.32240921169176, "grad_norm": 0.21787811815738678, "learning_rate": 1e-05, "loss": 0.9973, "step": 61330 }, { "epoch": 54.326837909654564, "grad_norm": 0.2443985939025879, "learning_rate": 1e-05, "loss": 0.9403, "step": 61335 }, { "epoch": 54.33126660761736, "grad_norm": 0.23070906102657318, "learning_rate": 1e-05, "loss": 0.9406, "step": 61340 }, { "epoch": 54.33569530558016, "grad_norm": 0.23045994341373444, "learning_rate": 1e-05, "loss": 0.9708, "step": 61345 }, { "epoch": 54.34012400354296, "grad_norm": 0.27131208777427673, "learning_rate": 1e-05, "loss": 0.9886, "step": 61350 }, { "epoch": 54.344552701505755, "grad_norm": 0.2660194933414459, "learning_rate": 1e-05, "loss": 0.9576, "step": 61355 }, { "epoch": 54.348981399468556, "grad_norm": 0.23644393682479858, "learning_rate": 1e-05, "loss": 0.9552, "step": 61360 }, { "epoch": 54.35341009743136, "grad_norm": 0.26887020468711853, "learning_rate": 1e-05, "loss": 0.9651, "step": 61365 }, { "epoch": 54.35783879539415, "grad_norm": 0.2516176104545593, "learning_rate": 1e-05, "loss": 0.9641, "step": 61370 }, { "epoch": 54.36226749335695, "grad_norm": 0.21114002168178558, "learning_rate": 1e-05, "loss": 0.9469, "step": 61375 }, { "epoch": 54.366696191319754, "grad_norm": 0.25534120202064514, "learning_rate": 1e-05, "loss": 0.9424, "step": 61380 }, { "epoch": 54.37112488928255, "grad_norm": 0.21948450803756714, "learning_rate": 1e-05, "loss": 0.9308, "step": 61385 }, { "epoch": 54.37555358724535, "grad_norm": 0.21734066307544708, "learning_rate": 1e-05, "loss": 0.9442, "step": 61390 }, { "epoch": 54.37998228520815, "grad_norm": 0.25333961844444275, "learning_rate": 1e-05, "loss": 1.0166, "step": 61395 }, { "epoch": 54.384410983170945, "grad_norm": 0.21795569360256195, "learning_rate": 1e-05, "loss": 1.0022, "step": 61400 }, { "epoch": 54.388839681133746, "grad_norm": 0.26207274198532104, "learning_rate": 1e-05, "loss": 1.0238, "step": 61405 }, { "epoch": 54.39326837909655, "grad_norm": 0.2518703043460846, "learning_rate": 1e-05, "loss": 0.9566, "step": 61410 }, { "epoch": 54.39769707705934, "grad_norm": 0.2144148051738739, "learning_rate": 1e-05, "loss": 0.9763, "step": 61415 }, { "epoch": 54.40212577502214, "grad_norm": 0.231561079621315, "learning_rate": 1e-05, "loss": 0.952, "step": 61420 }, { "epoch": 54.40655447298494, "grad_norm": 0.268319696187973, "learning_rate": 1e-05, "loss": 1.0167, "step": 61425 }, { "epoch": 54.410983170947745, "grad_norm": 0.2864091992378235, "learning_rate": 1e-05, "loss": 0.9402, "step": 61430 }, { "epoch": 54.41541186891054, "grad_norm": 0.25611644983291626, "learning_rate": 1e-05, "loss": 0.9168, "step": 61435 }, { "epoch": 54.41984056687334, "grad_norm": 0.2154947817325592, "learning_rate": 1e-05, "loss": 0.9581, "step": 61440 }, { "epoch": 54.42426926483614, "grad_norm": 0.21742206811904907, "learning_rate": 1e-05, "loss": 0.9231, "step": 61445 }, { "epoch": 54.428697962798935, "grad_norm": 0.24300965666770935, "learning_rate": 1e-05, "loss": 1.0471, "step": 61450 }, { "epoch": 54.43312666076174, "grad_norm": 0.22295871376991272, "learning_rate": 1e-05, "loss": 0.9695, "step": 61455 }, { "epoch": 54.43755535872454, "grad_norm": 0.24372051656246185, "learning_rate": 1e-05, "loss": 0.9642, "step": 61460 }, { "epoch": 54.44198405668733, "grad_norm": 0.21082350611686707, "learning_rate": 1e-05, "loss": 0.9406, "step": 61465 }, { "epoch": 54.44641275465013, "grad_norm": 0.2071627825498581, "learning_rate": 1e-05, "loss": 0.9888, "step": 61470 }, { "epoch": 54.450841452612934, "grad_norm": 0.26812344789505005, "learning_rate": 1e-05, "loss": 0.9477, "step": 61475 }, { "epoch": 54.45527015057573, "grad_norm": 0.24720941483974457, "learning_rate": 1e-05, "loss": 0.9832, "step": 61480 }, { "epoch": 54.45969884853853, "grad_norm": 0.22771954536437988, "learning_rate": 1e-05, "loss": 0.973, "step": 61485 }, { "epoch": 54.46412754650133, "grad_norm": 0.2672770321369171, "learning_rate": 1e-05, "loss": 0.9799, "step": 61490 }, { "epoch": 54.468556244464125, "grad_norm": 0.25041714310646057, "learning_rate": 1e-05, "loss": 0.9745, "step": 61495 }, { "epoch": 54.472984942426926, "grad_norm": 0.263938844203949, "learning_rate": 1e-05, "loss": 1.0139, "step": 61500 }, { "epoch": 54.47741364038973, "grad_norm": 0.2439061999320984, "learning_rate": 1e-05, "loss": 0.9373, "step": 61505 }, { "epoch": 54.48184233835252, "grad_norm": 0.23846527934074402, "learning_rate": 1e-05, "loss": 0.9621, "step": 61510 }, { "epoch": 54.48627103631532, "grad_norm": 0.23370777070522308, "learning_rate": 1e-05, "loss": 0.9745, "step": 61515 }, { "epoch": 54.490699734278124, "grad_norm": 0.22891657054424286, "learning_rate": 1e-05, "loss": 0.9773, "step": 61520 }, { "epoch": 54.49512843224092, "grad_norm": 0.2401563972234726, "learning_rate": 1e-05, "loss": 0.9736, "step": 61525 }, { "epoch": 54.49955713020372, "grad_norm": 0.23497898876667023, "learning_rate": 1e-05, "loss": 0.9783, "step": 61530 }, { "epoch": 54.50398582816652, "grad_norm": 0.2689017355442047, "learning_rate": 1e-05, "loss": 0.9541, "step": 61535 }, { "epoch": 54.508414526129314, "grad_norm": 0.2731553912162781, "learning_rate": 1e-05, "loss": 1.0197, "step": 61540 }, { "epoch": 54.512843224092116, "grad_norm": 0.2824132442474365, "learning_rate": 1e-05, "loss": 0.9421, "step": 61545 }, { "epoch": 54.51727192205492, "grad_norm": 0.2665017247200012, "learning_rate": 1e-05, "loss": 0.9206, "step": 61550 }, { "epoch": 54.52170062001772, "grad_norm": 0.24270696938037872, "learning_rate": 1e-05, "loss": 1.0015, "step": 61555 }, { "epoch": 54.52612931798051, "grad_norm": 0.23882102966308594, "learning_rate": 1e-05, "loss": 0.9969, "step": 61560 }, { "epoch": 54.53055801594331, "grad_norm": 0.25425294041633606, "learning_rate": 1e-05, "loss": 0.9336, "step": 61565 }, { "epoch": 54.534986713906115, "grad_norm": 0.238195538520813, "learning_rate": 1e-05, "loss": 1.0277, "step": 61570 }, { "epoch": 54.53941541186891, "grad_norm": 0.26347872614860535, "learning_rate": 1e-05, "loss": 1.0057, "step": 61575 }, { "epoch": 54.54384410983171, "grad_norm": 0.2912469804286957, "learning_rate": 1e-05, "loss": 0.9779, "step": 61580 }, { "epoch": 54.54827280779451, "grad_norm": 0.2978285849094391, "learning_rate": 1e-05, "loss": 0.9867, "step": 61585 }, { "epoch": 54.552701505757305, "grad_norm": 0.252096027135849, "learning_rate": 1e-05, "loss": 1.0593, "step": 61590 }, { "epoch": 54.55713020372011, "grad_norm": 0.21461506187915802, "learning_rate": 1e-05, "loss": 0.9676, "step": 61595 }, { "epoch": 54.56155890168291, "grad_norm": 0.2181003987789154, "learning_rate": 1e-05, "loss": 0.9632, "step": 61600 }, { "epoch": 54.5659875996457, "grad_norm": 0.2642095386981964, "learning_rate": 1e-05, "loss": 1.017, "step": 61605 }, { "epoch": 54.5704162976085, "grad_norm": 0.276835173368454, "learning_rate": 1e-05, "loss": 1.0413, "step": 61610 }, { "epoch": 54.574844995571304, "grad_norm": 0.261502742767334, "learning_rate": 1e-05, "loss": 0.9119, "step": 61615 }, { "epoch": 54.5792736935341, "grad_norm": 0.23839789628982544, "learning_rate": 1e-05, "loss": 0.9693, "step": 61620 }, { "epoch": 54.5837023914969, "grad_norm": 0.243050679564476, "learning_rate": 1e-05, "loss": 0.9416, "step": 61625 }, { "epoch": 54.5881310894597, "grad_norm": 0.249448761343956, "learning_rate": 1e-05, "loss": 1.004, "step": 61630 }, { "epoch": 54.592559787422495, "grad_norm": 0.2467772364616394, "learning_rate": 1e-05, "loss": 0.9994, "step": 61635 }, { "epoch": 54.596988485385296, "grad_norm": 0.2678309381008148, "learning_rate": 1e-05, "loss": 0.8949, "step": 61640 }, { "epoch": 54.6014171833481, "grad_norm": 0.2426210343837738, "learning_rate": 1e-05, "loss": 0.9683, "step": 61645 }, { "epoch": 54.60584588131089, "grad_norm": 0.20903442800045013, "learning_rate": 1e-05, "loss": 0.9008, "step": 61650 }, { "epoch": 54.61027457927369, "grad_norm": 0.24812667071819305, "learning_rate": 1e-05, "loss": 1.0231, "step": 61655 }, { "epoch": 54.614703277236494, "grad_norm": 0.2490062117576599, "learning_rate": 1e-05, "loss": 0.9388, "step": 61660 }, { "epoch": 54.619131975199295, "grad_norm": 0.22590519487857819, "learning_rate": 1e-05, "loss": 0.9815, "step": 61665 }, { "epoch": 54.62356067316209, "grad_norm": 0.2515568137168884, "learning_rate": 1e-05, "loss": 1.0303, "step": 61670 }, { "epoch": 54.62798937112489, "grad_norm": 0.25598007440567017, "learning_rate": 1e-05, "loss": 0.9737, "step": 61675 }, { "epoch": 54.63241806908769, "grad_norm": 0.2397070676088333, "learning_rate": 1e-05, "loss": 0.9697, "step": 61680 }, { "epoch": 54.636846767050486, "grad_norm": 0.20996831357479095, "learning_rate": 1e-05, "loss": 1.0096, "step": 61685 }, { "epoch": 54.64127546501329, "grad_norm": 0.2177884876728058, "learning_rate": 1e-05, "loss": 0.9526, "step": 61690 }, { "epoch": 54.64570416297609, "grad_norm": 0.26674431562423706, "learning_rate": 1e-05, "loss": 0.9696, "step": 61695 }, { "epoch": 54.65013286093888, "grad_norm": 0.24462474882602692, "learning_rate": 1e-05, "loss": 0.9915, "step": 61700 }, { "epoch": 54.65456155890168, "grad_norm": 0.24660229682922363, "learning_rate": 1e-05, "loss": 0.9482, "step": 61705 }, { "epoch": 54.658990256864485, "grad_norm": 0.21257789433002472, "learning_rate": 1e-05, "loss": 0.9872, "step": 61710 }, { "epoch": 54.66341895482728, "grad_norm": 0.23424005508422852, "learning_rate": 1e-05, "loss": 0.9567, "step": 61715 }, { "epoch": 54.66784765279008, "grad_norm": 0.280011922121048, "learning_rate": 1e-05, "loss": 0.9614, "step": 61720 }, { "epoch": 54.67227635075288, "grad_norm": 0.2793463468551636, "learning_rate": 1e-05, "loss": 1.0064, "step": 61725 }, { "epoch": 54.676705048715675, "grad_norm": 0.23466026782989502, "learning_rate": 1e-05, "loss": 0.9731, "step": 61730 }, { "epoch": 54.681133746678476, "grad_norm": 0.20742113888263702, "learning_rate": 1e-05, "loss": 0.9892, "step": 61735 }, { "epoch": 54.68556244464128, "grad_norm": 0.24423709511756897, "learning_rate": 1e-05, "loss": 0.9607, "step": 61740 }, { "epoch": 54.68999114260407, "grad_norm": 0.2402326911687851, "learning_rate": 1e-05, "loss": 0.9877, "step": 61745 }, { "epoch": 54.69441984056687, "grad_norm": 0.22081421315670013, "learning_rate": 1e-05, "loss": 0.963, "step": 61750 }, { "epoch": 54.698848538529674, "grad_norm": 0.24656403064727783, "learning_rate": 1e-05, "loss": 0.9697, "step": 61755 }, { "epoch": 54.70327723649247, "grad_norm": 0.284909188747406, "learning_rate": 1e-05, "loss": 0.9632, "step": 61760 }, { "epoch": 54.70770593445527, "grad_norm": 0.23226577043533325, "learning_rate": 1e-05, "loss": 0.9355, "step": 61765 }, { "epoch": 54.71213463241807, "grad_norm": 0.21470679342746735, "learning_rate": 1e-05, "loss": 0.9657, "step": 61770 }, { "epoch": 54.716563330380865, "grad_norm": 0.2293403595685959, "learning_rate": 1e-05, "loss": 1.0112, "step": 61775 }, { "epoch": 54.720992028343666, "grad_norm": 0.2368682473897934, "learning_rate": 1e-05, "loss": 0.9216, "step": 61780 }, { "epoch": 54.72542072630647, "grad_norm": 0.22414761781692505, "learning_rate": 1e-05, "loss": 0.9665, "step": 61785 }, { "epoch": 54.72984942426926, "grad_norm": 0.23385803401470184, "learning_rate": 1e-05, "loss": 0.9705, "step": 61790 }, { "epoch": 54.73427812223206, "grad_norm": 0.21191293001174927, "learning_rate": 1e-05, "loss": 0.9929, "step": 61795 }, { "epoch": 54.738706820194864, "grad_norm": 0.24699129164218903, "learning_rate": 1e-05, "loss": 0.9962, "step": 61800 }, { "epoch": 54.743135518157665, "grad_norm": 0.22854012250900269, "learning_rate": 1e-05, "loss": 0.9751, "step": 61805 }, { "epoch": 54.74756421612046, "grad_norm": 0.2828068137168884, "learning_rate": 1e-05, "loss": 0.9358, "step": 61810 }, { "epoch": 54.75199291408326, "grad_norm": 0.2359173595905304, "learning_rate": 1e-05, "loss": 1.0127, "step": 61815 }, { "epoch": 54.75642161204606, "grad_norm": 0.23383831977844238, "learning_rate": 1e-05, "loss": 0.9594, "step": 61820 }, { "epoch": 54.760850310008856, "grad_norm": 0.26490044593811035, "learning_rate": 1e-05, "loss": 0.9738, "step": 61825 }, { "epoch": 54.76527900797166, "grad_norm": 0.24586886167526245, "learning_rate": 1e-05, "loss": 0.998, "step": 61830 }, { "epoch": 54.76970770593446, "grad_norm": 0.25765252113342285, "learning_rate": 1e-05, "loss": 0.9434, "step": 61835 }, { "epoch": 54.77413640389725, "grad_norm": 0.27040475606918335, "learning_rate": 1e-05, "loss": 0.9952, "step": 61840 }, { "epoch": 54.77856510186005, "grad_norm": 0.2563571035861969, "learning_rate": 1e-05, "loss": 0.988, "step": 61845 }, { "epoch": 54.782993799822854, "grad_norm": 0.25582483410835266, "learning_rate": 1e-05, "loss": 0.9834, "step": 61850 }, { "epoch": 54.78742249778565, "grad_norm": 0.22667090594768524, "learning_rate": 1e-05, "loss": 0.9962, "step": 61855 }, { "epoch": 54.79185119574845, "grad_norm": 0.2700563669204712, "learning_rate": 1e-05, "loss": 1.0083, "step": 61860 }, { "epoch": 54.79627989371125, "grad_norm": 0.23663084208965302, "learning_rate": 1e-05, "loss": 0.9945, "step": 61865 }, { "epoch": 54.800708591674045, "grad_norm": 0.22395679354667664, "learning_rate": 1e-05, "loss": 1.0679, "step": 61870 }, { "epoch": 54.805137289636846, "grad_norm": 0.21704262495040894, "learning_rate": 1e-05, "loss": 0.9209, "step": 61875 }, { "epoch": 54.80956598759965, "grad_norm": 0.21953852474689484, "learning_rate": 1e-05, "loss": 0.9629, "step": 61880 }, { "epoch": 54.81399468556244, "grad_norm": 0.22336454689502716, "learning_rate": 1e-05, "loss": 0.9529, "step": 61885 }, { "epoch": 54.81842338352524, "grad_norm": 0.24294808506965637, "learning_rate": 1e-05, "loss": 1.0035, "step": 61890 }, { "epoch": 54.822852081488044, "grad_norm": 0.25395455956459045, "learning_rate": 1e-05, "loss": 0.9611, "step": 61895 }, { "epoch": 54.82728077945084, "grad_norm": 0.21166537702083588, "learning_rate": 1e-05, "loss": 0.9877, "step": 61900 }, { "epoch": 54.83170947741364, "grad_norm": 0.2385307401418686, "learning_rate": 1e-05, "loss": 0.9638, "step": 61905 }, { "epoch": 54.83613817537644, "grad_norm": 0.24236778914928436, "learning_rate": 1e-05, "loss": 0.915, "step": 61910 }, { "epoch": 54.84056687333924, "grad_norm": 0.2466571033000946, "learning_rate": 1e-05, "loss": 0.9356, "step": 61915 }, { "epoch": 54.844995571302036, "grad_norm": 0.26242566108703613, "learning_rate": 1e-05, "loss": 0.9682, "step": 61920 }, { "epoch": 54.84942426926484, "grad_norm": 0.2473292201757431, "learning_rate": 1e-05, "loss": 0.9938, "step": 61925 }, { "epoch": 54.85385296722764, "grad_norm": 0.24475917220115662, "learning_rate": 1e-05, "loss": 0.9913, "step": 61930 }, { "epoch": 54.85828166519043, "grad_norm": 0.23263022303581238, "learning_rate": 1e-05, "loss": 1.0142, "step": 61935 }, { "epoch": 54.862710363153234, "grad_norm": 0.2234267145395279, "learning_rate": 1e-05, "loss": 1.0441, "step": 61940 }, { "epoch": 54.867139061116035, "grad_norm": 0.2610851228237152, "learning_rate": 1e-05, "loss": 0.9908, "step": 61945 }, { "epoch": 54.87156775907883, "grad_norm": 0.24277083575725555, "learning_rate": 1e-05, "loss": 1.0122, "step": 61950 }, { "epoch": 54.87599645704163, "grad_norm": 0.24271883070468903, "learning_rate": 1e-05, "loss": 1.0161, "step": 61955 }, { "epoch": 54.88042515500443, "grad_norm": 0.24794341623783112, "learning_rate": 1e-05, "loss": 0.9395, "step": 61960 }, { "epoch": 54.884853852967225, "grad_norm": 0.2069029062986374, "learning_rate": 1e-05, "loss": 0.9642, "step": 61965 }, { "epoch": 54.88928255093003, "grad_norm": 0.21977069973945618, "learning_rate": 1e-05, "loss": 0.9365, "step": 61970 }, { "epoch": 54.89371124889283, "grad_norm": 0.23933996260166168, "learning_rate": 1e-05, "loss": 0.9995, "step": 61975 }, { "epoch": 54.89813994685562, "grad_norm": 0.22514726221561432, "learning_rate": 1e-05, "loss": 0.9997, "step": 61980 }, { "epoch": 54.90256864481842, "grad_norm": 0.22039583325386047, "learning_rate": 1e-05, "loss": 0.9417, "step": 61985 }, { "epoch": 54.906997342781224, "grad_norm": 0.23116083443164825, "learning_rate": 1e-05, "loss": 0.9942, "step": 61990 }, { "epoch": 54.91142604074402, "grad_norm": 0.19800740480422974, "learning_rate": 1e-05, "loss": 0.9921, "step": 61995 }, { "epoch": 54.91585473870682, "grad_norm": 0.2333899438381195, "learning_rate": 1e-05, "loss": 0.9957, "step": 62000 }, { "epoch": 54.92028343666962, "grad_norm": 0.22694028913974762, "learning_rate": 1e-05, "loss": 0.9951, "step": 62005 }, { "epoch": 54.924712134632415, "grad_norm": 0.27045968174934387, "learning_rate": 1e-05, "loss": 0.9629, "step": 62010 }, { "epoch": 54.929140832595216, "grad_norm": 0.2575031518936157, "learning_rate": 1e-05, "loss": 0.959, "step": 62015 }, { "epoch": 54.93356953055802, "grad_norm": 0.24480172991752625, "learning_rate": 1e-05, "loss": 0.9685, "step": 62020 }, { "epoch": 54.93799822852081, "grad_norm": 0.2962493300437927, "learning_rate": 1e-05, "loss": 0.973, "step": 62025 }, { "epoch": 54.94242692648361, "grad_norm": 0.27831003069877625, "learning_rate": 1e-05, "loss": 0.9795, "step": 62030 }, { "epoch": 54.946855624446414, "grad_norm": 0.2670118510723114, "learning_rate": 1e-05, "loss": 0.9615, "step": 62035 }, { "epoch": 54.951284322409215, "grad_norm": 0.26412615180015564, "learning_rate": 1e-05, "loss": 0.9791, "step": 62040 }, { "epoch": 54.95571302037201, "grad_norm": 0.2617094814777374, "learning_rate": 1e-05, "loss": 0.9308, "step": 62045 }, { "epoch": 54.96014171833481, "grad_norm": 0.22700825333595276, "learning_rate": 1e-05, "loss": 0.9544, "step": 62050 }, { "epoch": 54.96457041629761, "grad_norm": 0.18606488406658173, "learning_rate": 1e-05, "loss": 0.9342, "step": 62055 }, { "epoch": 54.968999114260406, "grad_norm": 0.2505088448524475, "learning_rate": 1e-05, "loss": 0.9832, "step": 62060 }, { "epoch": 54.97342781222321, "grad_norm": 0.2616252601146698, "learning_rate": 1e-05, "loss": 0.9893, "step": 62065 }, { "epoch": 54.97785651018601, "grad_norm": 0.24996070563793182, "learning_rate": 1e-05, "loss": 0.961, "step": 62070 }, { "epoch": 54.9822852081488, "grad_norm": 0.22112376987934113, "learning_rate": 1e-05, "loss": 1.0018, "step": 62075 }, { "epoch": 54.9867139061116, "grad_norm": 0.21114081144332886, "learning_rate": 1e-05, "loss": 0.9359, "step": 62080 }, { "epoch": 54.991142604074405, "grad_norm": 0.2259313315153122, "learning_rate": 1e-05, "loss": 0.9977, "step": 62085 }, { "epoch": 54.9955713020372, "grad_norm": 0.21121808886528015, "learning_rate": 1e-05, "loss": 0.9947, "step": 62090 }, { "epoch": 55.0, "grad_norm": 0.25856801867485046, "learning_rate": 1e-05, "loss": 0.9636, "step": 62095 }, { "epoch": 55.0044286979628, "grad_norm": 0.2713790535926819, "learning_rate": 1e-05, "loss": 0.9965, "step": 62100 }, { "epoch": 55.008857395925595, "grad_norm": 0.2832414209842682, "learning_rate": 1e-05, "loss": 0.9167, "step": 62105 }, { "epoch": 55.0132860938884, "grad_norm": 0.23071420192718506, "learning_rate": 1e-05, "loss": 0.9741, "step": 62110 }, { "epoch": 55.0177147918512, "grad_norm": 0.22246162593364716, "learning_rate": 1e-05, "loss": 0.9649, "step": 62115 }, { "epoch": 55.02214348981399, "grad_norm": 0.2495613545179367, "learning_rate": 1e-05, "loss": 0.9513, "step": 62120 }, { "epoch": 55.02657218777679, "grad_norm": 0.270028293132782, "learning_rate": 1e-05, "loss": 0.962, "step": 62125 }, { "epoch": 55.031000885739594, "grad_norm": 0.2383924275636673, "learning_rate": 1e-05, "loss": 0.977, "step": 62130 }, { "epoch": 55.03542958370239, "grad_norm": 0.21703945100307465, "learning_rate": 1e-05, "loss": 0.989, "step": 62135 }, { "epoch": 55.03985828166519, "grad_norm": 0.24551594257354736, "learning_rate": 1e-05, "loss": 0.9583, "step": 62140 }, { "epoch": 55.04428697962799, "grad_norm": 0.2273833304643631, "learning_rate": 1e-05, "loss": 0.9746, "step": 62145 }, { "epoch": 55.048715677590785, "grad_norm": 0.25210875272750854, "learning_rate": 1e-05, "loss": 0.9249, "step": 62150 }, { "epoch": 55.053144375553586, "grad_norm": 0.26989150047302246, "learning_rate": 1e-05, "loss": 0.978, "step": 62155 }, { "epoch": 55.05757307351639, "grad_norm": 0.256803423166275, "learning_rate": 1e-05, "loss": 0.9482, "step": 62160 }, { "epoch": 55.06200177147919, "grad_norm": 0.22256067395210266, "learning_rate": 1e-05, "loss": 0.8808, "step": 62165 }, { "epoch": 55.06643046944198, "grad_norm": 0.2533958852291107, "learning_rate": 1e-05, "loss": 0.9978, "step": 62170 }, { "epoch": 55.070859167404784, "grad_norm": 0.2591835856437683, "learning_rate": 1e-05, "loss": 0.9193, "step": 62175 }, { "epoch": 55.075287865367585, "grad_norm": 0.22120441496372223, "learning_rate": 1e-05, "loss": 1.0012, "step": 62180 }, { "epoch": 55.07971656333038, "grad_norm": 0.20032048225402832, "learning_rate": 1e-05, "loss": 0.9895, "step": 62185 }, { "epoch": 55.08414526129318, "grad_norm": 0.27565130591392517, "learning_rate": 1e-05, "loss": 1.0251, "step": 62190 }, { "epoch": 55.08857395925598, "grad_norm": 0.20962965488433838, "learning_rate": 1e-05, "loss": 0.9793, "step": 62195 }, { "epoch": 55.093002657218776, "grad_norm": 0.2532493472099304, "learning_rate": 1e-05, "loss": 1.0158, "step": 62200 }, { "epoch": 55.09743135518158, "grad_norm": 0.22159592807292938, "learning_rate": 1e-05, "loss": 1.0076, "step": 62205 }, { "epoch": 55.10186005314438, "grad_norm": 0.23838955163955688, "learning_rate": 1e-05, "loss": 0.9466, "step": 62210 }, { "epoch": 55.10628875110717, "grad_norm": 0.230186328291893, "learning_rate": 1e-05, "loss": 1.0105, "step": 62215 }, { "epoch": 55.11071744906997, "grad_norm": 0.23161542415618896, "learning_rate": 1e-05, "loss": 0.9552, "step": 62220 }, { "epoch": 55.115146147032775, "grad_norm": 0.22316773235797882, "learning_rate": 1e-05, "loss": 0.9277, "step": 62225 }, { "epoch": 55.11957484499557, "grad_norm": 0.2775658965110779, "learning_rate": 1e-05, "loss": 0.9618, "step": 62230 }, { "epoch": 55.12400354295837, "grad_norm": 0.21759754419326782, "learning_rate": 1e-05, "loss": 0.9721, "step": 62235 }, { "epoch": 55.12843224092117, "grad_norm": 0.210435688495636, "learning_rate": 1e-05, "loss": 0.9191, "step": 62240 }, { "epoch": 55.132860938883965, "grad_norm": 0.2566128373146057, "learning_rate": 1e-05, "loss": 0.9372, "step": 62245 }, { "epoch": 55.137289636846766, "grad_norm": 0.22963926196098328, "learning_rate": 1e-05, "loss": 1.0394, "step": 62250 }, { "epoch": 55.14171833480957, "grad_norm": 0.31815585494041443, "learning_rate": 1e-05, "loss": 0.9973, "step": 62255 }, { "epoch": 55.14614703277236, "grad_norm": 0.26324036717414856, "learning_rate": 1e-05, "loss": 0.9798, "step": 62260 }, { "epoch": 55.15057573073516, "grad_norm": 0.2819535434246063, "learning_rate": 1e-05, "loss": 0.9756, "step": 62265 }, { "epoch": 55.155004428697964, "grad_norm": 0.263812392950058, "learning_rate": 1e-05, "loss": 1.0014, "step": 62270 }, { "epoch": 55.15943312666076, "grad_norm": 0.22461117804050446, "learning_rate": 1e-05, "loss": 0.9762, "step": 62275 }, { "epoch": 55.16386182462356, "grad_norm": 0.21655142307281494, "learning_rate": 1e-05, "loss": 0.9543, "step": 62280 }, { "epoch": 55.16829052258636, "grad_norm": 0.28236937522888184, "learning_rate": 1e-05, "loss": 0.9594, "step": 62285 }, { "epoch": 55.17271922054916, "grad_norm": 0.243770569562912, "learning_rate": 1e-05, "loss": 0.9499, "step": 62290 }, { "epoch": 55.177147918511956, "grad_norm": 0.24992871284484863, "learning_rate": 1e-05, "loss": 0.9649, "step": 62295 }, { "epoch": 55.18157661647476, "grad_norm": 0.24871692061424255, "learning_rate": 1e-05, "loss": 0.959, "step": 62300 }, { "epoch": 55.18600531443756, "grad_norm": 0.22770540416240692, "learning_rate": 1e-05, "loss": 0.9778, "step": 62305 }, { "epoch": 55.19043401240035, "grad_norm": 0.3503894507884979, "learning_rate": 1e-05, "loss": 0.9601, "step": 62310 }, { "epoch": 55.194862710363154, "grad_norm": 0.24219578504562378, "learning_rate": 1e-05, "loss": 0.9399, "step": 62315 }, { "epoch": 55.199291408325955, "grad_norm": 0.21635305881500244, "learning_rate": 1e-05, "loss": 0.9727, "step": 62320 }, { "epoch": 55.20372010628875, "grad_norm": 0.27642425894737244, "learning_rate": 1e-05, "loss": 0.9672, "step": 62325 }, { "epoch": 55.20814880425155, "grad_norm": 0.2553144097328186, "learning_rate": 1e-05, "loss": 0.9824, "step": 62330 }, { "epoch": 55.21257750221435, "grad_norm": 0.25659167766571045, "learning_rate": 1e-05, "loss": 0.9424, "step": 62335 }, { "epoch": 55.217006200177146, "grad_norm": 0.24272117018699646, "learning_rate": 1e-05, "loss": 0.9721, "step": 62340 }, { "epoch": 55.22143489813995, "grad_norm": 0.21669627726078033, "learning_rate": 1e-05, "loss": 0.9763, "step": 62345 }, { "epoch": 55.22586359610275, "grad_norm": 0.26535534858703613, "learning_rate": 1e-05, "loss": 0.9626, "step": 62350 }, { "epoch": 55.23029229406554, "grad_norm": 0.2685007154941559, "learning_rate": 1e-05, "loss": 0.982, "step": 62355 }, { "epoch": 55.23472099202834, "grad_norm": 0.24896568059921265, "learning_rate": 1e-05, "loss": 0.9599, "step": 62360 }, { "epoch": 55.239149689991144, "grad_norm": 0.21610885858535767, "learning_rate": 1e-05, "loss": 0.9798, "step": 62365 }, { "epoch": 55.24357838795394, "grad_norm": 0.21651025116443634, "learning_rate": 1e-05, "loss": 1.0341, "step": 62370 }, { "epoch": 55.24800708591674, "grad_norm": 0.19530245661735535, "learning_rate": 1e-05, "loss": 1.0292, "step": 62375 }, { "epoch": 55.25243578387954, "grad_norm": 0.2430613785982132, "learning_rate": 1e-05, "loss": 1.0261, "step": 62380 }, { "epoch": 55.256864481842335, "grad_norm": 0.23671753704547882, "learning_rate": 1e-05, "loss": 0.9375, "step": 62385 }, { "epoch": 55.261293179805136, "grad_norm": 0.22328652441501617, "learning_rate": 1e-05, "loss": 0.9919, "step": 62390 }, { "epoch": 55.26572187776794, "grad_norm": 0.24272359907627106, "learning_rate": 1e-05, "loss": 0.9637, "step": 62395 }, { "epoch": 55.27015057573073, "grad_norm": 0.2391354888677597, "learning_rate": 1e-05, "loss": 1.0434, "step": 62400 }, { "epoch": 55.27457927369353, "grad_norm": 0.23660340905189514, "learning_rate": 1e-05, "loss": 0.9712, "step": 62405 }, { "epoch": 55.279007971656334, "grad_norm": 0.28350868821144104, "learning_rate": 1e-05, "loss": 0.9441, "step": 62410 }, { "epoch": 55.283436669619135, "grad_norm": 0.24310103058815002, "learning_rate": 1e-05, "loss": 0.9453, "step": 62415 }, { "epoch": 55.28786536758193, "grad_norm": 0.2373107224702835, "learning_rate": 1e-05, "loss": 0.9263, "step": 62420 }, { "epoch": 55.29229406554473, "grad_norm": 0.2172693908214569, "learning_rate": 1e-05, "loss": 0.9876, "step": 62425 }, { "epoch": 55.29672276350753, "grad_norm": 0.2619110345840454, "learning_rate": 1e-05, "loss": 0.9615, "step": 62430 }, { "epoch": 55.301151461470326, "grad_norm": 0.26987820863723755, "learning_rate": 1e-05, "loss": 0.9916, "step": 62435 }, { "epoch": 55.30558015943313, "grad_norm": 0.2333763986825943, "learning_rate": 1e-05, "loss": 1.0064, "step": 62440 }, { "epoch": 55.31000885739593, "grad_norm": 0.21843530237674713, "learning_rate": 1e-05, "loss": 0.9136, "step": 62445 }, { "epoch": 55.31443755535872, "grad_norm": 0.2594922184944153, "learning_rate": 1e-05, "loss": 0.9364, "step": 62450 }, { "epoch": 55.318866253321524, "grad_norm": 0.28343212604522705, "learning_rate": 1e-05, "loss": 0.9971, "step": 62455 }, { "epoch": 55.323294951284325, "grad_norm": 0.2748631238937378, "learning_rate": 1e-05, "loss": 1.0134, "step": 62460 }, { "epoch": 55.32772364924712, "grad_norm": 0.26620927453041077, "learning_rate": 1e-05, "loss": 0.9751, "step": 62465 }, { "epoch": 55.33215234720992, "grad_norm": 0.2769966721534729, "learning_rate": 1e-05, "loss": 0.935, "step": 62470 }, { "epoch": 55.33658104517272, "grad_norm": 0.26412954926490784, "learning_rate": 1e-05, "loss": 0.947, "step": 62475 }, { "epoch": 55.341009743135515, "grad_norm": 0.29838427901268005, "learning_rate": 1e-05, "loss": 0.9925, "step": 62480 }, { "epoch": 55.34543844109832, "grad_norm": 0.24841853976249695, "learning_rate": 1e-05, "loss": 0.938, "step": 62485 }, { "epoch": 55.34986713906112, "grad_norm": 0.25717467069625854, "learning_rate": 1e-05, "loss": 0.9621, "step": 62490 }, { "epoch": 55.35429583702391, "grad_norm": 0.22899077832698822, "learning_rate": 1e-05, "loss": 0.9385, "step": 62495 }, { "epoch": 55.35872453498671, "grad_norm": 0.25663626194000244, "learning_rate": 1e-05, "loss": 0.9848, "step": 62500 }, { "epoch": 55.363153232949514, "grad_norm": 0.2554994523525238, "learning_rate": 1e-05, "loss": 0.9754, "step": 62505 }, { "epoch": 55.36758193091231, "grad_norm": 0.33785203099250793, "learning_rate": 1e-05, "loss": 1.0199, "step": 62510 }, { "epoch": 55.37201062887511, "grad_norm": 0.2501871585845947, "learning_rate": 1e-05, "loss": 0.9755, "step": 62515 }, { "epoch": 55.37643932683791, "grad_norm": 0.24996094405651093, "learning_rate": 1e-05, "loss": 0.9952, "step": 62520 }, { "epoch": 55.380868024800705, "grad_norm": 0.227278470993042, "learning_rate": 1e-05, "loss": 0.9687, "step": 62525 }, { "epoch": 55.385296722763506, "grad_norm": 0.26868489384651184, "learning_rate": 1e-05, "loss": 0.9793, "step": 62530 }, { "epoch": 55.38972542072631, "grad_norm": 0.27864688634872437, "learning_rate": 1e-05, "loss": 0.9355, "step": 62535 }, { "epoch": 55.39415411868911, "grad_norm": 0.2396826595067978, "learning_rate": 1e-05, "loss": 1.0005, "step": 62540 }, { "epoch": 55.3985828166519, "grad_norm": 0.2545613944530487, "learning_rate": 1e-05, "loss": 0.9897, "step": 62545 }, { "epoch": 55.403011514614704, "grad_norm": 0.22027942538261414, "learning_rate": 1e-05, "loss": 0.9717, "step": 62550 }, { "epoch": 55.407440212577505, "grad_norm": 0.265720397233963, "learning_rate": 1e-05, "loss": 0.9749, "step": 62555 }, { "epoch": 55.4118689105403, "grad_norm": 0.2706334888935089, "learning_rate": 1e-05, "loss": 0.9873, "step": 62560 }, { "epoch": 55.4162976085031, "grad_norm": 0.23240944743156433, "learning_rate": 1e-05, "loss": 0.9385, "step": 62565 }, { "epoch": 55.4207263064659, "grad_norm": 0.2491762936115265, "learning_rate": 1e-05, "loss": 0.9625, "step": 62570 }, { "epoch": 55.425155004428696, "grad_norm": 0.20693622529506683, "learning_rate": 1e-05, "loss": 0.9337, "step": 62575 }, { "epoch": 55.4295837023915, "grad_norm": 0.2690315544605255, "learning_rate": 1e-05, "loss": 1.0097, "step": 62580 }, { "epoch": 55.4340124003543, "grad_norm": 0.22171130776405334, "learning_rate": 1e-05, "loss": 0.9436, "step": 62585 }, { "epoch": 55.43844109831709, "grad_norm": 0.27453330159187317, "learning_rate": 1e-05, "loss": 0.9369, "step": 62590 }, { "epoch": 55.44286979627989, "grad_norm": 0.2499241679906845, "learning_rate": 1e-05, "loss": 0.9819, "step": 62595 }, { "epoch": 55.447298494242695, "grad_norm": 0.28359460830688477, "learning_rate": 1e-05, "loss": 0.9454, "step": 62600 }, { "epoch": 55.45172719220549, "grad_norm": 0.22056515514850616, "learning_rate": 1e-05, "loss": 1.0015, "step": 62605 }, { "epoch": 55.45615589016829, "grad_norm": 0.26162782311439514, "learning_rate": 1e-05, "loss": 1.0007, "step": 62610 }, { "epoch": 55.46058458813109, "grad_norm": 0.24669480323791504, "learning_rate": 1e-05, "loss": 0.9549, "step": 62615 }, { "epoch": 55.465013286093885, "grad_norm": 0.2601143419742584, "learning_rate": 1e-05, "loss": 0.9314, "step": 62620 }, { "epoch": 55.46944198405669, "grad_norm": 0.2427825629711151, "learning_rate": 1e-05, "loss": 0.9532, "step": 62625 }, { "epoch": 55.47387068201949, "grad_norm": 0.19705727696418762, "learning_rate": 1e-05, "loss": 0.9995, "step": 62630 }, { "epoch": 55.47829937998228, "grad_norm": 0.2382509559392929, "learning_rate": 1e-05, "loss": 0.9777, "step": 62635 }, { "epoch": 55.48272807794508, "grad_norm": 0.21953454613685608, "learning_rate": 1e-05, "loss": 0.9006, "step": 62640 }, { "epoch": 55.487156775907884, "grad_norm": 0.2171293944120407, "learning_rate": 1e-05, "loss": 1.0157, "step": 62645 }, { "epoch": 55.491585473870686, "grad_norm": 0.24557551741600037, "learning_rate": 1e-05, "loss": 0.9641, "step": 62650 }, { "epoch": 55.49601417183348, "grad_norm": 0.20853084325790405, "learning_rate": 1e-05, "loss": 0.9915, "step": 62655 }, { "epoch": 55.50044286979628, "grad_norm": 0.25548630952835083, "learning_rate": 1e-05, "loss": 0.9639, "step": 62660 }, { "epoch": 55.50487156775908, "grad_norm": 0.23453408479690552, "learning_rate": 1e-05, "loss": 0.9701, "step": 62665 }, { "epoch": 55.509300265721876, "grad_norm": 0.23328697681427002, "learning_rate": 1e-05, "loss": 1.0039, "step": 62670 }, { "epoch": 55.51372896368468, "grad_norm": 0.2442711889743805, "learning_rate": 1e-05, "loss": 0.9786, "step": 62675 }, { "epoch": 55.51815766164748, "grad_norm": 0.25138574838638306, "learning_rate": 1e-05, "loss": 0.9352, "step": 62680 }, { "epoch": 55.52258635961027, "grad_norm": 0.22825178503990173, "learning_rate": 1e-05, "loss": 0.9801, "step": 62685 }, { "epoch": 55.527015057573074, "grad_norm": 0.25139036774635315, "learning_rate": 1e-05, "loss": 0.9541, "step": 62690 }, { "epoch": 55.531443755535875, "grad_norm": 0.2730069160461426, "learning_rate": 1e-05, "loss": 0.9554, "step": 62695 }, { "epoch": 55.53587245349867, "grad_norm": 0.23859600722789764, "learning_rate": 1e-05, "loss": 0.9691, "step": 62700 }, { "epoch": 55.54030115146147, "grad_norm": 0.2873833477497101, "learning_rate": 1e-05, "loss": 0.981, "step": 62705 }, { "epoch": 55.54472984942427, "grad_norm": 0.2638431191444397, "learning_rate": 1e-05, "loss": 0.9365, "step": 62710 }, { "epoch": 55.549158547387066, "grad_norm": 0.2742599844932556, "learning_rate": 1e-05, "loss": 0.9337, "step": 62715 }, { "epoch": 55.55358724534987, "grad_norm": 0.24800468981266022, "learning_rate": 1e-05, "loss": 0.9607, "step": 62720 }, { "epoch": 55.55801594331267, "grad_norm": 0.22815638780593872, "learning_rate": 1e-05, "loss": 0.9586, "step": 62725 }, { "epoch": 55.56244464127546, "grad_norm": 0.31424880027770996, "learning_rate": 1e-05, "loss": 0.9958, "step": 62730 }, { "epoch": 55.56687333923826, "grad_norm": 0.20046547055244446, "learning_rate": 1e-05, "loss": 0.9976, "step": 62735 }, { "epoch": 55.571302037201065, "grad_norm": 0.24561159312725067, "learning_rate": 1e-05, "loss": 1.0186, "step": 62740 }, { "epoch": 55.57573073516386, "grad_norm": 0.21277067065238953, "learning_rate": 1e-05, "loss": 0.9751, "step": 62745 }, { "epoch": 55.58015943312666, "grad_norm": 0.2338026463985443, "learning_rate": 1e-05, "loss": 0.9587, "step": 62750 }, { "epoch": 55.58458813108946, "grad_norm": 0.22151383757591248, "learning_rate": 1e-05, "loss": 0.9677, "step": 62755 }, { "epoch": 55.589016829052255, "grad_norm": 0.21774446964263916, "learning_rate": 1e-05, "loss": 0.9937, "step": 62760 }, { "epoch": 55.59344552701506, "grad_norm": 0.23514288663864136, "learning_rate": 1e-05, "loss": 1.0057, "step": 62765 }, { "epoch": 55.59787422497786, "grad_norm": 0.26878175139427185, "learning_rate": 1e-05, "loss": 0.941, "step": 62770 }, { "epoch": 55.60230292294066, "grad_norm": 0.22452412545681, "learning_rate": 1e-05, "loss": 0.9865, "step": 62775 }, { "epoch": 55.60673162090345, "grad_norm": 0.26823243498802185, "learning_rate": 1e-05, "loss": 0.9785, "step": 62780 }, { "epoch": 55.611160318866254, "grad_norm": 0.22869546711444855, "learning_rate": 1e-05, "loss": 0.9864, "step": 62785 }, { "epoch": 55.615589016829055, "grad_norm": 0.23047584295272827, "learning_rate": 1e-05, "loss": 0.9803, "step": 62790 }, { "epoch": 55.62001771479185, "grad_norm": 0.21435987949371338, "learning_rate": 1e-05, "loss": 1.0024, "step": 62795 }, { "epoch": 55.62444641275465, "grad_norm": 0.2282906174659729, "learning_rate": 1e-05, "loss": 0.9746, "step": 62800 }, { "epoch": 55.62887511071745, "grad_norm": 0.22043752670288086, "learning_rate": 1e-05, "loss": 0.9689, "step": 62805 }, { "epoch": 55.633303808680246, "grad_norm": 0.23046617209911346, "learning_rate": 1e-05, "loss": 1.0031, "step": 62810 }, { "epoch": 55.63773250664305, "grad_norm": 0.24408136308193207, "learning_rate": 1e-05, "loss": 1.0201, "step": 62815 }, { "epoch": 55.64216120460585, "grad_norm": 0.24182654917240143, "learning_rate": 1e-05, "loss": 0.9758, "step": 62820 }, { "epoch": 55.64658990256864, "grad_norm": 0.2297814041376114, "learning_rate": 1e-05, "loss": 0.9425, "step": 62825 }, { "epoch": 55.651018600531444, "grad_norm": 0.2758806645870209, "learning_rate": 1e-05, "loss": 0.9735, "step": 62830 }, { "epoch": 55.655447298494245, "grad_norm": 0.2332402616739273, "learning_rate": 1e-05, "loss": 0.9997, "step": 62835 }, { "epoch": 55.65987599645704, "grad_norm": 0.23059028387069702, "learning_rate": 1e-05, "loss": 0.9884, "step": 62840 }, { "epoch": 55.66430469441984, "grad_norm": 0.2054479569196701, "learning_rate": 1e-05, "loss": 0.9467, "step": 62845 }, { "epoch": 55.66873339238264, "grad_norm": 0.227760449051857, "learning_rate": 1e-05, "loss": 0.8962, "step": 62850 }, { "epoch": 55.673162090345436, "grad_norm": 0.22935718297958374, "learning_rate": 1e-05, "loss": 0.9297, "step": 62855 }, { "epoch": 55.67759078830824, "grad_norm": 0.21174666285514832, "learning_rate": 1e-05, "loss": 0.9781, "step": 62860 }, { "epoch": 55.68201948627104, "grad_norm": 0.2854756712913513, "learning_rate": 1e-05, "loss": 0.9601, "step": 62865 }, { "epoch": 55.68644818423383, "grad_norm": 0.2622082233428955, "learning_rate": 1e-05, "loss": 0.9692, "step": 62870 }, { "epoch": 55.69087688219663, "grad_norm": 0.24696001410484314, "learning_rate": 1e-05, "loss": 1.0091, "step": 62875 }, { "epoch": 55.695305580159435, "grad_norm": 0.22844815254211426, "learning_rate": 1e-05, "loss": 1.0027, "step": 62880 }, { "epoch": 55.69973427812223, "grad_norm": 0.2736019492149353, "learning_rate": 1e-05, "loss": 0.9396, "step": 62885 }, { "epoch": 55.70416297608503, "grad_norm": 0.2845125198364258, "learning_rate": 1e-05, "loss": 0.9677, "step": 62890 }, { "epoch": 55.70859167404783, "grad_norm": 0.25794246792793274, "learning_rate": 1e-05, "loss": 0.9624, "step": 62895 }, { "epoch": 55.71302037201063, "grad_norm": 0.2491748183965683, "learning_rate": 1e-05, "loss": 0.9499, "step": 62900 }, { "epoch": 55.717449069973426, "grad_norm": 0.22434252500534058, "learning_rate": 1e-05, "loss": 1.0114, "step": 62905 }, { "epoch": 55.72187776793623, "grad_norm": 0.257744163274765, "learning_rate": 1e-05, "loss": 0.9505, "step": 62910 }, { "epoch": 55.72630646589903, "grad_norm": 0.28021812438964844, "learning_rate": 1e-05, "loss": 0.9678, "step": 62915 }, { "epoch": 55.73073516386182, "grad_norm": 0.2581353783607483, "learning_rate": 1e-05, "loss": 1.0141, "step": 62920 }, { "epoch": 55.735163861824624, "grad_norm": 0.2833695709705353, "learning_rate": 1e-05, "loss": 1.0118, "step": 62925 }, { "epoch": 55.739592559787425, "grad_norm": 0.23834869265556335, "learning_rate": 1e-05, "loss": 0.9839, "step": 62930 }, { "epoch": 55.74402125775022, "grad_norm": 0.24291785061359406, "learning_rate": 1e-05, "loss": 0.9574, "step": 62935 }, { "epoch": 55.74844995571302, "grad_norm": 0.2367095947265625, "learning_rate": 1e-05, "loss": 0.9474, "step": 62940 }, { "epoch": 55.75287865367582, "grad_norm": 0.22828327119350433, "learning_rate": 1e-05, "loss": 0.9577, "step": 62945 }, { "epoch": 55.757307351638616, "grad_norm": 0.1983243227005005, "learning_rate": 1e-05, "loss": 0.9299, "step": 62950 }, { "epoch": 55.76173604960142, "grad_norm": 0.21977515518665314, "learning_rate": 1e-05, "loss": 0.9506, "step": 62955 }, { "epoch": 55.76616474756422, "grad_norm": 0.2196301370859146, "learning_rate": 1e-05, "loss": 0.998, "step": 62960 }, { "epoch": 55.77059344552701, "grad_norm": 0.2925870418548584, "learning_rate": 1e-05, "loss": 1.0181, "step": 62965 }, { "epoch": 55.775022143489814, "grad_norm": 0.2700136601924896, "learning_rate": 1e-05, "loss": 0.9546, "step": 62970 }, { "epoch": 55.779450841452615, "grad_norm": 0.24620164930820465, "learning_rate": 1e-05, "loss": 0.9892, "step": 62975 }, { "epoch": 55.78387953941541, "grad_norm": 0.2316984087228775, "learning_rate": 1e-05, "loss": 0.9763, "step": 62980 }, { "epoch": 55.78830823737821, "grad_norm": 0.2617681920528412, "learning_rate": 1e-05, "loss": 0.9996, "step": 62985 }, { "epoch": 55.79273693534101, "grad_norm": 0.2237551361322403, "learning_rate": 1e-05, "loss": 0.938, "step": 62990 }, { "epoch": 55.797165633303806, "grad_norm": 0.24159231781959534, "learning_rate": 1e-05, "loss": 0.9324, "step": 62995 }, { "epoch": 55.80159433126661, "grad_norm": 0.24352505803108215, "learning_rate": 1e-05, "loss": 1.0108, "step": 63000 }, { "epoch": 55.80602302922941, "grad_norm": 0.23825091123580933, "learning_rate": 1e-05, "loss": 0.979, "step": 63005 }, { "epoch": 55.8104517271922, "grad_norm": 0.24412472546100616, "learning_rate": 1e-05, "loss": 0.9821, "step": 63010 }, { "epoch": 55.814880425155, "grad_norm": 0.21329577267169952, "learning_rate": 1e-05, "loss": 0.9766, "step": 63015 }, { "epoch": 55.819309123117804, "grad_norm": 0.2143968641757965, "learning_rate": 1e-05, "loss": 0.9805, "step": 63020 }, { "epoch": 55.823737821080606, "grad_norm": 0.2275923490524292, "learning_rate": 1e-05, "loss": 1.0318, "step": 63025 }, { "epoch": 55.8281665190434, "grad_norm": 0.22037973999977112, "learning_rate": 1e-05, "loss": 0.9797, "step": 63030 }, { "epoch": 55.8325952170062, "grad_norm": 0.23195819556713104, "learning_rate": 1e-05, "loss": 0.9704, "step": 63035 }, { "epoch": 55.837023914969, "grad_norm": 0.23441609740257263, "learning_rate": 1e-05, "loss": 1.0513, "step": 63040 }, { "epoch": 55.841452612931796, "grad_norm": 0.26434823870658875, "learning_rate": 1e-05, "loss": 1.0424, "step": 63045 }, { "epoch": 55.8458813108946, "grad_norm": 0.2524208724498749, "learning_rate": 1e-05, "loss": 1.0403, "step": 63050 }, { "epoch": 55.8503100088574, "grad_norm": 0.278835266828537, "learning_rate": 1e-05, "loss": 0.9885, "step": 63055 }, { "epoch": 55.85473870682019, "grad_norm": 0.20644482970237732, "learning_rate": 1e-05, "loss": 0.9659, "step": 63060 }, { "epoch": 55.859167404782994, "grad_norm": 0.21144837141036987, "learning_rate": 1e-05, "loss": 0.9653, "step": 63065 }, { "epoch": 55.863596102745795, "grad_norm": 0.22869908809661865, "learning_rate": 1e-05, "loss": 0.9552, "step": 63070 }, { "epoch": 55.86802480070859, "grad_norm": 0.24267901480197906, "learning_rate": 1e-05, "loss": 0.9884, "step": 63075 }, { "epoch": 55.87245349867139, "grad_norm": 0.2329319566488266, "learning_rate": 1e-05, "loss": 0.9896, "step": 63080 }, { "epoch": 55.87688219663419, "grad_norm": 0.2512611746788025, "learning_rate": 1e-05, "loss": 1.0007, "step": 63085 }, { "epoch": 55.881310894596986, "grad_norm": 0.25006699562072754, "learning_rate": 1e-05, "loss": 0.9933, "step": 63090 }, { "epoch": 55.88573959255979, "grad_norm": 0.2464587241411209, "learning_rate": 1e-05, "loss": 0.9851, "step": 63095 }, { "epoch": 55.89016829052259, "grad_norm": 0.24423974752426147, "learning_rate": 1e-05, "loss": 0.9431, "step": 63100 }, { "epoch": 55.89459698848538, "grad_norm": 0.22483476996421814, "learning_rate": 1e-05, "loss": 0.9972, "step": 63105 }, { "epoch": 55.899025686448184, "grad_norm": 0.27327942848205566, "learning_rate": 1e-05, "loss": 1.0, "step": 63110 }, { "epoch": 55.903454384410985, "grad_norm": 0.22841021418571472, "learning_rate": 1e-05, "loss": 0.967, "step": 63115 }, { "epoch": 55.90788308237378, "grad_norm": 0.21562814712524414, "learning_rate": 1e-05, "loss": 0.9743, "step": 63120 }, { "epoch": 55.91231178033658, "grad_norm": 0.2504241466522217, "learning_rate": 1e-05, "loss": 0.9676, "step": 63125 }, { "epoch": 55.91674047829938, "grad_norm": 0.23530380427837372, "learning_rate": 1e-05, "loss": 0.9464, "step": 63130 }, { "epoch": 55.921169176262175, "grad_norm": 0.2534446120262146, "learning_rate": 1e-05, "loss": 0.9632, "step": 63135 }, { "epoch": 55.92559787422498, "grad_norm": 0.2118387073278427, "learning_rate": 1e-05, "loss": 1.0018, "step": 63140 }, { "epoch": 55.93002657218778, "grad_norm": 0.23165062069892883, "learning_rate": 1e-05, "loss": 0.9351, "step": 63145 }, { "epoch": 55.93445527015058, "grad_norm": 0.23897498846054077, "learning_rate": 1e-05, "loss": 0.9728, "step": 63150 }, { "epoch": 55.93888396811337, "grad_norm": 0.18769510090351105, "learning_rate": 1e-05, "loss": 0.9585, "step": 63155 }, { "epoch": 55.943312666076174, "grad_norm": 0.21453003585338593, "learning_rate": 1e-05, "loss": 0.9687, "step": 63160 }, { "epoch": 55.947741364038976, "grad_norm": 0.25377440452575684, "learning_rate": 1e-05, "loss": 0.909, "step": 63165 }, { "epoch": 55.95217006200177, "grad_norm": 0.25588148832321167, "learning_rate": 1e-05, "loss": 0.9771, "step": 63170 }, { "epoch": 55.95659875996457, "grad_norm": 0.2416241466999054, "learning_rate": 1e-05, "loss": 0.9845, "step": 63175 }, { "epoch": 55.96102745792737, "grad_norm": 0.28508681058883667, "learning_rate": 1e-05, "loss": 0.9428, "step": 63180 }, { "epoch": 55.965456155890166, "grad_norm": 0.25988131761550903, "learning_rate": 1e-05, "loss": 0.9527, "step": 63185 }, { "epoch": 55.96988485385297, "grad_norm": 0.2363196611404419, "learning_rate": 1e-05, "loss": 0.9619, "step": 63190 }, { "epoch": 55.97431355181577, "grad_norm": 0.21184957027435303, "learning_rate": 1e-05, "loss": 0.9844, "step": 63195 }, { "epoch": 55.97874224977856, "grad_norm": 0.25400838255882263, "learning_rate": 1e-05, "loss": 0.9399, "step": 63200 }, { "epoch": 55.983170947741364, "grad_norm": 0.2314334362745285, "learning_rate": 1e-05, "loss": 0.9398, "step": 63205 }, { "epoch": 55.987599645704165, "grad_norm": 0.23777985572814941, "learning_rate": 1e-05, "loss": 1.0291, "step": 63210 }, { "epoch": 55.99202834366696, "grad_norm": 0.27431249618530273, "learning_rate": 1e-05, "loss": 0.9614, "step": 63215 }, { "epoch": 55.99645704162976, "grad_norm": 0.24531115591526031, "learning_rate": 1e-05, "loss": 0.9832, "step": 63220 }, { "epoch": 56.00088573959256, "grad_norm": 0.23937658965587616, "learning_rate": 1e-05, "loss": 1.0392, "step": 63225 }, { "epoch": 56.005314437555356, "grad_norm": 0.25790509581565857, "learning_rate": 1e-05, "loss": 0.9703, "step": 63230 }, { "epoch": 56.00974313551816, "grad_norm": 0.22111348807811737, "learning_rate": 1e-05, "loss": 0.9572, "step": 63235 }, { "epoch": 56.01417183348096, "grad_norm": 0.24599124491214752, "learning_rate": 1e-05, "loss": 0.9977, "step": 63240 }, { "epoch": 56.01860053144375, "grad_norm": 0.24326390027999878, "learning_rate": 1e-05, "loss": 0.9496, "step": 63245 }, { "epoch": 56.02302922940655, "grad_norm": 0.26520904898643494, "learning_rate": 1e-05, "loss": 0.9794, "step": 63250 }, { "epoch": 56.027457927369355, "grad_norm": 0.2518685460090637, "learning_rate": 1e-05, "loss": 0.9826, "step": 63255 }, { "epoch": 56.03188662533215, "grad_norm": 0.22588272392749786, "learning_rate": 1e-05, "loss": 0.9608, "step": 63260 }, { "epoch": 56.03631532329495, "grad_norm": 0.2146756649017334, "learning_rate": 1e-05, "loss": 0.9513, "step": 63265 }, { "epoch": 56.04074402125775, "grad_norm": 0.2459445744752884, "learning_rate": 1e-05, "loss": 0.9385, "step": 63270 }, { "epoch": 56.04517271922055, "grad_norm": 0.2339220941066742, "learning_rate": 1e-05, "loss": 0.9822, "step": 63275 }, { "epoch": 56.04960141718335, "grad_norm": 0.2387182116508484, "learning_rate": 1e-05, "loss": 0.908, "step": 63280 }, { "epoch": 56.05403011514615, "grad_norm": 0.20047685503959656, "learning_rate": 1e-05, "loss": 0.963, "step": 63285 }, { "epoch": 56.05845881310895, "grad_norm": 0.18126839399337769, "learning_rate": 1e-05, "loss": 1.0169, "step": 63290 }, { "epoch": 56.06288751107174, "grad_norm": 0.2056189626455307, "learning_rate": 1e-05, "loss": 0.9949, "step": 63295 }, { "epoch": 56.067316209034544, "grad_norm": 0.20262739062309265, "learning_rate": 1e-05, "loss": 1.002, "step": 63300 }, { "epoch": 56.071744906997345, "grad_norm": 0.2546745538711548, "learning_rate": 1e-05, "loss": 0.9504, "step": 63305 }, { "epoch": 56.07617360496014, "grad_norm": 0.24659445881843567, "learning_rate": 1e-05, "loss": 0.9977, "step": 63310 }, { "epoch": 56.08060230292294, "grad_norm": 0.2330092042684555, "learning_rate": 1e-05, "loss": 1.0211, "step": 63315 }, { "epoch": 56.08503100088574, "grad_norm": 0.23480361700057983, "learning_rate": 1e-05, "loss": 1.0058, "step": 63320 }, { "epoch": 56.089459698848536, "grad_norm": 0.2409174144268036, "learning_rate": 1e-05, "loss": 0.9684, "step": 63325 }, { "epoch": 56.09388839681134, "grad_norm": 0.26574721932411194, "learning_rate": 1e-05, "loss": 0.9656, "step": 63330 }, { "epoch": 56.09831709477414, "grad_norm": 0.2549513280391693, "learning_rate": 1e-05, "loss": 0.9338, "step": 63335 }, { "epoch": 56.10274579273693, "grad_norm": 0.25018665194511414, "learning_rate": 1e-05, "loss": 0.9996, "step": 63340 }, { "epoch": 56.107174490699734, "grad_norm": 0.23832666873931885, "learning_rate": 1e-05, "loss": 0.9895, "step": 63345 }, { "epoch": 56.111603188662535, "grad_norm": 0.22562791407108307, "learning_rate": 1e-05, "loss": 0.971, "step": 63350 }, { "epoch": 56.11603188662533, "grad_norm": 0.251179963350296, "learning_rate": 1e-05, "loss": 0.9181, "step": 63355 }, { "epoch": 56.12046058458813, "grad_norm": 0.2523738741874695, "learning_rate": 1e-05, "loss": 0.9987, "step": 63360 }, { "epoch": 56.12488928255093, "grad_norm": 0.2886253893375397, "learning_rate": 1e-05, "loss": 0.9774, "step": 63365 }, { "epoch": 56.129317980513726, "grad_norm": 0.27339133620262146, "learning_rate": 1e-05, "loss": 0.9735, "step": 63370 }, { "epoch": 56.13374667847653, "grad_norm": 0.22999246418476105, "learning_rate": 1e-05, "loss": 0.9337, "step": 63375 }, { "epoch": 56.13817537643933, "grad_norm": 0.25712668895721436, "learning_rate": 1e-05, "loss": 0.9717, "step": 63380 }, { "epoch": 56.14260407440213, "grad_norm": 0.2335035502910614, "learning_rate": 1e-05, "loss": 0.9754, "step": 63385 }, { "epoch": 56.14703277236492, "grad_norm": 0.2485598772764206, "learning_rate": 1e-05, "loss": 0.9551, "step": 63390 }, { "epoch": 56.151461470327725, "grad_norm": 0.20362693071365356, "learning_rate": 1e-05, "loss": 0.9831, "step": 63395 }, { "epoch": 56.155890168290526, "grad_norm": 0.22485344111919403, "learning_rate": 1e-05, "loss": 0.9658, "step": 63400 }, { "epoch": 56.16031886625332, "grad_norm": 0.25063422322273254, "learning_rate": 1e-05, "loss": 0.9302, "step": 63405 }, { "epoch": 56.16474756421612, "grad_norm": 0.26546475291252136, "learning_rate": 1e-05, "loss": 0.9155, "step": 63410 }, { "epoch": 56.16917626217892, "grad_norm": 0.2285129725933075, "learning_rate": 1e-05, "loss": 0.9526, "step": 63415 }, { "epoch": 56.173604960141716, "grad_norm": 0.2408449649810791, "learning_rate": 1e-05, "loss": 0.9475, "step": 63420 }, { "epoch": 56.17803365810452, "grad_norm": 0.2592220604419708, "learning_rate": 1e-05, "loss": 0.924, "step": 63425 }, { "epoch": 56.18246235606732, "grad_norm": 0.2781519591808319, "learning_rate": 1e-05, "loss": 0.9794, "step": 63430 }, { "epoch": 56.18689105403011, "grad_norm": 0.3001905679702759, "learning_rate": 1e-05, "loss": 0.9205, "step": 63435 }, { "epoch": 56.191319751992914, "grad_norm": 0.24128659069538116, "learning_rate": 1e-05, "loss": 0.9504, "step": 63440 }, { "epoch": 56.195748449955715, "grad_norm": 0.30806219577789307, "learning_rate": 1e-05, "loss": 0.9608, "step": 63445 }, { "epoch": 56.20017714791851, "grad_norm": 0.28717541694641113, "learning_rate": 1e-05, "loss": 0.962, "step": 63450 }, { "epoch": 56.20460584588131, "grad_norm": 0.22641092538833618, "learning_rate": 1e-05, "loss": 1.0252, "step": 63455 }, { "epoch": 56.20903454384411, "grad_norm": 0.31907275319099426, "learning_rate": 1e-05, "loss": 0.969, "step": 63460 }, { "epoch": 56.213463241806906, "grad_norm": 0.23403482139110565, "learning_rate": 1e-05, "loss": 0.9647, "step": 63465 }, { "epoch": 56.21789193976971, "grad_norm": 0.2310042679309845, "learning_rate": 1e-05, "loss": 1.015, "step": 63470 }, { "epoch": 56.22232063773251, "grad_norm": 0.2366589456796646, "learning_rate": 1e-05, "loss": 0.9542, "step": 63475 }, { "epoch": 56.2267493356953, "grad_norm": 0.2359618991613388, "learning_rate": 1e-05, "loss": 0.9616, "step": 63480 }, { "epoch": 56.231178033658104, "grad_norm": 0.26032111048698425, "learning_rate": 1e-05, "loss": 0.9615, "step": 63485 }, { "epoch": 56.235606731620905, "grad_norm": 0.22414495050907135, "learning_rate": 1e-05, "loss": 0.9589, "step": 63490 }, { "epoch": 56.2400354295837, "grad_norm": 0.22042685747146606, "learning_rate": 1e-05, "loss": 0.9175, "step": 63495 }, { "epoch": 56.2444641275465, "grad_norm": 0.24749435484409332, "learning_rate": 1e-05, "loss": 0.8955, "step": 63500 }, { "epoch": 56.2488928255093, "grad_norm": 0.2552974224090576, "learning_rate": 1e-05, "loss": 0.9497, "step": 63505 }, { "epoch": 56.2533215234721, "grad_norm": 0.24924516677856445, "learning_rate": 1e-05, "loss": 0.9629, "step": 63510 }, { "epoch": 56.2577502214349, "grad_norm": 0.22684572637081146, "learning_rate": 1e-05, "loss": 1.0038, "step": 63515 }, { "epoch": 56.2621789193977, "grad_norm": 0.24484111368656158, "learning_rate": 1e-05, "loss": 0.9167, "step": 63520 }, { "epoch": 56.2666076173605, "grad_norm": 0.28961053490638733, "learning_rate": 1e-05, "loss": 0.9695, "step": 63525 }, { "epoch": 56.27103631532329, "grad_norm": 0.2719418406486511, "learning_rate": 1e-05, "loss": 0.9519, "step": 63530 }, { "epoch": 56.275465013286095, "grad_norm": 0.19193609058856964, "learning_rate": 1e-05, "loss": 0.9664, "step": 63535 }, { "epoch": 56.279893711248896, "grad_norm": 0.2647283375263214, "learning_rate": 1e-05, "loss": 1.0258, "step": 63540 }, { "epoch": 56.28432240921169, "grad_norm": 0.26923179626464844, "learning_rate": 1e-05, "loss": 0.9481, "step": 63545 }, { "epoch": 56.28875110717449, "grad_norm": 0.22998864948749542, "learning_rate": 1e-05, "loss": 0.9625, "step": 63550 }, { "epoch": 56.29317980513729, "grad_norm": 0.257528692483902, "learning_rate": 1e-05, "loss": 0.9266, "step": 63555 }, { "epoch": 56.297608503100086, "grad_norm": 0.2713509500026703, "learning_rate": 1e-05, "loss": 0.9429, "step": 63560 }, { "epoch": 56.30203720106289, "grad_norm": 0.2289247363805771, "learning_rate": 1e-05, "loss": 0.9936, "step": 63565 }, { "epoch": 56.30646589902569, "grad_norm": 0.22357754409313202, "learning_rate": 1e-05, "loss": 0.9805, "step": 63570 }, { "epoch": 56.31089459698848, "grad_norm": 0.26244646310806274, "learning_rate": 1e-05, "loss": 0.9946, "step": 63575 }, { "epoch": 56.315323294951284, "grad_norm": 0.27205440402030945, "learning_rate": 1e-05, "loss": 0.9724, "step": 63580 }, { "epoch": 56.319751992914085, "grad_norm": 0.24197699129581451, "learning_rate": 1e-05, "loss": 0.9565, "step": 63585 }, { "epoch": 56.32418069087688, "grad_norm": 0.24268017709255219, "learning_rate": 1e-05, "loss": 1.0146, "step": 63590 }, { "epoch": 56.32860938883968, "grad_norm": 0.25126227736473083, "learning_rate": 1e-05, "loss": 0.9133, "step": 63595 }, { "epoch": 56.33303808680248, "grad_norm": 0.2567299008369446, "learning_rate": 1e-05, "loss": 0.9628, "step": 63600 }, { "epoch": 56.337466784765276, "grad_norm": 0.22538305819034576, "learning_rate": 1e-05, "loss": 0.9567, "step": 63605 }, { "epoch": 56.34189548272808, "grad_norm": 0.2222796082496643, "learning_rate": 1e-05, "loss": 0.9589, "step": 63610 }, { "epoch": 56.34632418069088, "grad_norm": 0.24867665767669678, "learning_rate": 1e-05, "loss": 0.9337, "step": 63615 }, { "epoch": 56.35075287865367, "grad_norm": 0.23514172434806824, "learning_rate": 1e-05, "loss": 0.9973, "step": 63620 }, { "epoch": 56.355181576616474, "grad_norm": 0.2335110753774643, "learning_rate": 1e-05, "loss": 1.0379, "step": 63625 }, { "epoch": 56.359610274579275, "grad_norm": 0.2373688966035843, "learning_rate": 1e-05, "loss": 0.9952, "step": 63630 }, { "epoch": 56.364038972542076, "grad_norm": 0.24348297715187073, "learning_rate": 1e-05, "loss": 0.9856, "step": 63635 }, { "epoch": 56.36846767050487, "grad_norm": 0.2737266719341278, "learning_rate": 1e-05, "loss": 0.9624, "step": 63640 }, { "epoch": 56.37289636846767, "grad_norm": 0.278772234916687, "learning_rate": 1e-05, "loss": 1.0141, "step": 63645 }, { "epoch": 56.37732506643047, "grad_norm": 0.29180070757865906, "learning_rate": 1e-05, "loss": 0.9907, "step": 63650 }, { "epoch": 56.38175376439327, "grad_norm": 0.2496812492609024, "learning_rate": 1e-05, "loss": 0.9247, "step": 63655 }, { "epoch": 56.38618246235607, "grad_norm": 0.28697994351387024, "learning_rate": 1e-05, "loss": 0.9875, "step": 63660 }, { "epoch": 56.39061116031887, "grad_norm": 0.23187129199504852, "learning_rate": 1e-05, "loss": 0.9803, "step": 63665 }, { "epoch": 56.39503985828166, "grad_norm": 0.21356260776519775, "learning_rate": 1e-05, "loss": 0.9353, "step": 63670 }, { "epoch": 56.399468556244464, "grad_norm": 0.22953714430332184, "learning_rate": 1e-05, "loss": 0.951, "step": 63675 }, { "epoch": 56.403897254207266, "grad_norm": 0.25784069299697876, "learning_rate": 1e-05, "loss": 0.9447, "step": 63680 }, { "epoch": 56.40832595217006, "grad_norm": 0.24349458515644073, "learning_rate": 1e-05, "loss": 0.9994, "step": 63685 }, { "epoch": 56.41275465013286, "grad_norm": 0.21518701314926147, "learning_rate": 1e-05, "loss": 0.9518, "step": 63690 }, { "epoch": 56.41718334809566, "grad_norm": 0.2251671850681305, "learning_rate": 1e-05, "loss": 0.9535, "step": 63695 }, { "epoch": 56.421612046058456, "grad_norm": 0.22276322543621063, "learning_rate": 1e-05, "loss": 1.0075, "step": 63700 }, { "epoch": 56.42604074402126, "grad_norm": 0.21394184231758118, "learning_rate": 1e-05, "loss": 0.9789, "step": 63705 }, { "epoch": 56.43046944198406, "grad_norm": 0.2657020688056946, "learning_rate": 1e-05, "loss": 0.9316, "step": 63710 }, { "epoch": 56.43489813994685, "grad_norm": 0.24964112043380737, "learning_rate": 1e-05, "loss": 1.0044, "step": 63715 }, { "epoch": 56.439326837909654, "grad_norm": 0.23910826444625854, "learning_rate": 1e-05, "loss": 0.9614, "step": 63720 }, { "epoch": 56.443755535872455, "grad_norm": 0.27830642461776733, "learning_rate": 1e-05, "loss": 1.0319, "step": 63725 }, { "epoch": 56.44818423383525, "grad_norm": 0.2774246335029602, "learning_rate": 1e-05, "loss": 0.9883, "step": 63730 }, { "epoch": 56.45261293179805, "grad_norm": 0.22147469222545624, "learning_rate": 1e-05, "loss": 1.0128, "step": 63735 }, { "epoch": 56.45704162976085, "grad_norm": 0.23240873217582703, "learning_rate": 1e-05, "loss": 0.988, "step": 63740 }, { "epoch": 56.461470327723646, "grad_norm": 0.26253893971443176, "learning_rate": 1e-05, "loss": 0.9238, "step": 63745 }, { "epoch": 56.46589902568645, "grad_norm": 0.29025065898895264, "learning_rate": 1e-05, "loss": 0.9719, "step": 63750 }, { "epoch": 56.47032772364925, "grad_norm": 0.24803084135055542, "learning_rate": 1e-05, "loss": 0.9721, "step": 63755 }, { "epoch": 56.47475642161205, "grad_norm": 0.20899616181850433, "learning_rate": 1e-05, "loss": 1.0076, "step": 63760 }, { "epoch": 56.479185119574844, "grad_norm": 0.2042752355337143, "learning_rate": 1e-05, "loss": 0.9614, "step": 63765 }, { "epoch": 56.483613817537645, "grad_norm": 0.21896463632583618, "learning_rate": 1e-05, "loss": 1.0067, "step": 63770 }, { "epoch": 56.488042515500446, "grad_norm": 0.23999616503715515, "learning_rate": 1e-05, "loss": 0.9318, "step": 63775 }, { "epoch": 56.49247121346324, "grad_norm": 0.22065283358097076, "learning_rate": 1e-05, "loss": 0.9596, "step": 63780 }, { "epoch": 56.49689991142604, "grad_norm": 0.22317902743816376, "learning_rate": 1e-05, "loss": 0.9755, "step": 63785 }, { "epoch": 56.50132860938884, "grad_norm": 0.20797133445739746, "learning_rate": 1e-05, "loss": 0.9669, "step": 63790 }, { "epoch": 56.50575730735164, "grad_norm": 0.22109512984752655, "learning_rate": 1e-05, "loss": 0.9454, "step": 63795 }, { "epoch": 56.51018600531444, "grad_norm": 0.24690577387809753, "learning_rate": 1e-05, "loss": 0.9996, "step": 63800 }, { "epoch": 56.51461470327724, "grad_norm": 0.26962345838546753, "learning_rate": 1e-05, "loss": 0.9711, "step": 63805 }, { "epoch": 56.51904340124003, "grad_norm": 0.25095251202583313, "learning_rate": 1e-05, "loss": 0.9524, "step": 63810 }, { "epoch": 56.523472099202834, "grad_norm": 0.24364377558231354, "learning_rate": 1e-05, "loss": 0.9593, "step": 63815 }, { "epoch": 56.527900797165636, "grad_norm": 0.25177696347236633, "learning_rate": 1e-05, "loss": 1.0035, "step": 63820 }, { "epoch": 56.53232949512843, "grad_norm": 0.26411545276641846, "learning_rate": 1e-05, "loss": 0.9724, "step": 63825 }, { "epoch": 56.53675819309123, "grad_norm": 0.22823084890842438, "learning_rate": 1e-05, "loss": 0.9302, "step": 63830 }, { "epoch": 56.54118689105403, "grad_norm": 0.22628812491893768, "learning_rate": 1e-05, "loss": 1.0069, "step": 63835 }, { "epoch": 56.545615589016826, "grad_norm": 0.25449636578559875, "learning_rate": 1e-05, "loss": 0.9757, "step": 63840 }, { "epoch": 56.55004428697963, "grad_norm": 0.21194985508918762, "learning_rate": 1e-05, "loss": 0.9756, "step": 63845 }, { "epoch": 56.55447298494243, "grad_norm": 0.25784966349601746, "learning_rate": 1e-05, "loss": 1.0244, "step": 63850 }, { "epoch": 56.55890168290522, "grad_norm": 0.2687908113002777, "learning_rate": 1e-05, "loss": 0.9678, "step": 63855 }, { "epoch": 56.563330380868024, "grad_norm": 0.23463495075702667, "learning_rate": 1e-05, "loss": 1.0169, "step": 63860 }, { "epoch": 56.567759078830825, "grad_norm": 0.3146330416202545, "learning_rate": 1e-05, "loss": 0.9475, "step": 63865 }, { "epoch": 56.57218777679362, "grad_norm": 0.24020206928253174, "learning_rate": 1e-05, "loss": 0.9705, "step": 63870 }, { "epoch": 56.57661647475642, "grad_norm": 0.25961068272590637, "learning_rate": 1e-05, "loss": 0.9919, "step": 63875 }, { "epoch": 56.58104517271922, "grad_norm": 0.2569449245929718, "learning_rate": 1e-05, "loss": 0.9539, "step": 63880 }, { "epoch": 56.58547387068202, "grad_norm": 0.2551226019859314, "learning_rate": 1e-05, "loss": 0.9663, "step": 63885 }, { "epoch": 56.58990256864482, "grad_norm": 0.2499077320098877, "learning_rate": 1e-05, "loss": 0.9727, "step": 63890 }, { "epoch": 56.59433126660762, "grad_norm": 0.2654375731945038, "learning_rate": 1e-05, "loss": 0.9546, "step": 63895 }, { "epoch": 56.59875996457042, "grad_norm": 0.21383053064346313, "learning_rate": 1e-05, "loss": 0.9733, "step": 63900 }, { "epoch": 56.60318866253321, "grad_norm": 0.2844485938549042, "learning_rate": 1e-05, "loss": 0.9572, "step": 63905 }, { "epoch": 56.607617360496015, "grad_norm": 0.20691964030265808, "learning_rate": 1e-05, "loss": 1.0076, "step": 63910 }, { "epoch": 56.612046058458816, "grad_norm": 0.2167731076478958, "learning_rate": 1e-05, "loss": 0.9686, "step": 63915 }, { "epoch": 56.61647475642161, "grad_norm": 0.23787663877010345, "learning_rate": 1e-05, "loss": 1.0177, "step": 63920 }, { "epoch": 56.62090345438441, "grad_norm": 0.3365321159362793, "learning_rate": 1e-05, "loss": 0.9433, "step": 63925 }, { "epoch": 56.62533215234721, "grad_norm": 0.20767658948898315, "learning_rate": 1e-05, "loss": 1.006, "step": 63930 }, { "epoch": 56.62976085031001, "grad_norm": 0.21107585728168488, "learning_rate": 1e-05, "loss": 0.9922, "step": 63935 }, { "epoch": 56.63418954827281, "grad_norm": 0.20760977268218994, "learning_rate": 1e-05, "loss": 0.9818, "step": 63940 }, { "epoch": 56.63861824623561, "grad_norm": 0.21589045226573944, "learning_rate": 1e-05, "loss": 0.9537, "step": 63945 }, { "epoch": 56.6430469441984, "grad_norm": 0.23618140816688538, "learning_rate": 1e-05, "loss": 0.959, "step": 63950 }, { "epoch": 56.647475642161204, "grad_norm": 0.23637351393699646, "learning_rate": 1e-05, "loss": 0.9914, "step": 63955 }, { "epoch": 56.651904340124005, "grad_norm": 0.23837397992610931, "learning_rate": 1e-05, "loss": 0.9781, "step": 63960 }, { "epoch": 56.6563330380868, "grad_norm": 0.2561452388763428, "learning_rate": 1e-05, "loss": 0.9914, "step": 63965 }, { "epoch": 56.6607617360496, "grad_norm": 0.3384799361228943, "learning_rate": 1e-05, "loss": 0.9405, "step": 63970 }, { "epoch": 56.6651904340124, "grad_norm": 0.23272429406642914, "learning_rate": 1e-05, "loss": 0.9568, "step": 63975 }, { "epoch": 56.669619131975196, "grad_norm": 0.23863771557807922, "learning_rate": 1e-05, "loss": 0.9815, "step": 63980 }, { "epoch": 56.674047829938, "grad_norm": 0.23360808193683624, "learning_rate": 1e-05, "loss": 0.9018, "step": 63985 }, { "epoch": 56.6784765279008, "grad_norm": 0.24241039156913757, "learning_rate": 1e-05, "loss": 1.0153, "step": 63990 }, { "epoch": 56.68290522586359, "grad_norm": 0.31063148379325867, "learning_rate": 1e-05, "loss": 1.0098, "step": 63995 }, { "epoch": 56.687333923826394, "grad_norm": 0.23626072704792023, "learning_rate": 1e-05, "loss": 1.0094, "step": 64000 }, { "epoch": 56.691762621789195, "grad_norm": 0.23614118993282318, "learning_rate": 1e-05, "loss": 0.9812, "step": 64005 }, { "epoch": 56.696191319751996, "grad_norm": 0.20562206208705902, "learning_rate": 1e-05, "loss": 0.9331, "step": 64010 }, { "epoch": 56.70062001771479, "grad_norm": 0.19414405524730682, "learning_rate": 1e-05, "loss": 0.9793, "step": 64015 }, { "epoch": 56.70504871567759, "grad_norm": 0.2645584046840668, "learning_rate": 1e-05, "loss": 0.9535, "step": 64020 }, { "epoch": 56.70947741364039, "grad_norm": 0.19743388891220093, "learning_rate": 1e-05, "loss": 0.9344, "step": 64025 }, { "epoch": 56.71390611160319, "grad_norm": 0.22869670391082764, "learning_rate": 1e-05, "loss": 0.9621, "step": 64030 }, { "epoch": 56.71833480956599, "grad_norm": 0.2251833975315094, "learning_rate": 1e-05, "loss": 0.9723, "step": 64035 }, { "epoch": 56.72276350752879, "grad_norm": 0.22205980122089386, "learning_rate": 1e-05, "loss": 0.9796, "step": 64040 }, { "epoch": 56.72719220549158, "grad_norm": 0.2329990565776825, "learning_rate": 1e-05, "loss": 1.0023, "step": 64045 }, { "epoch": 56.731620903454385, "grad_norm": 0.22200244665145874, "learning_rate": 1e-05, "loss": 0.8899, "step": 64050 }, { "epoch": 56.736049601417186, "grad_norm": 0.25576964020729065, "learning_rate": 1e-05, "loss": 1.0027, "step": 64055 }, { "epoch": 56.74047829937998, "grad_norm": 0.24480198323726654, "learning_rate": 1e-05, "loss": 0.9835, "step": 64060 }, { "epoch": 56.74490699734278, "grad_norm": 0.2794451117515564, "learning_rate": 1e-05, "loss": 1.0139, "step": 64065 }, { "epoch": 56.74933569530558, "grad_norm": 0.23102684319019318, "learning_rate": 1e-05, "loss": 0.9511, "step": 64070 }, { "epoch": 56.753764393268376, "grad_norm": 0.25073865056037903, "learning_rate": 1e-05, "loss": 0.9748, "step": 64075 }, { "epoch": 56.75819309123118, "grad_norm": 0.22945536673069, "learning_rate": 1e-05, "loss": 0.9229, "step": 64080 }, { "epoch": 56.76262178919398, "grad_norm": 0.2440461367368698, "learning_rate": 1e-05, "loss": 0.9724, "step": 64085 }, { "epoch": 56.76705048715677, "grad_norm": 0.2708947956562042, "learning_rate": 1e-05, "loss": 0.9664, "step": 64090 }, { "epoch": 56.771479185119574, "grad_norm": 0.23065300285816193, "learning_rate": 1e-05, "loss": 0.9248, "step": 64095 }, { "epoch": 56.775907883082375, "grad_norm": 0.24007974565029144, "learning_rate": 1e-05, "loss": 0.9641, "step": 64100 }, { "epoch": 56.78033658104517, "grad_norm": 0.22570888698101044, "learning_rate": 1e-05, "loss": 0.9764, "step": 64105 }, { "epoch": 56.78476527900797, "grad_norm": 0.24484147131443024, "learning_rate": 1e-05, "loss": 0.9518, "step": 64110 }, { "epoch": 56.78919397697077, "grad_norm": 0.2136494517326355, "learning_rate": 1e-05, "loss": 0.9688, "step": 64115 }, { "epoch": 56.79362267493357, "grad_norm": 0.25291118025779724, "learning_rate": 1e-05, "loss": 0.9855, "step": 64120 }, { "epoch": 56.79805137289637, "grad_norm": 0.2680213451385498, "learning_rate": 1e-05, "loss": 0.9772, "step": 64125 }, { "epoch": 56.80248007085917, "grad_norm": 0.24497048556804657, "learning_rate": 1e-05, "loss": 0.9472, "step": 64130 }, { "epoch": 56.80690876882197, "grad_norm": 0.20482584834098816, "learning_rate": 1e-05, "loss": 0.9568, "step": 64135 }, { "epoch": 56.811337466784764, "grad_norm": 0.2691558599472046, "learning_rate": 1e-05, "loss": 0.981, "step": 64140 }, { "epoch": 56.815766164747565, "grad_norm": 0.29624661803245544, "learning_rate": 1e-05, "loss": 1.0043, "step": 64145 }, { "epoch": 56.820194862710366, "grad_norm": 0.26249465346336365, "learning_rate": 1e-05, "loss": 0.9597, "step": 64150 }, { "epoch": 56.82462356067316, "grad_norm": 0.2631811499595642, "learning_rate": 1e-05, "loss": 0.9694, "step": 64155 }, { "epoch": 56.82905225863596, "grad_norm": 0.20773299038410187, "learning_rate": 1e-05, "loss": 0.9157, "step": 64160 }, { "epoch": 56.83348095659876, "grad_norm": 0.24310153722763062, "learning_rate": 1e-05, "loss": 0.9577, "step": 64165 }, { "epoch": 56.83790965456156, "grad_norm": 0.26155924797058105, "learning_rate": 1e-05, "loss": 0.9351, "step": 64170 }, { "epoch": 56.84233835252436, "grad_norm": 0.2059398740530014, "learning_rate": 1e-05, "loss": 0.946, "step": 64175 }, { "epoch": 56.84676705048716, "grad_norm": 0.19910509884357452, "learning_rate": 1e-05, "loss": 0.9848, "step": 64180 }, { "epoch": 56.85119574844995, "grad_norm": 0.24665945768356323, "learning_rate": 1e-05, "loss": 0.9802, "step": 64185 }, { "epoch": 56.855624446412754, "grad_norm": 0.23373277485370636, "learning_rate": 1e-05, "loss": 0.9885, "step": 64190 }, { "epoch": 56.860053144375556, "grad_norm": 0.18853700160980225, "learning_rate": 1e-05, "loss": 0.9984, "step": 64195 }, { "epoch": 56.86448184233835, "grad_norm": 0.29547590017318726, "learning_rate": 1e-05, "loss": 0.9795, "step": 64200 }, { "epoch": 56.86891054030115, "grad_norm": 0.2515179216861725, "learning_rate": 1e-05, "loss": 0.9729, "step": 64205 }, { "epoch": 56.87333923826395, "grad_norm": 0.23519451916217804, "learning_rate": 1e-05, "loss": 0.9778, "step": 64210 }, { "epoch": 56.877767936226746, "grad_norm": 0.2409517765045166, "learning_rate": 1e-05, "loss": 0.9771, "step": 64215 }, { "epoch": 56.88219663418955, "grad_norm": 0.26572102308273315, "learning_rate": 1e-05, "loss": 0.9427, "step": 64220 }, { "epoch": 56.88662533215235, "grad_norm": 0.29910725355148315, "learning_rate": 1e-05, "loss": 0.989, "step": 64225 }, { "epoch": 56.89105403011514, "grad_norm": 0.2584010362625122, "learning_rate": 1e-05, "loss": 0.977, "step": 64230 }, { "epoch": 56.895482728077944, "grad_norm": 0.20503230392932892, "learning_rate": 1e-05, "loss": 0.9453, "step": 64235 }, { "epoch": 56.899911426040745, "grad_norm": 0.22223049402236938, "learning_rate": 1e-05, "loss": 0.9855, "step": 64240 }, { "epoch": 56.90434012400354, "grad_norm": 0.23342706263065338, "learning_rate": 1e-05, "loss": 0.9401, "step": 64245 }, { "epoch": 56.90876882196634, "grad_norm": 0.2384214848279953, "learning_rate": 1e-05, "loss": 0.9623, "step": 64250 }, { "epoch": 56.91319751992914, "grad_norm": 0.27266746759414673, "learning_rate": 1e-05, "loss": 0.9728, "step": 64255 }, { "epoch": 56.91762621789194, "grad_norm": 0.2111235111951828, "learning_rate": 1e-05, "loss": 0.9784, "step": 64260 }, { "epoch": 56.92205491585474, "grad_norm": 0.23333798348903656, "learning_rate": 1e-05, "loss": 0.9863, "step": 64265 }, { "epoch": 56.92648361381754, "grad_norm": 0.2777155935764313, "learning_rate": 1e-05, "loss": 0.9652, "step": 64270 }, { "epoch": 56.93091231178034, "grad_norm": 0.22184447944164276, "learning_rate": 1e-05, "loss": 0.9832, "step": 64275 }, { "epoch": 56.935341009743134, "grad_norm": 0.23562589287757874, "learning_rate": 1e-05, "loss": 1.0197, "step": 64280 }, { "epoch": 56.939769707705935, "grad_norm": 0.23291298747062683, "learning_rate": 1e-05, "loss": 0.8889, "step": 64285 }, { "epoch": 56.944198405668736, "grad_norm": 0.226912721991539, "learning_rate": 1e-05, "loss": 0.964, "step": 64290 }, { "epoch": 56.94862710363153, "grad_norm": 0.2113107591867447, "learning_rate": 1e-05, "loss": 0.9773, "step": 64295 }, { "epoch": 56.95305580159433, "grad_norm": 0.2539554536342621, "learning_rate": 1e-05, "loss": 1.0049, "step": 64300 }, { "epoch": 56.95748449955713, "grad_norm": 0.2438468486070633, "learning_rate": 1e-05, "loss": 0.9788, "step": 64305 }, { "epoch": 56.96191319751993, "grad_norm": 0.25723347067832947, "learning_rate": 1e-05, "loss": 0.995, "step": 64310 }, { "epoch": 56.96634189548273, "grad_norm": 0.28310999274253845, "learning_rate": 1e-05, "loss": 0.9325, "step": 64315 }, { "epoch": 56.97077059344553, "grad_norm": 0.2382184863090515, "learning_rate": 1e-05, "loss": 0.9625, "step": 64320 }, { "epoch": 56.97519929140832, "grad_norm": 0.2212591916322708, "learning_rate": 1e-05, "loss": 1.0167, "step": 64325 }, { "epoch": 56.979627989371124, "grad_norm": 0.23368185758590698, "learning_rate": 1e-05, "loss": 0.9695, "step": 64330 }, { "epoch": 56.984056687333926, "grad_norm": 0.22670021653175354, "learning_rate": 1e-05, "loss": 1.0019, "step": 64335 }, { "epoch": 56.98848538529672, "grad_norm": 0.2316238433122635, "learning_rate": 1e-05, "loss": 0.9406, "step": 64340 }, { "epoch": 56.99291408325952, "grad_norm": 0.22660046815872192, "learning_rate": 1e-05, "loss": 1.028, "step": 64345 }, { "epoch": 56.99734278122232, "grad_norm": 0.2520727515220642, "learning_rate": 1e-05, "loss": 0.9624, "step": 64350 }, { "epoch": 57.001771479185116, "grad_norm": 0.2616822421550751, "learning_rate": 1e-05, "loss": 0.9365, "step": 64355 }, { "epoch": 57.00620017714792, "grad_norm": 0.22308708727359772, "learning_rate": 1e-05, "loss": 0.9867, "step": 64360 }, { "epoch": 57.01062887511072, "grad_norm": 0.2275661826133728, "learning_rate": 1e-05, "loss": 1.0006, "step": 64365 }, { "epoch": 57.01505757307352, "grad_norm": 0.2338424175977707, "learning_rate": 1e-05, "loss": 0.9466, "step": 64370 }, { "epoch": 57.019486271036314, "grad_norm": 0.22162552177906036, "learning_rate": 1e-05, "loss": 0.9551, "step": 64375 }, { "epoch": 57.023914968999115, "grad_norm": 0.23149742186069489, "learning_rate": 1e-05, "loss": 1.0098, "step": 64380 }, { "epoch": 57.028343666961916, "grad_norm": 0.2888084650039673, "learning_rate": 1e-05, "loss": 0.9453, "step": 64385 }, { "epoch": 57.03277236492471, "grad_norm": 0.21528057754039764, "learning_rate": 1e-05, "loss": 0.9103, "step": 64390 }, { "epoch": 57.03720106288751, "grad_norm": 0.27679142355918884, "learning_rate": 1e-05, "loss": 1.0257, "step": 64395 }, { "epoch": 57.04162976085031, "grad_norm": 0.2184085249900818, "learning_rate": 1e-05, "loss": 0.9593, "step": 64400 }, { "epoch": 57.04605845881311, "grad_norm": 0.20083005726337433, "learning_rate": 1e-05, "loss": 0.9559, "step": 64405 }, { "epoch": 57.05048715677591, "grad_norm": 0.23817558586597443, "learning_rate": 1e-05, "loss": 0.9729, "step": 64410 }, { "epoch": 57.05491585473871, "grad_norm": 0.2634621262550354, "learning_rate": 1e-05, "loss": 0.9845, "step": 64415 }, { "epoch": 57.0593445527015, "grad_norm": 0.24302643537521362, "learning_rate": 1e-05, "loss": 0.9734, "step": 64420 }, { "epoch": 57.063773250664305, "grad_norm": 0.2861745357513428, "learning_rate": 1e-05, "loss": 0.9491, "step": 64425 }, { "epoch": 57.068201948627106, "grad_norm": 0.2536584734916687, "learning_rate": 1e-05, "loss": 0.9853, "step": 64430 }, { "epoch": 57.0726306465899, "grad_norm": 0.2796446681022644, "learning_rate": 1e-05, "loss": 1.0087, "step": 64435 }, { "epoch": 57.0770593445527, "grad_norm": 0.2579536736011505, "learning_rate": 1e-05, "loss": 0.94, "step": 64440 }, { "epoch": 57.0814880425155, "grad_norm": 0.24723190069198608, "learning_rate": 1e-05, "loss": 0.9007, "step": 64445 }, { "epoch": 57.0859167404783, "grad_norm": 0.22301775217056274, "learning_rate": 1e-05, "loss": 0.934, "step": 64450 }, { "epoch": 57.0903454384411, "grad_norm": 0.2747363746166229, "learning_rate": 1e-05, "loss": 0.9505, "step": 64455 }, { "epoch": 57.0947741364039, "grad_norm": 0.23562118411064148, "learning_rate": 1e-05, "loss": 0.9201, "step": 64460 }, { "epoch": 57.09920283436669, "grad_norm": 0.24335841834545135, "learning_rate": 1e-05, "loss": 0.9738, "step": 64465 }, { "epoch": 57.103631532329494, "grad_norm": 0.23442089557647705, "learning_rate": 1e-05, "loss": 0.9316, "step": 64470 }, { "epoch": 57.108060230292296, "grad_norm": 0.22402559220790863, "learning_rate": 1e-05, "loss": 0.936, "step": 64475 }, { "epoch": 57.11248892825509, "grad_norm": 0.25184866786003113, "learning_rate": 1e-05, "loss": 0.9971, "step": 64480 }, { "epoch": 57.11691762621789, "grad_norm": 0.23988264799118042, "learning_rate": 1e-05, "loss": 0.964, "step": 64485 }, { "epoch": 57.12134632418069, "grad_norm": 0.2549890875816345, "learning_rate": 1e-05, "loss": 0.9608, "step": 64490 }, { "epoch": 57.12577502214349, "grad_norm": 0.23983615636825562, "learning_rate": 1e-05, "loss": 0.982, "step": 64495 }, { "epoch": 57.13020372010629, "grad_norm": 0.2442067563533783, "learning_rate": 1e-05, "loss": 0.9365, "step": 64500 }, { "epoch": 57.13463241806909, "grad_norm": 0.28639140725135803, "learning_rate": 1e-05, "loss": 0.9623, "step": 64505 }, { "epoch": 57.13906111603189, "grad_norm": 0.2667704224586487, "learning_rate": 1e-05, "loss": 1.0331, "step": 64510 }, { "epoch": 57.143489813994684, "grad_norm": 0.2432333528995514, "learning_rate": 1e-05, "loss": 0.9868, "step": 64515 }, { "epoch": 57.147918511957485, "grad_norm": 0.2359638810157776, "learning_rate": 1e-05, "loss": 0.9734, "step": 64520 }, { "epoch": 57.152347209920286, "grad_norm": 0.20179174840450287, "learning_rate": 1e-05, "loss": 0.9886, "step": 64525 }, { "epoch": 57.15677590788308, "grad_norm": 0.23003005981445312, "learning_rate": 1e-05, "loss": 0.915, "step": 64530 }, { "epoch": 57.16120460584588, "grad_norm": 0.2588357925415039, "learning_rate": 1e-05, "loss": 0.9835, "step": 64535 }, { "epoch": 57.16563330380868, "grad_norm": 0.2229629009962082, "learning_rate": 1e-05, "loss": 1.0034, "step": 64540 }, { "epoch": 57.17006200177148, "grad_norm": 0.2676277160644531, "learning_rate": 1e-05, "loss": 1.0041, "step": 64545 }, { "epoch": 57.17449069973428, "grad_norm": 0.23335818946361542, "learning_rate": 1e-05, "loss": 0.9515, "step": 64550 }, { "epoch": 57.17891939769708, "grad_norm": 0.25431424379348755, "learning_rate": 1e-05, "loss": 0.9223, "step": 64555 }, { "epoch": 57.18334809565987, "grad_norm": 0.2280757576227188, "learning_rate": 1e-05, "loss": 0.969, "step": 64560 }, { "epoch": 57.187776793622675, "grad_norm": 0.2610466778278351, "learning_rate": 1e-05, "loss": 0.943, "step": 64565 }, { "epoch": 57.192205491585476, "grad_norm": 0.22489522397518158, "learning_rate": 1e-05, "loss": 0.9563, "step": 64570 }, { "epoch": 57.19663418954827, "grad_norm": 0.22267022728919983, "learning_rate": 1e-05, "loss": 0.97, "step": 64575 }, { "epoch": 57.20106288751107, "grad_norm": 0.25446072220802307, "learning_rate": 1e-05, "loss": 1.0012, "step": 64580 }, { "epoch": 57.20549158547387, "grad_norm": 0.2352152019739151, "learning_rate": 1e-05, "loss": 0.9932, "step": 64585 }, { "epoch": 57.20992028343667, "grad_norm": 0.25363969802856445, "learning_rate": 1e-05, "loss": 0.9409, "step": 64590 }, { "epoch": 57.21434898139947, "grad_norm": 0.26549476385116577, "learning_rate": 1e-05, "loss": 0.9845, "step": 64595 }, { "epoch": 57.21877767936227, "grad_norm": 0.2486758977174759, "learning_rate": 1e-05, "loss": 1.0128, "step": 64600 }, { "epoch": 57.22320637732506, "grad_norm": 0.26905712485313416, "learning_rate": 1e-05, "loss": 0.9484, "step": 64605 }, { "epoch": 57.227635075287864, "grad_norm": 0.2641041576862335, "learning_rate": 1e-05, "loss": 0.9658, "step": 64610 }, { "epoch": 57.232063773250665, "grad_norm": 0.2319459766149521, "learning_rate": 1e-05, "loss": 0.9604, "step": 64615 }, { "epoch": 57.23649247121347, "grad_norm": 0.24442654848098755, "learning_rate": 1e-05, "loss": 0.9991, "step": 64620 }, { "epoch": 57.24092116917626, "grad_norm": 0.21868108212947845, "learning_rate": 1e-05, "loss": 0.9966, "step": 64625 }, { "epoch": 57.24534986713906, "grad_norm": 0.22921469807624817, "learning_rate": 1e-05, "loss": 0.9843, "step": 64630 }, { "epoch": 57.24977856510186, "grad_norm": 0.27202725410461426, "learning_rate": 1e-05, "loss": 1.0014, "step": 64635 }, { "epoch": 57.25420726306466, "grad_norm": 0.22790783643722534, "learning_rate": 1e-05, "loss": 0.98, "step": 64640 }, { "epoch": 57.25863596102746, "grad_norm": 0.22276154160499573, "learning_rate": 1e-05, "loss": 0.9835, "step": 64645 }, { "epoch": 57.26306465899026, "grad_norm": 0.24896281957626343, "learning_rate": 1e-05, "loss": 0.9975, "step": 64650 }, { "epoch": 57.267493356953054, "grad_norm": 0.2236364632844925, "learning_rate": 1e-05, "loss": 0.9492, "step": 64655 }, { "epoch": 57.271922054915855, "grad_norm": 0.22617529332637787, "learning_rate": 1e-05, "loss": 0.9242, "step": 64660 }, { "epoch": 57.276350752878656, "grad_norm": 0.24415220320224762, "learning_rate": 1e-05, "loss": 0.9461, "step": 64665 }, { "epoch": 57.28077945084145, "grad_norm": 0.23086804151535034, "learning_rate": 1e-05, "loss": 0.9538, "step": 64670 }, { "epoch": 57.28520814880425, "grad_norm": 0.27890124917030334, "learning_rate": 1e-05, "loss": 0.9995, "step": 64675 }, { "epoch": 57.28963684676705, "grad_norm": 0.26853156089782715, "learning_rate": 1e-05, "loss": 0.9657, "step": 64680 }, { "epoch": 57.29406554472985, "grad_norm": 0.23927192389965057, "learning_rate": 1e-05, "loss": 0.9585, "step": 64685 }, { "epoch": 57.29849424269265, "grad_norm": 0.21735401451587677, "learning_rate": 1e-05, "loss": 0.9558, "step": 64690 }, { "epoch": 57.30292294065545, "grad_norm": 0.25924548506736755, "learning_rate": 1e-05, "loss": 0.9569, "step": 64695 }, { "epoch": 57.30735163861824, "grad_norm": 0.24101030826568604, "learning_rate": 1e-05, "loss": 0.949, "step": 64700 }, { "epoch": 57.311780336581045, "grad_norm": 0.2158128321170807, "learning_rate": 1e-05, "loss": 0.9371, "step": 64705 }, { "epoch": 57.316209034543846, "grad_norm": 0.2520535886287689, "learning_rate": 1e-05, "loss": 0.9979, "step": 64710 }, { "epoch": 57.32063773250664, "grad_norm": 0.23017117381095886, "learning_rate": 1e-05, "loss": 0.9473, "step": 64715 }, { "epoch": 57.32506643046944, "grad_norm": 0.2008381485939026, "learning_rate": 1e-05, "loss": 1.0187, "step": 64720 }, { "epoch": 57.32949512843224, "grad_norm": 0.22195418179035187, "learning_rate": 1e-05, "loss": 0.9353, "step": 64725 }, { "epoch": 57.333923826395036, "grad_norm": 0.25837013125419617, "learning_rate": 1e-05, "loss": 0.999, "step": 64730 }, { "epoch": 57.33835252435784, "grad_norm": 0.20500344038009644, "learning_rate": 1e-05, "loss": 0.9553, "step": 64735 }, { "epoch": 57.34278122232064, "grad_norm": 0.2097352147102356, "learning_rate": 1e-05, "loss": 0.9794, "step": 64740 }, { "epoch": 57.34720992028344, "grad_norm": 0.20659856498241425, "learning_rate": 1e-05, "loss": 0.9814, "step": 64745 }, { "epoch": 57.351638618246234, "grad_norm": 0.23098255693912506, "learning_rate": 1e-05, "loss": 0.9897, "step": 64750 }, { "epoch": 57.356067316209035, "grad_norm": 0.2713516652584076, "learning_rate": 1e-05, "loss": 0.9336, "step": 64755 }, { "epoch": 57.36049601417184, "grad_norm": 0.2024286836385727, "learning_rate": 1e-05, "loss": 1.0205, "step": 64760 }, { "epoch": 57.36492471213463, "grad_norm": 0.24792177975177765, "learning_rate": 1e-05, "loss": 0.9767, "step": 64765 }, { "epoch": 57.36935341009743, "grad_norm": 0.21796000003814697, "learning_rate": 1e-05, "loss": 0.9802, "step": 64770 }, { "epoch": 57.37378210806023, "grad_norm": 0.2694271504878998, "learning_rate": 1e-05, "loss": 0.9486, "step": 64775 }, { "epoch": 57.37821080602303, "grad_norm": 0.22968241572380066, "learning_rate": 1e-05, "loss": 0.9907, "step": 64780 }, { "epoch": 57.38263950398583, "grad_norm": 0.19997096061706543, "learning_rate": 1e-05, "loss": 0.9664, "step": 64785 }, { "epoch": 57.38706820194863, "grad_norm": 0.23471786081790924, "learning_rate": 1e-05, "loss": 0.9449, "step": 64790 }, { "epoch": 57.391496899911424, "grad_norm": 0.20355014503002167, "learning_rate": 1e-05, "loss": 0.9737, "step": 64795 }, { "epoch": 57.395925597874225, "grad_norm": 0.22634902596473694, "learning_rate": 1e-05, "loss": 0.9263, "step": 64800 }, { "epoch": 57.400354295837026, "grad_norm": 0.20699720084667206, "learning_rate": 1e-05, "loss": 0.9913, "step": 64805 }, { "epoch": 57.40478299379982, "grad_norm": 0.21015790104866028, "learning_rate": 1e-05, "loss": 0.9808, "step": 64810 }, { "epoch": 57.40921169176262, "grad_norm": 0.23895959556102753, "learning_rate": 1e-05, "loss": 0.9753, "step": 64815 }, { "epoch": 57.41364038972542, "grad_norm": 0.2263335883617401, "learning_rate": 1e-05, "loss": 0.9251, "step": 64820 }, { "epoch": 57.41806908768822, "grad_norm": 0.23941682279109955, "learning_rate": 1e-05, "loss": 0.9975, "step": 64825 }, { "epoch": 57.42249778565102, "grad_norm": 0.21777811646461487, "learning_rate": 1e-05, "loss": 0.9921, "step": 64830 }, { "epoch": 57.42692648361382, "grad_norm": 0.205412358045578, "learning_rate": 1e-05, "loss": 0.9488, "step": 64835 }, { "epoch": 57.43135518157661, "grad_norm": 0.24491752684116364, "learning_rate": 1e-05, "loss": 0.9634, "step": 64840 }, { "epoch": 57.435783879539414, "grad_norm": 0.24434705078601837, "learning_rate": 1e-05, "loss": 0.9496, "step": 64845 }, { "epoch": 57.440212577502216, "grad_norm": 0.20678195357322693, "learning_rate": 1e-05, "loss": 0.9898, "step": 64850 }, { "epoch": 57.44464127546502, "grad_norm": 0.22470834851264954, "learning_rate": 1e-05, "loss": 0.9672, "step": 64855 }, { "epoch": 57.44906997342781, "grad_norm": 0.22402292490005493, "learning_rate": 1e-05, "loss": 0.9923, "step": 64860 }, { "epoch": 57.45349867139061, "grad_norm": 0.25211504101753235, "learning_rate": 1e-05, "loss": 0.9887, "step": 64865 }, { "epoch": 57.45792736935341, "grad_norm": 0.22374984622001648, "learning_rate": 1e-05, "loss": 0.9236, "step": 64870 }, { "epoch": 57.46235606731621, "grad_norm": 0.2464490383863449, "learning_rate": 1e-05, "loss": 1.0107, "step": 64875 }, { "epoch": 57.46678476527901, "grad_norm": 0.23377221822738647, "learning_rate": 1e-05, "loss": 0.9951, "step": 64880 }, { "epoch": 57.47121346324181, "grad_norm": 0.21537736058235168, "learning_rate": 1e-05, "loss": 1.0498, "step": 64885 }, { "epoch": 57.475642161204604, "grad_norm": 0.23741796612739563, "learning_rate": 1e-05, "loss": 1.02, "step": 64890 }, { "epoch": 57.480070859167405, "grad_norm": 0.21205613017082214, "learning_rate": 1e-05, "loss": 0.9663, "step": 64895 }, { "epoch": 57.484499557130206, "grad_norm": 0.2406456619501114, "learning_rate": 1e-05, "loss": 0.9877, "step": 64900 }, { "epoch": 57.488928255093, "grad_norm": 0.24922002851963043, "learning_rate": 1e-05, "loss": 1.0193, "step": 64905 }, { "epoch": 57.4933569530558, "grad_norm": 0.22549712657928467, "learning_rate": 1e-05, "loss": 0.9384, "step": 64910 }, { "epoch": 57.4977856510186, "grad_norm": 0.24909362196922302, "learning_rate": 1e-05, "loss": 0.955, "step": 64915 }, { "epoch": 57.5022143489814, "grad_norm": 0.3197036385536194, "learning_rate": 1e-05, "loss": 0.9604, "step": 64920 }, { "epoch": 57.5066430469442, "grad_norm": 0.2692038118839264, "learning_rate": 1e-05, "loss": 0.9908, "step": 64925 }, { "epoch": 57.511071744907, "grad_norm": 0.25321099162101746, "learning_rate": 1e-05, "loss": 0.9938, "step": 64930 }, { "epoch": 57.515500442869794, "grad_norm": 0.22069984674453735, "learning_rate": 1e-05, "loss": 0.947, "step": 64935 }, { "epoch": 57.519929140832595, "grad_norm": 0.23525096476078033, "learning_rate": 1e-05, "loss": 0.9504, "step": 64940 }, { "epoch": 57.524357838795396, "grad_norm": 0.2202586978673935, "learning_rate": 1e-05, "loss": 0.9147, "step": 64945 }, { "epoch": 57.52878653675819, "grad_norm": 0.2157680094242096, "learning_rate": 1e-05, "loss": 0.9952, "step": 64950 }, { "epoch": 57.53321523472099, "grad_norm": 0.3081792891025543, "learning_rate": 1e-05, "loss": 0.9546, "step": 64955 }, { "epoch": 57.53764393268379, "grad_norm": 0.2438458502292633, "learning_rate": 1e-05, "loss": 0.9372, "step": 64960 }, { "epoch": 57.54207263064659, "grad_norm": 0.23848648369312286, "learning_rate": 1e-05, "loss": 0.9641, "step": 64965 }, { "epoch": 57.54650132860939, "grad_norm": 0.24289342761039734, "learning_rate": 1e-05, "loss": 0.9821, "step": 64970 }, { "epoch": 57.55093002657219, "grad_norm": 0.21775168180465698, "learning_rate": 1e-05, "loss": 0.9587, "step": 64975 }, { "epoch": 57.55535872453498, "grad_norm": 0.25212976336479187, "learning_rate": 1e-05, "loss": 0.9377, "step": 64980 }, { "epoch": 57.559787422497784, "grad_norm": 0.23616497218608856, "learning_rate": 1e-05, "loss": 0.9558, "step": 64985 }, { "epoch": 57.564216120460586, "grad_norm": 0.24934092164039612, "learning_rate": 1e-05, "loss": 0.9941, "step": 64990 }, { "epoch": 57.56864481842339, "grad_norm": 0.23486179113388062, "learning_rate": 1e-05, "loss": 1.0593, "step": 64995 }, { "epoch": 57.57307351638618, "grad_norm": 0.21842266619205475, "learning_rate": 1e-05, "loss": 0.9819, "step": 65000 }, { "epoch": 57.57750221434898, "grad_norm": 0.2404446005821228, "learning_rate": 1e-05, "loss": 0.9734, "step": 65005 }, { "epoch": 57.58193091231178, "grad_norm": 0.2785521149635315, "learning_rate": 1e-05, "loss": 0.9738, "step": 65010 }, { "epoch": 57.58635961027458, "grad_norm": 0.270435631275177, "learning_rate": 1e-05, "loss": 1.0223, "step": 65015 }, { "epoch": 57.59078830823738, "grad_norm": 0.29943397641181946, "learning_rate": 1e-05, "loss": 0.943, "step": 65020 }, { "epoch": 57.59521700620018, "grad_norm": 0.23091958463191986, "learning_rate": 1e-05, "loss": 0.9632, "step": 65025 }, { "epoch": 57.599645704162974, "grad_norm": 0.2477135956287384, "learning_rate": 1e-05, "loss": 0.9528, "step": 65030 }, { "epoch": 57.604074402125775, "grad_norm": 0.2208290547132492, "learning_rate": 1e-05, "loss": 1.0232, "step": 65035 }, { "epoch": 57.608503100088576, "grad_norm": 0.2288370430469513, "learning_rate": 1e-05, "loss": 0.9299, "step": 65040 }, { "epoch": 57.61293179805137, "grad_norm": 0.22634239494800568, "learning_rate": 1e-05, "loss": 0.9821, "step": 65045 }, { "epoch": 57.61736049601417, "grad_norm": 0.25695568323135376, "learning_rate": 1e-05, "loss": 0.9798, "step": 65050 }, { "epoch": 57.62178919397697, "grad_norm": 0.1895153820514679, "learning_rate": 1e-05, "loss": 1.0176, "step": 65055 }, { "epoch": 57.62621789193977, "grad_norm": 0.24530749022960663, "learning_rate": 1e-05, "loss": 0.9504, "step": 65060 }, { "epoch": 57.63064658990257, "grad_norm": 0.2582763731479645, "learning_rate": 1e-05, "loss": 0.944, "step": 65065 }, { "epoch": 57.63507528786537, "grad_norm": 0.2380317747592926, "learning_rate": 1e-05, "loss": 0.9735, "step": 65070 }, { "epoch": 57.63950398582816, "grad_norm": 0.19777148962020874, "learning_rate": 1e-05, "loss": 0.9835, "step": 65075 }, { "epoch": 57.643932683790965, "grad_norm": 0.31268760561943054, "learning_rate": 1e-05, "loss": 0.9741, "step": 65080 }, { "epoch": 57.648361381753766, "grad_norm": 0.2274935096502304, "learning_rate": 1e-05, "loss": 0.9701, "step": 65085 }, { "epoch": 57.65279007971656, "grad_norm": 0.22767941653728485, "learning_rate": 1e-05, "loss": 0.9791, "step": 65090 }, { "epoch": 57.65721877767936, "grad_norm": 0.2537647485733032, "learning_rate": 1e-05, "loss": 0.9872, "step": 65095 }, { "epoch": 57.66164747564216, "grad_norm": 0.27609172463417053, "learning_rate": 1e-05, "loss": 1.0065, "step": 65100 }, { "epoch": 57.666076173604964, "grad_norm": 0.2544642388820648, "learning_rate": 1e-05, "loss": 0.9733, "step": 65105 }, { "epoch": 57.67050487156776, "grad_norm": 0.2952786386013031, "learning_rate": 1e-05, "loss": 0.9811, "step": 65110 }, { "epoch": 57.67493356953056, "grad_norm": 0.22622506320476532, "learning_rate": 1e-05, "loss": 1.029, "step": 65115 }, { "epoch": 57.67936226749336, "grad_norm": 0.22703221440315247, "learning_rate": 1e-05, "loss": 0.9677, "step": 65120 }, { "epoch": 57.683790965456154, "grad_norm": 0.2238233983516693, "learning_rate": 1e-05, "loss": 0.976, "step": 65125 }, { "epoch": 57.688219663418955, "grad_norm": 0.24899868667125702, "learning_rate": 1e-05, "loss": 0.9751, "step": 65130 }, { "epoch": 57.69264836138176, "grad_norm": 0.25768086314201355, "learning_rate": 1e-05, "loss": 0.9578, "step": 65135 }, { "epoch": 57.69707705934455, "grad_norm": 0.2340216338634491, "learning_rate": 1e-05, "loss": 0.9406, "step": 65140 }, { "epoch": 57.70150575730735, "grad_norm": 0.26033055782318115, "learning_rate": 1e-05, "loss": 0.9476, "step": 65145 }, { "epoch": 57.70593445527015, "grad_norm": 0.23662853240966797, "learning_rate": 1e-05, "loss": 0.9573, "step": 65150 }, { "epoch": 57.71036315323295, "grad_norm": 0.27634745836257935, "learning_rate": 1e-05, "loss": 0.9363, "step": 65155 }, { "epoch": 57.71479185119575, "grad_norm": 0.24156300723552704, "learning_rate": 1e-05, "loss": 0.9591, "step": 65160 }, { "epoch": 57.71922054915855, "grad_norm": 0.20061972737312317, "learning_rate": 1e-05, "loss": 0.9927, "step": 65165 }, { "epoch": 57.723649247121344, "grad_norm": 0.24070684611797333, "learning_rate": 1e-05, "loss": 0.9609, "step": 65170 }, { "epoch": 57.728077945084145, "grad_norm": 0.2417374700307846, "learning_rate": 1e-05, "loss": 0.9741, "step": 65175 }, { "epoch": 57.732506643046946, "grad_norm": 0.2408277839422226, "learning_rate": 1e-05, "loss": 0.9362, "step": 65180 }, { "epoch": 57.73693534100974, "grad_norm": 0.2178143411874771, "learning_rate": 1e-05, "loss": 1.0029, "step": 65185 }, { "epoch": 57.74136403897254, "grad_norm": 0.2403598129749298, "learning_rate": 1e-05, "loss": 0.9986, "step": 65190 }, { "epoch": 57.74579273693534, "grad_norm": 0.26669999957084656, "learning_rate": 1e-05, "loss": 0.9559, "step": 65195 }, { "epoch": 57.75022143489814, "grad_norm": 0.2620389461517334, "learning_rate": 1e-05, "loss": 0.9188, "step": 65200 }, { "epoch": 57.75465013286094, "grad_norm": 0.23346661031246185, "learning_rate": 1e-05, "loss": 0.9543, "step": 65205 }, { "epoch": 57.75907883082374, "grad_norm": 0.21242472529411316, "learning_rate": 1e-05, "loss": 0.9304, "step": 65210 }, { "epoch": 57.76350752878653, "grad_norm": 0.25294920802116394, "learning_rate": 1e-05, "loss": 0.9816, "step": 65215 }, { "epoch": 57.767936226749335, "grad_norm": 0.23865795135498047, "learning_rate": 1e-05, "loss": 0.9766, "step": 65220 }, { "epoch": 57.772364924712136, "grad_norm": 0.2392657846212387, "learning_rate": 1e-05, "loss": 0.9264, "step": 65225 }, { "epoch": 57.77679362267494, "grad_norm": 0.2450830340385437, "learning_rate": 1e-05, "loss": 0.9632, "step": 65230 }, { "epoch": 57.78122232063773, "grad_norm": 0.2542381286621094, "learning_rate": 1e-05, "loss": 1.0038, "step": 65235 }, { "epoch": 57.78565101860053, "grad_norm": 0.21462008357048035, "learning_rate": 1e-05, "loss": 0.9572, "step": 65240 }, { "epoch": 57.79007971656333, "grad_norm": 0.24379025399684906, "learning_rate": 1e-05, "loss": 0.9882, "step": 65245 }, { "epoch": 57.79450841452613, "grad_norm": 0.25722989439964294, "learning_rate": 1e-05, "loss": 0.9764, "step": 65250 }, { "epoch": 57.79893711248893, "grad_norm": 0.2545693516731262, "learning_rate": 1e-05, "loss": 0.9565, "step": 65255 }, { "epoch": 57.80336581045173, "grad_norm": 0.22995683550834656, "learning_rate": 1e-05, "loss": 0.9703, "step": 65260 }, { "epoch": 57.807794508414524, "grad_norm": 0.29748353362083435, "learning_rate": 1e-05, "loss": 0.9554, "step": 65265 }, { "epoch": 57.812223206377325, "grad_norm": 0.24034743010997772, "learning_rate": 1e-05, "loss": 0.9917, "step": 65270 }, { "epoch": 57.81665190434013, "grad_norm": 0.23386414349079132, "learning_rate": 1e-05, "loss": 0.9749, "step": 65275 }, { "epoch": 57.82108060230292, "grad_norm": 0.2299608737230301, "learning_rate": 1e-05, "loss": 0.9517, "step": 65280 }, { "epoch": 57.82550930026572, "grad_norm": 0.23577432334423065, "learning_rate": 1e-05, "loss": 0.9686, "step": 65285 }, { "epoch": 57.82993799822852, "grad_norm": 0.2799549996852875, "learning_rate": 1e-05, "loss": 1.0059, "step": 65290 }, { "epoch": 57.83436669619132, "grad_norm": 0.22159375250339508, "learning_rate": 1e-05, "loss": 0.9429, "step": 65295 }, { "epoch": 57.83879539415412, "grad_norm": 0.2742268443107605, "learning_rate": 1e-05, "loss": 0.9443, "step": 65300 }, { "epoch": 57.84322409211692, "grad_norm": 0.2504993975162506, "learning_rate": 1e-05, "loss": 0.9155, "step": 65305 }, { "epoch": 57.847652790079714, "grad_norm": 0.2482503354549408, "learning_rate": 1e-05, "loss": 1.0059, "step": 65310 }, { "epoch": 57.852081488042515, "grad_norm": 0.21859294176101685, "learning_rate": 1e-05, "loss": 0.9706, "step": 65315 }, { "epoch": 57.856510186005316, "grad_norm": 0.23677146434783936, "learning_rate": 1e-05, "loss": 0.9796, "step": 65320 }, { "epoch": 57.86093888396811, "grad_norm": 0.25044289231300354, "learning_rate": 1e-05, "loss": 0.9918, "step": 65325 }, { "epoch": 57.86536758193091, "grad_norm": 0.1946915239095688, "learning_rate": 1e-05, "loss": 0.9677, "step": 65330 }, { "epoch": 57.86979627989371, "grad_norm": 0.20747967064380646, "learning_rate": 1e-05, "loss": 0.9835, "step": 65335 }, { "epoch": 57.87422497785651, "grad_norm": 0.25214090943336487, "learning_rate": 1e-05, "loss": 1.0029, "step": 65340 }, { "epoch": 57.87865367581931, "grad_norm": 0.23532888293266296, "learning_rate": 1e-05, "loss": 0.9744, "step": 65345 }, { "epoch": 57.88308237378211, "grad_norm": 0.25461849570274353, "learning_rate": 1e-05, "loss": 1.0009, "step": 65350 }, { "epoch": 57.88751107174491, "grad_norm": 0.25920936465263367, "learning_rate": 1e-05, "loss": 1.0195, "step": 65355 }, { "epoch": 57.891939769707704, "grad_norm": 0.21980534493923187, "learning_rate": 1e-05, "loss": 0.9529, "step": 65360 }, { "epoch": 57.896368467670506, "grad_norm": 0.286761999130249, "learning_rate": 1e-05, "loss": 0.9614, "step": 65365 }, { "epoch": 57.90079716563331, "grad_norm": 0.2770629823207855, "learning_rate": 1e-05, "loss": 0.9548, "step": 65370 }, { "epoch": 57.9052258635961, "grad_norm": 0.2169332653284073, "learning_rate": 1e-05, "loss": 0.9365, "step": 65375 }, { "epoch": 57.9096545615589, "grad_norm": 0.2575649321079254, "learning_rate": 1e-05, "loss": 1.0107, "step": 65380 }, { "epoch": 57.9140832595217, "grad_norm": 0.2296285629272461, "learning_rate": 1e-05, "loss": 0.9255, "step": 65385 }, { "epoch": 57.9185119574845, "grad_norm": 0.2089402675628662, "learning_rate": 1e-05, "loss": 1.0392, "step": 65390 }, { "epoch": 57.9229406554473, "grad_norm": 0.2604351043701172, "learning_rate": 1e-05, "loss": 0.974, "step": 65395 }, { "epoch": 57.9273693534101, "grad_norm": 0.26109015941619873, "learning_rate": 1e-05, "loss": 0.9798, "step": 65400 }, { "epoch": 57.931798051372894, "grad_norm": 0.268934041261673, "learning_rate": 1e-05, "loss": 0.9695, "step": 65405 }, { "epoch": 57.936226749335695, "grad_norm": 0.22601456940174103, "learning_rate": 1e-05, "loss": 0.992, "step": 65410 }, { "epoch": 57.9406554472985, "grad_norm": 0.2878456115722656, "learning_rate": 1e-05, "loss": 0.9835, "step": 65415 }, { "epoch": 57.94508414526129, "grad_norm": 0.22090114653110504, "learning_rate": 1e-05, "loss": 0.9312, "step": 65420 }, { "epoch": 57.94951284322409, "grad_norm": 0.2888115346431732, "learning_rate": 1e-05, "loss": 0.9759, "step": 65425 }, { "epoch": 57.95394154118689, "grad_norm": 0.39820554852485657, "learning_rate": 1e-05, "loss": 1.0051, "step": 65430 }, { "epoch": 57.95837023914969, "grad_norm": 0.24735014140605927, "learning_rate": 1e-05, "loss": 0.9514, "step": 65435 }, { "epoch": 57.96279893711249, "grad_norm": 0.21726997196674347, "learning_rate": 1e-05, "loss": 1.0121, "step": 65440 }, { "epoch": 57.96722763507529, "grad_norm": 0.23674222826957703, "learning_rate": 1e-05, "loss": 0.9387, "step": 65445 }, { "epoch": 57.971656333038084, "grad_norm": 0.23199085891246796, "learning_rate": 1e-05, "loss": 0.9683, "step": 65450 }, { "epoch": 57.976085031000885, "grad_norm": 0.23483982682228088, "learning_rate": 1e-05, "loss": 0.9533, "step": 65455 }, { "epoch": 57.980513728963686, "grad_norm": 0.23024198412895203, "learning_rate": 1e-05, "loss": 0.9631, "step": 65460 }, { "epoch": 57.98494242692648, "grad_norm": 0.22566211223602295, "learning_rate": 1e-05, "loss": 0.9237, "step": 65465 }, { "epoch": 57.98937112488928, "grad_norm": 0.2305525541305542, "learning_rate": 1e-05, "loss": 0.9576, "step": 65470 }, { "epoch": 57.99379982285208, "grad_norm": 0.2151440531015396, "learning_rate": 1e-05, "loss": 0.9877, "step": 65475 }, { "epoch": 57.998228520814884, "grad_norm": 0.21990051865577698, "learning_rate": 1e-05, "loss": 0.9633, "step": 65480 }, { "epoch": 58.00265721877768, "grad_norm": 0.3026951551437378, "learning_rate": 1e-05, "loss": 0.9858, "step": 65485 }, { "epoch": 58.00708591674048, "grad_norm": 0.1917659491300583, "learning_rate": 1e-05, "loss": 0.9635, "step": 65490 }, { "epoch": 58.01151461470328, "grad_norm": 0.23054540157318115, "learning_rate": 1e-05, "loss": 1.0412, "step": 65495 }, { "epoch": 58.015943312666074, "grad_norm": 0.20408916473388672, "learning_rate": 1e-05, "loss": 0.9824, "step": 65500 }, { "epoch": 58.020372010628876, "grad_norm": 0.23431792855262756, "learning_rate": 1e-05, "loss": 0.9546, "step": 65505 }, { "epoch": 58.02480070859168, "grad_norm": 0.2742765247821808, "learning_rate": 1e-05, "loss": 0.9761, "step": 65510 }, { "epoch": 58.02922940655447, "grad_norm": 0.22317524254322052, "learning_rate": 1e-05, "loss": 0.9488, "step": 65515 }, { "epoch": 58.03365810451727, "grad_norm": 0.2639908492565155, "learning_rate": 1e-05, "loss": 0.9618, "step": 65520 }, { "epoch": 58.03808680248007, "grad_norm": 0.22771699726581573, "learning_rate": 1e-05, "loss": 0.932, "step": 65525 }, { "epoch": 58.04251550044287, "grad_norm": 0.2904604375362396, "learning_rate": 1e-05, "loss": 0.9716, "step": 65530 }, { "epoch": 58.04694419840567, "grad_norm": 0.25606799125671387, "learning_rate": 1e-05, "loss": 0.9212, "step": 65535 }, { "epoch": 58.05137289636847, "grad_norm": 0.22992970049381256, "learning_rate": 1e-05, "loss": 0.9752, "step": 65540 }, { "epoch": 58.055801594331264, "grad_norm": 0.26750385761260986, "learning_rate": 1e-05, "loss": 0.9751, "step": 65545 }, { "epoch": 58.060230292294065, "grad_norm": 0.23747022449970245, "learning_rate": 1e-05, "loss": 0.9836, "step": 65550 }, { "epoch": 58.064658990256866, "grad_norm": 0.24407124519348145, "learning_rate": 1e-05, "loss": 0.9652, "step": 65555 }, { "epoch": 58.06908768821966, "grad_norm": 0.26146432757377625, "learning_rate": 1e-05, "loss": 0.9994, "step": 65560 }, { "epoch": 58.07351638618246, "grad_norm": 0.2496596872806549, "learning_rate": 1e-05, "loss": 0.9343, "step": 65565 }, { "epoch": 58.07794508414526, "grad_norm": 0.22963643074035645, "learning_rate": 1e-05, "loss": 0.9681, "step": 65570 }, { "epoch": 58.08237378210806, "grad_norm": 0.24085082113742828, "learning_rate": 1e-05, "loss": 0.9493, "step": 65575 }, { "epoch": 58.08680248007086, "grad_norm": 0.2758622169494629, "learning_rate": 1e-05, "loss": 0.9232, "step": 65580 }, { "epoch": 58.09123117803366, "grad_norm": 0.24488963186740875, "learning_rate": 1e-05, "loss": 1.0118, "step": 65585 }, { "epoch": 58.09565987599645, "grad_norm": 0.242008775472641, "learning_rate": 1e-05, "loss": 0.9819, "step": 65590 }, { "epoch": 58.100088573959255, "grad_norm": 0.23270395398139954, "learning_rate": 1e-05, "loss": 0.9783, "step": 65595 }, { "epoch": 58.104517271922056, "grad_norm": 0.27526354789733887, "learning_rate": 1e-05, "loss": 0.9371, "step": 65600 }, { "epoch": 58.10894596988486, "grad_norm": 0.20777063071727753, "learning_rate": 1e-05, "loss": 0.9826, "step": 65605 }, { "epoch": 58.11337466784765, "grad_norm": 0.2342921942472458, "learning_rate": 1e-05, "loss": 0.9744, "step": 65610 }, { "epoch": 58.11780336581045, "grad_norm": 0.27047473192214966, "learning_rate": 1e-05, "loss": 0.928, "step": 65615 }, { "epoch": 58.122232063773254, "grad_norm": 0.25943392515182495, "learning_rate": 1e-05, "loss": 0.9447, "step": 65620 }, { "epoch": 58.12666076173605, "grad_norm": 0.25543007254600525, "learning_rate": 1e-05, "loss": 1.0124, "step": 65625 }, { "epoch": 58.13108945969885, "grad_norm": 0.2618057131767273, "learning_rate": 1e-05, "loss": 0.9773, "step": 65630 }, { "epoch": 58.13551815766165, "grad_norm": 0.21959581971168518, "learning_rate": 1e-05, "loss": 0.9313, "step": 65635 }, { "epoch": 58.139946855624444, "grad_norm": 0.21703411638736725, "learning_rate": 1e-05, "loss": 0.9288, "step": 65640 }, { "epoch": 58.144375553587246, "grad_norm": 0.2822389602661133, "learning_rate": 1e-05, "loss": 1.0094, "step": 65645 }, { "epoch": 58.14880425155005, "grad_norm": 0.23886580765247345, "learning_rate": 1e-05, "loss": 0.9751, "step": 65650 }, { "epoch": 58.15323294951284, "grad_norm": 0.28538328409194946, "learning_rate": 1e-05, "loss": 0.9912, "step": 65655 }, { "epoch": 58.15766164747564, "grad_norm": 0.22715330123901367, "learning_rate": 1e-05, "loss": 0.9513, "step": 65660 }, { "epoch": 58.16209034543844, "grad_norm": 0.22549784183502197, "learning_rate": 1e-05, "loss": 0.9381, "step": 65665 }, { "epoch": 58.16651904340124, "grad_norm": 0.22157911956310272, "learning_rate": 1e-05, "loss": 0.935, "step": 65670 }, { "epoch": 58.17094774136404, "grad_norm": 0.21394436061382294, "learning_rate": 1e-05, "loss": 0.9335, "step": 65675 }, { "epoch": 58.17537643932684, "grad_norm": 0.2460552453994751, "learning_rate": 1e-05, "loss": 0.8801, "step": 65680 }, { "epoch": 58.179805137289634, "grad_norm": 0.2419407218694687, "learning_rate": 1e-05, "loss": 0.921, "step": 65685 }, { "epoch": 58.184233835252435, "grad_norm": 0.2834322154521942, "learning_rate": 1e-05, "loss": 0.9513, "step": 65690 }, { "epoch": 58.188662533215236, "grad_norm": 0.23645217716693878, "learning_rate": 1e-05, "loss": 0.9975, "step": 65695 }, { "epoch": 58.19309123117803, "grad_norm": 0.22626784443855286, "learning_rate": 1e-05, "loss": 0.9527, "step": 65700 }, { "epoch": 58.19751992914083, "grad_norm": 0.2656615376472473, "learning_rate": 1e-05, "loss": 1.0159, "step": 65705 }, { "epoch": 58.20194862710363, "grad_norm": 0.2266595959663391, "learning_rate": 1e-05, "loss": 0.9833, "step": 65710 }, { "epoch": 58.20637732506643, "grad_norm": 0.24668626487255096, "learning_rate": 1e-05, "loss": 0.933, "step": 65715 }, { "epoch": 58.21080602302923, "grad_norm": 0.2371007353067398, "learning_rate": 1e-05, "loss": 1.0198, "step": 65720 }, { "epoch": 58.21523472099203, "grad_norm": 0.2515372931957245, "learning_rate": 1e-05, "loss": 0.8912, "step": 65725 }, { "epoch": 58.21966341895483, "grad_norm": 0.21236766874790192, "learning_rate": 1e-05, "loss": 0.9465, "step": 65730 }, { "epoch": 58.224092116917625, "grad_norm": 0.23803332448005676, "learning_rate": 1e-05, "loss": 1.0373, "step": 65735 }, { "epoch": 58.228520814880426, "grad_norm": 0.2148313969373703, "learning_rate": 1e-05, "loss": 0.941, "step": 65740 }, { "epoch": 58.23294951284323, "grad_norm": 0.2393161654472351, "learning_rate": 1e-05, "loss": 0.9529, "step": 65745 }, { "epoch": 58.23737821080602, "grad_norm": 0.24256375432014465, "learning_rate": 1e-05, "loss": 0.9378, "step": 65750 }, { "epoch": 58.24180690876882, "grad_norm": 0.21639619767665863, "learning_rate": 1e-05, "loss": 1.0088, "step": 65755 }, { "epoch": 58.246235606731624, "grad_norm": 0.2605295479297638, "learning_rate": 1e-05, "loss": 0.9652, "step": 65760 }, { "epoch": 58.25066430469442, "grad_norm": 0.2850554287433624, "learning_rate": 1e-05, "loss": 1.0016, "step": 65765 }, { "epoch": 58.25509300265722, "grad_norm": 0.2696142792701721, "learning_rate": 1e-05, "loss": 0.9873, "step": 65770 }, { "epoch": 58.25952170062002, "grad_norm": 0.21933311223983765, "learning_rate": 1e-05, "loss": 0.978, "step": 65775 }, { "epoch": 58.263950398582814, "grad_norm": 0.2362578958272934, "learning_rate": 1e-05, "loss": 1.0037, "step": 65780 }, { "epoch": 58.268379096545615, "grad_norm": 0.2514587342739105, "learning_rate": 1e-05, "loss": 0.9765, "step": 65785 }, { "epoch": 58.27280779450842, "grad_norm": 0.25394684076309204, "learning_rate": 1e-05, "loss": 1.0049, "step": 65790 }, { "epoch": 58.27723649247121, "grad_norm": 0.26241201162338257, "learning_rate": 1e-05, "loss": 1.045, "step": 65795 }, { "epoch": 58.28166519043401, "grad_norm": 0.2563392221927643, "learning_rate": 1e-05, "loss": 0.9781, "step": 65800 }, { "epoch": 58.28609388839681, "grad_norm": 0.26506853103637695, "learning_rate": 1e-05, "loss": 0.9575, "step": 65805 }, { "epoch": 58.29052258635961, "grad_norm": 0.27237415313720703, "learning_rate": 1e-05, "loss": 0.9907, "step": 65810 }, { "epoch": 58.29495128432241, "grad_norm": 0.27189236879348755, "learning_rate": 1e-05, "loss": 1.0135, "step": 65815 }, { "epoch": 58.29937998228521, "grad_norm": 0.25804075598716736, "learning_rate": 1e-05, "loss": 0.9493, "step": 65820 }, { "epoch": 58.303808680248004, "grad_norm": 0.23183204233646393, "learning_rate": 1e-05, "loss": 0.9518, "step": 65825 }, { "epoch": 58.308237378210805, "grad_norm": 0.20005829632282257, "learning_rate": 1e-05, "loss": 0.9511, "step": 65830 }, { "epoch": 58.312666076173606, "grad_norm": 0.24902881681919098, "learning_rate": 1e-05, "loss": 0.9419, "step": 65835 }, { "epoch": 58.31709477413641, "grad_norm": 0.2612319588661194, "learning_rate": 1e-05, "loss": 1.0145, "step": 65840 }, { "epoch": 58.3215234720992, "grad_norm": 0.252181738615036, "learning_rate": 1e-05, "loss": 1.0049, "step": 65845 }, { "epoch": 58.325952170062, "grad_norm": 0.23472100496292114, "learning_rate": 1e-05, "loss": 1.0005, "step": 65850 }, { "epoch": 58.330380868024804, "grad_norm": 0.2129027545452118, "learning_rate": 1e-05, "loss": 0.9358, "step": 65855 }, { "epoch": 58.3348095659876, "grad_norm": 0.2990647554397583, "learning_rate": 1e-05, "loss": 0.9741, "step": 65860 }, { "epoch": 58.3392382639504, "grad_norm": 0.24078285694122314, "learning_rate": 1e-05, "loss": 0.962, "step": 65865 }, { "epoch": 58.3436669619132, "grad_norm": 0.23519694805145264, "learning_rate": 1e-05, "loss": 1.0108, "step": 65870 }, { "epoch": 58.348095659875995, "grad_norm": 0.22987228631973267, "learning_rate": 1e-05, "loss": 0.9726, "step": 65875 }, { "epoch": 58.352524357838796, "grad_norm": 0.22575676441192627, "learning_rate": 1e-05, "loss": 0.9994, "step": 65880 }, { "epoch": 58.3569530558016, "grad_norm": 0.23451492190361023, "learning_rate": 1e-05, "loss": 0.8912, "step": 65885 }, { "epoch": 58.36138175376439, "grad_norm": 0.2505142390727997, "learning_rate": 1e-05, "loss": 0.9567, "step": 65890 }, { "epoch": 58.36581045172719, "grad_norm": 0.19898705184459686, "learning_rate": 1e-05, "loss": 0.9963, "step": 65895 }, { "epoch": 58.37023914968999, "grad_norm": 0.21096228063106537, "learning_rate": 1e-05, "loss": 0.9495, "step": 65900 }, { "epoch": 58.37466784765279, "grad_norm": 0.21735480427742004, "learning_rate": 1e-05, "loss": 1.0126, "step": 65905 }, { "epoch": 58.37909654561559, "grad_norm": 0.23830744624137878, "learning_rate": 1e-05, "loss": 1.0127, "step": 65910 }, { "epoch": 58.38352524357839, "grad_norm": 0.25660133361816406, "learning_rate": 1e-05, "loss": 1.0189, "step": 65915 }, { "epoch": 58.387953941541184, "grad_norm": 0.24429598450660706, "learning_rate": 1e-05, "loss": 0.9523, "step": 65920 }, { "epoch": 58.392382639503985, "grad_norm": 0.2170325517654419, "learning_rate": 1e-05, "loss": 0.9927, "step": 65925 }, { "epoch": 58.39681133746679, "grad_norm": 0.23029619455337524, "learning_rate": 1e-05, "loss": 0.991, "step": 65930 }, { "epoch": 58.40124003542958, "grad_norm": 0.24783751368522644, "learning_rate": 1e-05, "loss": 0.9726, "step": 65935 }, { "epoch": 58.40566873339238, "grad_norm": 0.30578845739364624, "learning_rate": 1e-05, "loss": 0.9772, "step": 65940 }, { "epoch": 58.41009743135518, "grad_norm": 0.2600264549255371, "learning_rate": 1e-05, "loss": 0.9539, "step": 65945 }, { "epoch": 58.41452612931798, "grad_norm": 0.21792787313461304, "learning_rate": 1e-05, "loss": 0.9959, "step": 65950 }, { "epoch": 58.41895482728078, "grad_norm": 0.23134055733680725, "learning_rate": 1e-05, "loss": 0.9835, "step": 65955 }, { "epoch": 58.42338352524358, "grad_norm": 0.20161613821983337, "learning_rate": 1e-05, "loss": 0.9302, "step": 65960 }, { "epoch": 58.42781222320638, "grad_norm": 0.21276476979255676, "learning_rate": 1e-05, "loss": 1.01, "step": 65965 }, { "epoch": 58.432240921169175, "grad_norm": 0.23096302151679993, "learning_rate": 1e-05, "loss": 0.9851, "step": 65970 }, { "epoch": 58.436669619131976, "grad_norm": 0.2491220086812973, "learning_rate": 1e-05, "loss": 0.9658, "step": 65975 }, { "epoch": 58.44109831709478, "grad_norm": 0.23723995685577393, "learning_rate": 1e-05, "loss": 0.9488, "step": 65980 }, { "epoch": 58.44552701505757, "grad_norm": 0.24288012087345123, "learning_rate": 1e-05, "loss": 0.9989, "step": 65985 }, { "epoch": 58.44995571302037, "grad_norm": 0.2346174716949463, "learning_rate": 1e-05, "loss": 0.9739, "step": 65990 }, { "epoch": 58.454384410983174, "grad_norm": 0.21272841095924377, "learning_rate": 1e-05, "loss": 0.9636, "step": 65995 }, { "epoch": 58.45881310894597, "grad_norm": 0.2549314498901367, "learning_rate": 1e-05, "loss": 0.996, "step": 66000 }, { "epoch": 58.46324180690877, "grad_norm": 0.3597918152809143, "learning_rate": 1e-05, "loss": 1.0121, "step": 66005 }, { "epoch": 58.46767050487157, "grad_norm": 0.22955277562141418, "learning_rate": 1e-05, "loss": 0.9784, "step": 66010 }, { "epoch": 58.472099202834364, "grad_norm": 0.2038521021604538, "learning_rate": 1e-05, "loss": 1.0093, "step": 66015 }, { "epoch": 58.476527900797166, "grad_norm": 0.20900289714336395, "learning_rate": 1e-05, "loss": 0.9922, "step": 66020 }, { "epoch": 58.48095659875997, "grad_norm": 0.19945380091667175, "learning_rate": 1e-05, "loss": 0.9146, "step": 66025 }, { "epoch": 58.48538529672276, "grad_norm": 0.21381662786006927, "learning_rate": 1e-05, "loss": 0.9512, "step": 66030 }, { "epoch": 58.48981399468556, "grad_norm": 0.24177663028240204, "learning_rate": 1e-05, "loss": 0.9581, "step": 66035 }, { "epoch": 58.49424269264836, "grad_norm": 0.2254743129014969, "learning_rate": 1e-05, "loss": 0.9679, "step": 66040 }, { "epoch": 58.49867139061116, "grad_norm": 0.24336585402488708, "learning_rate": 1e-05, "loss": 0.963, "step": 66045 }, { "epoch": 58.50310008857396, "grad_norm": 0.2734587788581848, "learning_rate": 1e-05, "loss": 1.0033, "step": 66050 }, { "epoch": 58.50752878653676, "grad_norm": 0.21274012327194214, "learning_rate": 1e-05, "loss": 0.9587, "step": 66055 }, { "epoch": 58.511957484499554, "grad_norm": 0.21428635716438293, "learning_rate": 1e-05, "loss": 0.9795, "step": 66060 }, { "epoch": 58.516386182462355, "grad_norm": 0.2442777454853058, "learning_rate": 1e-05, "loss": 0.9636, "step": 66065 }, { "epoch": 58.520814880425156, "grad_norm": 0.22740158438682556, "learning_rate": 1e-05, "loss": 0.9627, "step": 66070 }, { "epoch": 58.52524357838795, "grad_norm": 0.24372412264347076, "learning_rate": 1e-05, "loss": 1.0162, "step": 66075 }, { "epoch": 58.52967227635075, "grad_norm": 0.20975163578987122, "learning_rate": 1e-05, "loss": 0.9978, "step": 66080 }, { "epoch": 58.53410097431355, "grad_norm": 0.24740298092365265, "learning_rate": 1e-05, "loss": 0.9506, "step": 66085 }, { "epoch": 58.538529672276354, "grad_norm": 0.3319302201271057, "learning_rate": 1e-05, "loss": 0.9818, "step": 66090 }, { "epoch": 58.54295837023915, "grad_norm": 0.25016993284225464, "learning_rate": 1e-05, "loss": 0.9828, "step": 66095 }, { "epoch": 58.54738706820195, "grad_norm": 0.23626646399497986, "learning_rate": 1e-05, "loss": 1.0002, "step": 66100 }, { "epoch": 58.55181576616475, "grad_norm": 0.1829553097486496, "learning_rate": 1e-05, "loss": 1.006, "step": 66105 }, { "epoch": 58.556244464127545, "grad_norm": 0.2579843997955322, "learning_rate": 1e-05, "loss": 0.979, "step": 66110 }, { "epoch": 58.560673162090346, "grad_norm": 0.21693141758441925, "learning_rate": 1e-05, "loss": 0.9749, "step": 66115 }, { "epoch": 58.56510186005315, "grad_norm": 0.2358841449022293, "learning_rate": 1e-05, "loss": 1.0554, "step": 66120 }, { "epoch": 58.56953055801594, "grad_norm": 0.23074689507484436, "learning_rate": 1e-05, "loss": 0.9399, "step": 66125 }, { "epoch": 58.57395925597874, "grad_norm": 0.2559589445590973, "learning_rate": 1e-05, "loss": 0.955, "step": 66130 }, { "epoch": 58.578387953941544, "grad_norm": 0.2241109311580658, "learning_rate": 1e-05, "loss": 0.9291, "step": 66135 }, { "epoch": 58.58281665190434, "grad_norm": 0.2654598653316498, "learning_rate": 1e-05, "loss": 0.9667, "step": 66140 }, { "epoch": 58.58724534986714, "grad_norm": 0.22279340028762817, "learning_rate": 1e-05, "loss": 0.9399, "step": 66145 }, { "epoch": 58.59167404782994, "grad_norm": 0.22340507805347443, "learning_rate": 1e-05, "loss": 0.9642, "step": 66150 }, { "epoch": 58.596102745792734, "grad_norm": 0.23884937167167664, "learning_rate": 1e-05, "loss": 0.9385, "step": 66155 }, { "epoch": 58.600531443755536, "grad_norm": 0.2491408735513687, "learning_rate": 1e-05, "loss": 0.9447, "step": 66160 }, { "epoch": 58.60496014171834, "grad_norm": 0.2564755082130432, "learning_rate": 1e-05, "loss": 0.9639, "step": 66165 }, { "epoch": 58.60938883968113, "grad_norm": 0.22645698487758636, "learning_rate": 1e-05, "loss": 1.0501, "step": 66170 }, { "epoch": 58.61381753764393, "grad_norm": 0.2449694126844406, "learning_rate": 1e-05, "loss": 0.9421, "step": 66175 }, { "epoch": 58.61824623560673, "grad_norm": 0.22285647690296173, "learning_rate": 1e-05, "loss": 1.0056, "step": 66180 }, { "epoch": 58.62267493356953, "grad_norm": 0.2291710525751114, "learning_rate": 1e-05, "loss": 0.9587, "step": 66185 }, { "epoch": 58.62710363153233, "grad_norm": 0.2962253987789154, "learning_rate": 1e-05, "loss": 0.9823, "step": 66190 }, { "epoch": 58.63153232949513, "grad_norm": 0.2888045310974121, "learning_rate": 1e-05, "loss": 0.9872, "step": 66195 }, { "epoch": 58.635961027457924, "grad_norm": 0.24166929721832275, "learning_rate": 1e-05, "loss": 0.9961, "step": 66200 }, { "epoch": 58.640389725420725, "grad_norm": 0.23009037971496582, "learning_rate": 1e-05, "loss": 1.0109, "step": 66205 }, { "epoch": 58.644818423383526, "grad_norm": 0.25208649039268494, "learning_rate": 1e-05, "loss": 0.9918, "step": 66210 }, { "epoch": 58.64924712134633, "grad_norm": 0.22103185951709747, "learning_rate": 1e-05, "loss": 0.9853, "step": 66215 }, { "epoch": 58.65367581930912, "grad_norm": 0.24001199007034302, "learning_rate": 1e-05, "loss": 0.936, "step": 66220 }, { "epoch": 58.65810451727192, "grad_norm": 0.2976042628288269, "learning_rate": 1e-05, "loss": 1.0025, "step": 66225 }, { "epoch": 58.662533215234724, "grad_norm": 0.20681537687778473, "learning_rate": 1e-05, "loss": 0.9094, "step": 66230 }, { "epoch": 58.66696191319752, "grad_norm": 0.20726196467876434, "learning_rate": 1e-05, "loss": 0.9684, "step": 66235 }, { "epoch": 58.67139061116032, "grad_norm": 0.21387752890586853, "learning_rate": 1e-05, "loss": 1.0204, "step": 66240 }, { "epoch": 58.67581930912312, "grad_norm": 0.2361944019794464, "learning_rate": 1e-05, "loss": 0.9678, "step": 66245 }, { "epoch": 58.680248007085915, "grad_norm": 0.19607706367969513, "learning_rate": 1e-05, "loss": 0.9613, "step": 66250 }, { "epoch": 58.684676705048716, "grad_norm": 0.24780048429965973, "learning_rate": 1e-05, "loss": 0.8867, "step": 66255 }, { "epoch": 58.68910540301152, "grad_norm": 0.22258004546165466, "learning_rate": 1e-05, "loss": 0.9965, "step": 66260 }, { "epoch": 58.69353410097431, "grad_norm": 0.21379536390304565, "learning_rate": 1e-05, "loss": 0.9552, "step": 66265 }, { "epoch": 58.69796279893711, "grad_norm": 0.23343965411186218, "learning_rate": 1e-05, "loss": 1.0044, "step": 66270 }, { "epoch": 58.702391496899914, "grad_norm": 0.24378928542137146, "learning_rate": 1e-05, "loss": 1.0205, "step": 66275 }, { "epoch": 58.70682019486271, "grad_norm": 0.23671655356884003, "learning_rate": 1e-05, "loss": 0.9566, "step": 66280 }, { "epoch": 58.71124889282551, "grad_norm": 0.2634745240211487, "learning_rate": 1e-05, "loss": 0.965, "step": 66285 }, { "epoch": 58.71567759078831, "grad_norm": 0.206903338432312, "learning_rate": 1e-05, "loss": 0.9774, "step": 66290 }, { "epoch": 58.720106288751104, "grad_norm": 0.24206727743148804, "learning_rate": 1e-05, "loss": 0.9262, "step": 66295 }, { "epoch": 58.724534986713905, "grad_norm": 0.22427155077457428, "learning_rate": 1e-05, "loss": 1.0347, "step": 66300 }, { "epoch": 58.72896368467671, "grad_norm": 0.21122358739376068, "learning_rate": 1e-05, "loss": 0.9588, "step": 66305 }, { "epoch": 58.7333923826395, "grad_norm": 0.21206024289131165, "learning_rate": 1e-05, "loss": 0.9753, "step": 66310 }, { "epoch": 58.7378210806023, "grad_norm": 0.23835526406764984, "learning_rate": 1e-05, "loss": 0.9449, "step": 66315 }, { "epoch": 58.7422497785651, "grad_norm": 0.274087518453598, "learning_rate": 1e-05, "loss": 0.9471, "step": 66320 }, { "epoch": 58.7466784765279, "grad_norm": 0.314848393201828, "learning_rate": 1e-05, "loss": 0.9472, "step": 66325 }, { "epoch": 58.7511071744907, "grad_norm": 0.23780398070812225, "learning_rate": 1e-05, "loss": 1.0145, "step": 66330 }, { "epoch": 58.7555358724535, "grad_norm": 0.24787330627441406, "learning_rate": 1e-05, "loss": 0.9622, "step": 66335 }, { "epoch": 58.7599645704163, "grad_norm": 0.23480159044265747, "learning_rate": 1e-05, "loss": 0.9816, "step": 66340 }, { "epoch": 58.764393268379095, "grad_norm": 0.2621453106403351, "learning_rate": 1e-05, "loss": 0.9276, "step": 66345 }, { "epoch": 58.768821966341896, "grad_norm": 0.27325886487960815, "learning_rate": 1e-05, "loss": 0.9553, "step": 66350 }, { "epoch": 58.7732506643047, "grad_norm": 0.21999648213386536, "learning_rate": 1e-05, "loss": 0.9974, "step": 66355 }, { "epoch": 58.77767936226749, "grad_norm": 0.23174482583999634, "learning_rate": 1e-05, "loss": 0.9895, "step": 66360 }, { "epoch": 58.78210806023029, "grad_norm": 0.23891036212444305, "learning_rate": 1e-05, "loss": 0.903, "step": 66365 }, { "epoch": 58.786536758193094, "grad_norm": 0.23595242202281952, "learning_rate": 1e-05, "loss": 1.0136, "step": 66370 }, { "epoch": 58.79096545615589, "grad_norm": 0.2569817006587982, "learning_rate": 1e-05, "loss": 0.9811, "step": 66375 }, { "epoch": 58.79539415411869, "grad_norm": 0.21331319212913513, "learning_rate": 1e-05, "loss": 1.0119, "step": 66380 }, { "epoch": 58.79982285208149, "grad_norm": 0.21353302896022797, "learning_rate": 1e-05, "loss": 0.9885, "step": 66385 }, { "epoch": 58.804251550044285, "grad_norm": 0.2393171638250351, "learning_rate": 1e-05, "loss": 0.9423, "step": 66390 }, { "epoch": 58.808680248007086, "grad_norm": 0.26260244846343994, "learning_rate": 1e-05, "loss": 0.9271, "step": 66395 }, { "epoch": 58.81310894596989, "grad_norm": 0.2421475052833557, "learning_rate": 1e-05, "loss": 0.954, "step": 66400 }, { "epoch": 58.81753764393268, "grad_norm": 0.24958375096321106, "learning_rate": 1e-05, "loss": 0.954, "step": 66405 }, { "epoch": 58.82196634189548, "grad_norm": 0.3130318522453308, "learning_rate": 1e-05, "loss": 0.9783, "step": 66410 }, { "epoch": 58.826395039858284, "grad_norm": 0.25453537702560425, "learning_rate": 1e-05, "loss": 0.9894, "step": 66415 }, { "epoch": 58.83082373782108, "grad_norm": 0.22835572063922882, "learning_rate": 1e-05, "loss": 0.9714, "step": 66420 }, { "epoch": 58.83525243578388, "grad_norm": 0.3002355098724365, "learning_rate": 1e-05, "loss": 1.0011, "step": 66425 }, { "epoch": 58.83968113374668, "grad_norm": 0.2848992943763733, "learning_rate": 1e-05, "loss": 0.9543, "step": 66430 }, { "epoch": 58.844109831709474, "grad_norm": 0.2286827117204666, "learning_rate": 1e-05, "loss": 0.977, "step": 66435 }, { "epoch": 58.848538529672275, "grad_norm": 0.23456798493862152, "learning_rate": 1e-05, "loss": 0.9485, "step": 66440 }, { "epoch": 58.85296722763508, "grad_norm": 0.2134729027748108, "learning_rate": 1e-05, "loss": 0.9839, "step": 66445 }, { "epoch": 58.85739592559787, "grad_norm": 0.2205529361963272, "learning_rate": 1e-05, "loss": 0.9986, "step": 66450 }, { "epoch": 58.86182462356067, "grad_norm": 0.25917816162109375, "learning_rate": 1e-05, "loss": 0.9598, "step": 66455 }, { "epoch": 58.86625332152347, "grad_norm": 0.25884154438972473, "learning_rate": 1e-05, "loss": 0.9514, "step": 66460 }, { "epoch": 58.870682019486274, "grad_norm": 0.2629377543926239, "learning_rate": 1e-05, "loss": 0.9721, "step": 66465 }, { "epoch": 58.87511071744907, "grad_norm": 0.2892385721206665, "learning_rate": 1e-05, "loss": 0.9213, "step": 66470 }, { "epoch": 58.87953941541187, "grad_norm": 0.21622851490974426, "learning_rate": 1e-05, "loss": 0.9485, "step": 66475 }, { "epoch": 58.88396811337467, "grad_norm": 0.2472354918718338, "learning_rate": 1e-05, "loss": 0.9686, "step": 66480 }, { "epoch": 58.888396811337465, "grad_norm": 0.244821235537529, "learning_rate": 1e-05, "loss": 0.9259, "step": 66485 }, { "epoch": 58.892825509300266, "grad_norm": 0.2422996610403061, "learning_rate": 1e-05, "loss": 0.976, "step": 66490 }, { "epoch": 58.89725420726307, "grad_norm": 0.2427123486995697, "learning_rate": 1e-05, "loss": 0.9967, "step": 66495 }, { "epoch": 58.90168290522586, "grad_norm": 0.2249944508075714, "learning_rate": 1e-05, "loss": 0.9605, "step": 66500 }, { "epoch": 58.90611160318866, "grad_norm": 0.2916962504386902, "learning_rate": 1e-05, "loss": 0.8985, "step": 66505 }, { "epoch": 58.910540301151464, "grad_norm": 0.2462603598833084, "learning_rate": 1e-05, "loss": 0.9817, "step": 66510 }, { "epoch": 58.91496899911426, "grad_norm": 0.2573831379413605, "learning_rate": 1e-05, "loss": 1.0366, "step": 66515 }, { "epoch": 58.91939769707706, "grad_norm": 0.22972248494625092, "learning_rate": 1e-05, "loss": 0.9764, "step": 66520 }, { "epoch": 58.92382639503986, "grad_norm": 0.24964314699172974, "learning_rate": 1e-05, "loss": 0.9877, "step": 66525 }, { "epoch": 58.928255093002655, "grad_norm": 0.27196383476257324, "learning_rate": 1e-05, "loss": 0.9489, "step": 66530 }, { "epoch": 58.932683790965456, "grad_norm": 0.2552023231983185, "learning_rate": 1e-05, "loss": 1.0175, "step": 66535 }, { "epoch": 58.93711248892826, "grad_norm": 0.23833294212818146, "learning_rate": 1e-05, "loss": 0.9726, "step": 66540 }, { "epoch": 58.94154118689105, "grad_norm": 0.25160494446754456, "learning_rate": 1e-05, "loss": 0.9366, "step": 66545 }, { "epoch": 58.94596988485385, "grad_norm": 0.21554109454154968, "learning_rate": 1e-05, "loss": 1.0051, "step": 66550 }, { "epoch": 58.95039858281665, "grad_norm": 0.2272166609764099, "learning_rate": 1e-05, "loss": 0.9767, "step": 66555 }, { "epoch": 58.95482728077945, "grad_norm": 0.263322114944458, "learning_rate": 1e-05, "loss": 0.9871, "step": 66560 }, { "epoch": 58.95925597874225, "grad_norm": 0.2646535336971283, "learning_rate": 1e-05, "loss": 0.9924, "step": 66565 }, { "epoch": 58.96368467670505, "grad_norm": 0.23884084820747375, "learning_rate": 1e-05, "loss": 0.9487, "step": 66570 }, { "epoch": 58.96811337466785, "grad_norm": 0.24935728311538696, "learning_rate": 1e-05, "loss": 0.9291, "step": 66575 }, { "epoch": 58.972542072630645, "grad_norm": 0.24558894336223602, "learning_rate": 1e-05, "loss": 0.9597, "step": 66580 }, { "epoch": 58.97697077059345, "grad_norm": 0.2120255082845688, "learning_rate": 1e-05, "loss": 0.9417, "step": 66585 }, { "epoch": 58.98139946855625, "grad_norm": 0.23919865489006042, "learning_rate": 1e-05, "loss": 0.9787, "step": 66590 }, { "epoch": 58.98582816651904, "grad_norm": 0.24285271763801575, "learning_rate": 1e-05, "loss": 0.8847, "step": 66595 }, { "epoch": 58.99025686448184, "grad_norm": 0.26579606533050537, "learning_rate": 1e-05, "loss": 1.0232, "step": 66600 }, { "epoch": 58.994685562444644, "grad_norm": 0.24005885422229767, "learning_rate": 1e-05, "loss": 0.9876, "step": 66605 }, { "epoch": 58.99911426040744, "grad_norm": 0.22951431572437286, "learning_rate": 1e-05, "loss": 1.0279, "step": 66610 }, { "epoch": 59.00354295837024, "grad_norm": 0.2932699918746948, "learning_rate": 1e-05, "loss": 0.9372, "step": 66615 }, { "epoch": 59.00797165633304, "grad_norm": 0.22845040261745453, "learning_rate": 1e-05, "loss": 0.9347, "step": 66620 }, { "epoch": 59.012400354295835, "grad_norm": 0.21848931908607483, "learning_rate": 1e-05, "loss": 0.9465, "step": 66625 }, { "epoch": 59.016829052258636, "grad_norm": 0.24698922038078308, "learning_rate": 1e-05, "loss": 0.9978, "step": 66630 }, { "epoch": 59.02125775022144, "grad_norm": 0.2556541860103607, "learning_rate": 1e-05, "loss": 0.9648, "step": 66635 }, { "epoch": 59.02568644818423, "grad_norm": 0.23641115427017212, "learning_rate": 1e-05, "loss": 0.946, "step": 66640 }, { "epoch": 59.03011514614703, "grad_norm": 0.2486625611782074, "learning_rate": 1e-05, "loss": 0.9702, "step": 66645 }, { "epoch": 59.034543844109834, "grad_norm": 0.23156021535396576, "learning_rate": 1e-05, "loss": 0.9547, "step": 66650 }, { "epoch": 59.03897254207263, "grad_norm": 0.2528839707374573, "learning_rate": 1e-05, "loss": 0.9859, "step": 66655 }, { "epoch": 59.04340124003543, "grad_norm": 0.23054809868335724, "learning_rate": 1e-05, "loss": 1.0019, "step": 66660 }, { "epoch": 59.04782993799823, "grad_norm": 0.2053862065076828, "learning_rate": 1e-05, "loss": 0.9246, "step": 66665 }, { "epoch": 59.052258635961024, "grad_norm": 0.22063937783241272, "learning_rate": 1e-05, "loss": 0.9276, "step": 66670 }, { "epoch": 59.056687333923826, "grad_norm": 0.22324319183826447, "learning_rate": 1e-05, "loss": 0.9225, "step": 66675 }, { "epoch": 59.06111603188663, "grad_norm": 0.23338168859481812, "learning_rate": 1e-05, "loss": 0.9688, "step": 66680 }, { "epoch": 59.06554472984942, "grad_norm": 0.2395501434803009, "learning_rate": 1e-05, "loss": 0.9593, "step": 66685 }, { "epoch": 59.06997342781222, "grad_norm": 0.2651427984237671, "learning_rate": 1e-05, "loss": 1.0267, "step": 66690 }, { "epoch": 59.07440212577502, "grad_norm": 0.24108341336250305, "learning_rate": 1e-05, "loss": 0.922, "step": 66695 }, { "epoch": 59.078830823737825, "grad_norm": 0.24153071641921997, "learning_rate": 1e-05, "loss": 0.9891, "step": 66700 }, { "epoch": 59.08325952170062, "grad_norm": 0.2270612120628357, "learning_rate": 1e-05, "loss": 0.9399, "step": 66705 }, { "epoch": 59.08768821966342, "grad_norm": 0.23814406991004944, "learning_rate": 1e-05, "loss": 0.9806, "step": 66710 }, { "epoch": 59.09211691762622, "grad_norm": 0.24804134666919708, "learning_rate": 1e-05, "loss": 0.9415, "step": 66715 }, { "epoch": 59.096545615589015, "grad_norm": 0.27677488327026367, "learning_rate": 1e-05, "loss": 1.006, "step": 66720 }, { "epoch": 59.100974313551816, "grad_norm": 0.2481439858675003, "learning_rate": 1e-05, "loss": 0.9585, "step": 66725 }, { "epoch": 59.10540301151462, "grad_norm": 0.21477828919887543, "learning_rate": 1e-05, "loss": 0.9657, "step": 66730 }, { "epoch": 59.10983170947741, "grad_norm": 0.23443078994750977, "learning_rate": 1e-05, "loss": 0.974, "step": 66735 }, { "epoch": 59.11426040744021, "grad_norm": 0.19984757900238037, "learning_rate": 1e-05, "loss": 0.9769, "step": 66740 }, { "epoch": 59.118689105403014, "grad_norm": 0.22607550024986267, "learning_rate": 1e-05, "loss": 0.9769, "step": 66745 }, { "epoch": 59.12311780336581, "grad_norm": 0.2566843330860138, "learning_rate": 1e-05, "loss": 1.0295, "step": 66750 }, { "epoch": 59.12754650132861, "grad_norm": 0.24119414389133453, "learning_rate": 1e-05, "loss": 0.9507, "step": 66755 }, { "epoch": 59.13197519929141, "grad_norm": 0.24013695120811462, "learning_rate": 1e-05, "loss": 0.9461, "step": 66760 }, { "epoch": 59.136403897254205, "grad_norm": 0.2711068093776703, "learning_rate": 1e-05, "loss": 1.0125, "step": 66765 }, { "epoch": 59.140832595217006, "grad_norm": 0.25309956073760986, "learning_rate": 1e-05, "loss": 0.9464, "step": 66770 }, { "epoch": 59.14526129317981, "grad_norm": 0.26761960983276367, "learning_rate": 1e-05, "loss": 0.9775, "step": 66775 }, { "epoch": 59.1496899911426, "grad_norm": 0.27214285731315613, "learning_rate": 1e-05, "loss": 0.9917, "step": 66780 }, { "epoch": 59.1541186891054, "grad_norm": 0.22049346566200256, "learning_rate": 1e-05, "loss": 0.9617, "step": 66785 }, { "epoch": 59.158547387068204, "grad_norm": 0.3557475209236145, "learning_rate": 1e-05, "loss": 0.9851, "step": 66790 }, { "epoch": 59.162976085031, "grad_norm": 0.2651252746582031, "learning_rate": 1e-05, "loss": 0.9149, "step": 66795 }, { "epoch": 59.1674047829938, "grad_norm": 0.29058703780174255, "learning_rate": 1e-05, "loss": 0.9687, "step": 66800 }, { "epoch": 59.1718334809566, "grad_norm": 0.27897506952285767, "learning_rate": 1e-05, "loss": 0.9239, "step": 66805 }, { "epoch": 59.176262178919394, "grad_norm": 0.22232943773269653, "learning_rate": 1e-05, "loss": 0.9604, "step": 66810 }, { "epoch": 59.180690876882196, "grad_norm": 0.24779817461967468, "learning_rate": 1e-05, "loss": 1.0037, "step": 66815 }, { "epoch": 59.185119574845, "grad_norm": 0.22302231192588806, "learning_rate": 1e-05, "loss": 0.9508, "step": 66820 }, { "epoch": 59.1895482728078, "grad_norm": 0.24547143280506134, "learning_rate": 1e-05, "loss": 0.9416, "step": 66825 }, { "epoch": 59.19397697077059, "grad_norm": 0.21537873148918152, "learning_rate": 1e-05, "loss": 0.9271, "step": 66830 }, { "epoch": 59.19840566873339, "grad_norm": 0.3073902130126953, "learning_rate": 1e-05, "loss": 0.9945, "step": 66835 }, { "epoch": 59.202834366696194, "grad_norm": 0.23865914344787598, "learning_rate": 1e-05, "loss": 0.9474, "step": 66840 }, { "epoch": 59.20726306465899, "grad_norm": 0.2591618299484253, "learning_rate": 1e-05, "loss": 0.9523, "step": 66845 }, { "epoch": 59.21169176262179, "grad_norm": 0.23636005818843842, "learning_rate": 1e-05, "loss": 0.9668, "step": 66850 }, { "epoch": 59.21612046058459, "grad_norm": 0.2438323199748993, "learning_rate": 1e-05, "loss": 0.9872, "step": 66855 }, { "epoch": 59.220549158547385, "grad_norm": 0.24456475675106049, "learning_rate": 1e-05, "loss": 1.0107, "step": 66860 }, { "epoch": 59.224977856510186, "grad_norm": 0.23916451632976532, "learning_rate": 1e-05, "loss": 1.003, "step": 66865 }, { "epoch": 59.22940655447299, "grad_norm": 0.2286665141582489, "learning_rate": 1e-05, "loss": 0.9618, "step": 66870 }, { "epoch": 59.23383525243578, "grad_norm": 0.22660048305988312, "learning_rate": 1e-05, "loss": 0.9873, "step": 66875 }, { "epoch": 59.23826395039858, "grad_norm": 0.2248922437429428, "learning_rate": 1e-05, "loss": 0.9737, "step": 66880 }, { "epoch": 59.242692648361384, "grad_norm": 0.24523353576660156, "learning_rate": 1e-05, "loss": 0.9515, "step": 66885 }, { "epoch": 59.24712134632418, "grad_norm": 0.2399088591337204, "learning_rate": 1e-05, "loss": 0.9994, "step": 66890 }, { "epoch": 59.25155004428698, "grad_norm": 0.23760074377059937, "learning_rate": 1e-05, "loss": 0.9648, "step": 66895 }, { "epoch": 59.25597874224978, "grad_norm": 0.2238783836364746, "learning_rate": 1e-05, "loss": 0.9297, "step": 66900 }, { "epoch": 59.260407440212575, "grad_norm": 0.287847101688385, "learning_rate": 1e-05, "loss": 1.0076, "step": 66905 }, { "epoch": 59.264836138175376, "grad_norm": 0.24397455155849457, "learning_rate": 1e-05, "loss": 0.9877, "step": 66910 }, { "epoch": 59.26926483613818, "grad_norm": 0.24414679408073425, "learning_rate": 1e-05, "loss": 0.9738, "step": 66915 }, { "epoch": 59.27369353410097, "grad_norm": 0.23794770240783691, "learning_rate": 1e-05, "loss": 0.9729, "step": 66920 }, { "epoch": 59.27812223206377, "grad_norm": 0.26321831345558167, "learning_rate": 1e-05, "loss": 1.0015, "step": 66925 }, { "epoch": 59.282550930026574, "grad_norm": 0.2558186650276184, "learning_rate": 1e-05, "loss": 0.9949, "step": 66930 }, { "epoch": 59.28697962798937, "grad_norm": 0.25666743516921997, "learning_rate": 1e-05, "loss": 0.989, "step": 66935 }, { "epoch": 59.29140832595217, "grad_norm": 0.2599847912788391, "learning_rate": 1e-05, "loss": 0.9961, "step": 66940 }, { "epoch": 59.29583702391497, "grad_norm": 0.24281898140907288, "learning_rate": 1e-05, "loss": 0.9211, "step": 66945 }, { "epoch": 59.30026572187777, "grad_norm": 0.2657301723957062, "learning_rate": 1e-05, "loss": 0.9251, "step": 66950 }, { "epoch": 59.304694419840565, "grad_norm": 0.2535754442214966, "learning_rate": 1e-05, "loss": 0.9678, "step": 66955 }, { "epoch": 59.30912311780337, "grad_norm": 0.2630328834056854, "learning_rate": 1e-05, "loss": 0.9304, "step": 66960 }, { "epoch": 59.31355181576617, "grad_norm": 0.20457661151885986, "learning_rate": 1e-05, "loss": 0.9341, "step": 66965 }, { "epoch": 59.31798051372896, "grad_norm": 0.23641666769981384, "learning_rate": 1e-05, "loss": 0.9444, "step": 66970 }, { "epoch": 59.32240921169176, "grad_norm": 0.2554995119571686, "learning_rate": 1e-05, "loss": 0.9655, "step": 66975 }, { "epoch": 59.326837909654564, "grad_norm": 0.2306179702281952, "learning_rate": 1e-05, "loss": 0.9559, "step": 66980 }, { "epoch": 59.33126660761736, "grad_norm": 0.25697603821754456, "learning_rate": 1e-05, "loss": 0.9314, "step": 66985 }, { "epoch": 59.33569530558016, "grad_norm": 0.25965002179145813, "learning_rate": 1e-05, "loss": 0.9838, "step": 66990 }, { "epoch": 59.34012400354296, "grad_norm": 0.24917984008789062, "learning_rate": 1e-05, "loss": 0.9387, "step": 66995 }, { "epoch": 59.344552701505755, "grad_norm": 0.26654982566833496, "learning_rate": 1e-05, "loss": 0.9435, "step": 67000 }, { "epoch": 59.348981399468556, "grad_norm": 0.2426074892282486, "learning_rate": 1e-05, "loss": 0.9591, "step": 67005 }, { "epoch": 59.35341009743136, "grad_norm": 0.26889070868492126, "learning_rate": 1e-05, "loss": 0.9516, "step": 67010 }, { "epoch": 59.35783879539415, "grad_norm": 0.20574183762073517, "learning_rate": 1e-05, "loss": 0.9812, "step": 67015 }, { "epoch": 59.36226749335695, "grad_norm": 0.23412829637527466, "learning_rate": 1e-05, "loss": 0.9834, "step": 67020 }, { "epoch": 59.366696191319754, "grad_norm": 0.24843290448188782, "learning_rate": 1e-05, "loss": 0.9718, "step": 67025 }, { "epoch": 59.37112488928255, "grad_norm": 0.2229468673467636, "learning_rate": 1e-05, "loss": 0.9609, "step": 67030 }, { "epoch": 59.37555358724535, "grad_norm": 0.23482932150363922, "learning_rate": 1e-05, "loss": 0.9305, "step": 67035 }, { "epoch": 59.37998228520815, "grad_norm": 0.23547935485839844, "learning_rate": 1e-05, "loss": 0.96, "step": 67040 }, { "epoch": 59.384410983170945, "grad_norm": 0.24420316517353058, "learning_rate": 1e-05, "loss": 0.9401, "step": 67045 }, { "epoch": 59.388839681133746, "grad_norm": 0.2632458209991455, "learning_rate": 1e-05, "loss": 0.9937, "step": 67050 }, { "epoch": 59.39326837909655, "grad_norm": 0.24076366424560547, "learning_rate": 1e-05, "loss": 0.937, "step": 67055 }, { "epoch": 59.39769707705934, "grad_norm": 0.27422770857810974, "learning_rate": 1e-05, "loss": 0.9636, "step": 67060 }, { "epoch": 59.40212577502214, "grad_norm": 0.2265530526638031, "learning_rate": 1e-05, "loss": 0.9902, "step": 67065 }, { "epoch": 59.40655447298494, "grad_norm": 0.22102276980876923, "learning_rate": 1e-05, "loss": 0.9842, "step": 67070 }, { "epoch": 59.410983170947745, "grad_norm": 0.21235457062721252, "learning_rate": 1e-05, "loss": 0.9461, "step": 67075 }, { "epoch": 59.41541186891054, "grad_norm": 0.24659648537635803, "learning_rate": 1e-05, "loss": 1.0166, "step": 67080 }, { "epoch": 59.41984056687334, "grad_norm": 0.2166571319103241, "learning_rate": 1e-05, "loss": 1.0039, "step": 67085 }, { "epoch": 59.42426926483614, "grad_norm": 0.22712436318397522, "learning_rate": 1e-05, "loss": 0.9465, "step": 67090 }, { "epoch": 59.428697962798935, "grad_norm": 0.21531012654304504, "learning_rate": 1e-05, "loss": 1.0074, "step": 67095 }, { "epoch": 59.43312666076174, "grad_norm": 0.21641729772090912, "learning_rate": 1e-05, "loss": 0.9609, "step": 67100 }, { "epoch": 59.43755535872454, "grad_norm": 0.20961648225784302, "learning_rate": 1e-05, "loss": 0.9485, "step": 67105 }, { "epoch": 59.44198405668733, "grad_norm": 0.19643104076385498, "learning_rate": 1e-05, "loss": 0.9402, "step": 67110 }, { "epoch": 59.44641275465013, "grad_norm": 0.21212148666381836, "learning_rate": 1e-05, "loss": 0.9923, "step": 67115 }, { "epoch": 59.450841452612934, "grad_norm": 0.21542568504810333, "learning_rate": 1e-05, "loss": 0.9926, "step": 67120 }, { "epoch": 59.45527015057573, "grad_norm": 0.26023104786872864, "learning_rate": 1e-05, "loss": 0.9928, "step": 67125 }, { "epoch": 59.45969884853853, "grad_norm": 0.23674340546131134, "learning_rate": 1e-05, "loss": 0.9092, "step": 67130 }, { "epoch": 59.46412754650133, "grad_norm": 0.23538298904895782, "learning_rate": 1e-05, "loss": 0.95, "step": 67135 }, { "epoch": 59.468556244464125, "grad_norm": 0.2596363425254822, "learning_rate": 1e-05, "loss": 0.9349, "step": 67140 }, { "epoch": 59.472984942426926, "grad_norm": 0.20869964361190796, "learning_rate": 1e-05, "loss": 0.9782, "step": 67145 }, { "epoch": 59.47741364038973, "grad_norm": 0.2394796460866928, "learning_rate": 1e-05, "loss": 0.9825, "step": 67150 }, { "epoch": 59.48184233835252, "grad_norm": 0.22037747502326965, "learning_rate": 1e-05, "loss": 0.9903, "step": 67155 }, { "epoch": 59.48627103631532, "grad_norm": 0.20692504942417145, "learning_rate": 1e-05, "loss": 0.9429, "step": 67160 }, { "epoch": 59.490699734278124, "grad_norm": 0.22223250567913055, "learning_rate": 1e-05, "loss": 0.9839, "step": 67165 }, { "epoch": 59.49512843224092, "grad_norm": 0.2629796862602234, "learning_rate": 1e-05, "loss": 0.9834, "step": 67170 }, { "epoch": 59.49955713020372, "grad_norm": 0.2817767858505249, "learning_rate": 1e-05, "loss": 0.9463, "step": 67175 }, { "epoch": 59.50398582816652, "grad_norm": 0.2689191997051239, "learning_rate": 1e-05, "loss": 0.974, "step": 67180 }, { "epoch": 59.508414526129314, "grad_norm": 0.27091455459594727, "learning_rate": 1e-05, "loss": 0.9548, "step": 67185 }, { "epoch": 59.512843224092116, "grad_norm": 0.26691892743110657, "learning_rate": 1e-05, "loss": 0.9796, "step": 67190 }, { "epoch": 59.51727192205492, "grad_norm": 0.26606935262680054, "learning_rate": 1e-05, "loss": 0.9439, "step": 67195 }, { "epoch": 59.52170062001772, "grad_norm": 0.22358602285385132, "learning_rate": 1e-05, "loss": 0.921, "step": 67200 }, { "epoch": 59.52612931798051, "grad_norm": 0.22314418852329254, "learning_rate": 1e-05, "loss": 0.9709, "step": 67205 }, { "epoch": 59.53055801594331, "grad_norm": 0.22729524970054626, "learning_rate": 1e-05, "loss": 0.9328, "step": 67210 }, { "epoch": 59.534986713906115, "grad_norm": 0.2256029099225998, "learning_rate": 1e-05, "loss": 0.9467, "step": 67215 }, { "epoch": 59.53941541186891, "grad_norm": 0.27966466546058655, "learning_rate": 1e-05, "loss": 0.9856, "step": 67220 }, { "epoch": 59.54384410983171, "grad_norm": 0.2753465473651886, "learning_rate": 1e-05, "loss": 0.9718, "step": 67225 }, { "epoch": 59.54827280779451, "grad_norm": 0.24972563982009888, "learning_rate": 1e-05, "loss": 0.9475, "step": 67230 }, { "epoch": 59.552701505757305, "grad_norm": 0.28063878417015076, "learning_rate": 1e-05, "loss": 0.9767, "step": 67235 }, { "epoch": 59.55713020372011, "grad_norm": 0.26607462763786316, "learning_rate": 1e-05, "loss": 0.9591, "step": 67240 }, { "epoch": 59.56155890168291, "grad_norm": 0.2708020806312561, "learning_rate": 1e-05, "loss": 0.9285, "step": 67245 }, { "epoch": 59.5659875996457, "grad_norm": 0.2864118814468384, "learning_rate": 1e-05, "loss": 1.0101, "step": 67250 }, { "epoch": 59.5704162976085, "grad_norm": 0.2689034938812256, "learning_rate": 1e-05, "loss": 0.9492, "step": 67255 }, { "epoch": 59.574844995571304, "grad_norm": 0.24226604402065277, "learning_rate": 1e-05, "loss": 1.0001, "step": 67260 }, { "epoch": 59.5792736935341, "grad_norm": 0.23298649489879608, "learning_rate": 1e-05, "loss": 0.9794, "step": 67265 }, { "epoch": 59.5837023914969, "grad_norm": 0.23314152657985687, "learning_rate": 1e-05, "loss": 0.9456, "step": 67270 }, { "epoch": 59.5881310894597, "grad_norm": 0.24087823927402496, "learning_rate": 1e-05, "loss": 0.9783, "step": 67275 }, { "epoch": 59.592559787422495, "grad_norm": 0.2168496549129486, "learning_rate": 1e-05, "loss": 0.9538, "step": 67280 }, { "epoch": 59.596988485385296, "grad_norm": 0.27107295393943787, "learning_rate": 1e-05, "loss": 0.9612, "step": 67285 }, { "epoch": 59.6014171833481, "grad_norm": 0.25947487354278564, "learning_rate": 1e-05, "loss": 0.9701, "step": 67290 }, { "epoch": 59.60584588131089, "grad_norm": 0.23305225372314453, "learning_rate": 1e-05, "loss": 0.9479, "step": 67295 }, { "epoch": 59.61027457927369, "grad_norm": 0.2442581206560135, "learning_rate": 1e-05, "loss": 0.9593, "step": 67300 }, { "epoch": 59.614703277236494, "grad_norm": 0.26204654574394226, "learning_rate": 1e-05, "loss": 0.9949, "step": 67305 }, { "epoch": 59.619131975199295, "grad_norm": 0.2580435574054718, "learning_rate": 1e-05, "loss": 0.9755, "step": 67310 }, { "epoch": 59.62356067316209, "grad_norm": 0.23890714347362518, "learning_rate": 1e-05, "loss": 0.9553, "step": 67315 }, { "epoch": 59.62798937112489, "grad_norm": 0.249770388007164, "learning_rate": 1e-05, "loss": 0.9386, "step": 67320 }, { "epoch": 59.63241806908769, "grad_norm": 0.22697529196739197, "learning_rate": 1e-05, "loss": 0.9489, "step": 67325 }, { "epoch": 59.636846767050486, "grad_norm": 0.23145148158073425, "learning_rate": 1e-05, "loss": 1.0289, "step": 67330 }, { "epoch": 59.64127546501329, "grad_norm": 0.23793938755989075, "learning_rate": 1e-05, "loss": 0.9676, "step": 67335 }, { "epoch": 59.64570416297609, "grad_norm": 0.2357412874698639, "learning_rate": 1e-05, "loss": 0.9648, "step": 67340 }, { "epoch": 59.65013286093888, "grad_norm": 0.2740411162376404, "learning_rate": 1e-05, "loss": 0.9135, "step": 67345 }, { "epoch": 59.65456155890168, "grad_norm": 0.2274979203939438, "learning_rate": 1e-05, "loss": 0.903, "step": 67350 }, { "epoch": 59.658990256864485, "grad_norm": 0.2634359300136566, "learning_rate": 1e-05, "loss": 0.9494, "step": 67355 }, { "epoch": 59.66341895482728, "grad_norm": 0.2276473194360733, "learning_rate": 1e-05, "loss": 0.9682, "step": 67360 }, { "epoch": 59.66784765279008, "grad_norm": 0.2399526834487915, "learning_rate": 1e-05, "loss": 0.9944, "step": 67365 }, { "epoch": 59.67227635075288, "grad_norm": 0.23105016350746155, "learning_rate": 1e-05, "loss": 0.9775, "step": 67370 }, { "epoch": 59.676705048715675, "grad_norm": 0.24216358363628387, "learning_rate": 1e-05, "loss": 1.0355, "step": 67375 }, { "epoch": 59.681133746678476, "grad_norm": 0.21329949796199799, "learning_rate": 1e-05, "loss": 0.9406, "step": 67380 }, { "epoch": 59.68556244464128, "grad_norm": 0.2208309918642044, "learning_rate": 1e-05, "loss": 0.9593, "step": 67385 }, { "epoch": 59.68999114260407, "grad_norm": 0.24061959981918335, "learning_rate": 1e-05, "loss": 0.9558, "step": 67390 }, { "epoch": 59.69441984056687, "grad_norm": 0.2035655379295349, "learning_rate": 1e-05, "loss": 0.9755, "step": 67395 }, { "epoch": 59.698848538529674, "grad_norm": 0.21024549007415771, "learning_rate": 1e-05, "loss": 0.9938, "step": 67400 }, { "epoch": 59.70327723649247, "grad_norm": 0.24159760773181915, "learning_rate": 1e-05, "loss": 0.9779, "step": 67405 }, { "epoch": 59.70770593445527, "grad_norm": 0.2286030650138855, "learning_rate": 1e-05, "loss": 0.989, "step": 67410 }, { "epoch": 59.71213463241807, "grad_norm": 0.28946295380592346, "learning_rate": 1e-05, "loss": 0.9353, "step": 67415 }, { "epoch": 59.716563330380865, "grad_norm": 0.19123387336730957, "learning_rate": 1e-05, "loss": 1.0026, "step": 67420 }, { "epoch": 59.720992028343666, "grad_norm": 0.2614642083644867, "learning_rate": 1e-05, "loss": 0.9456, "step": 67425 }, { "epoch": 59.72542072630647, "grad_norm": 0.2773922383785248, "learning_rate": 1e-05, "loss": 0.9984, "step": 67430 }, { "epoch": 59.72984942426926, "grad_norm": 0.2428750842809677, "learning_rate": 1e-05, "loss": 0.9184, "step": 67435 }, { "epoch": 59.73427812223206, "grad_norm": 0.22674207389354706, "learning_rate": 1e-05, "loss": 1.0044, "step": 67440 }, { "epoch": 59.738706820194864, "grad_norm": 0.24473629891872406, "learning_rate": 1e-05, "loss": 0.9975, "step": 67445 }, { "epoch": 59.743135518157665, "grad_norm": 0.24417509138584137, "learning_rate": 1e-05, "loss": 0.9893, "step": 67450 }, { "epoch": 59.74756421612046, "grad_norm": 0.23501455783843994, "learning_rate": 1e-05, "loss": 0.9932, "step": 67455 }, { "epoch": 59.75199291408326, "grad_norm": 0.217117041349411, "learning_rate": 1e-05, "loss": 0.9852, "step": 67460 }, { "epoch": 59.75642161204606, "grad_norm": 0.2634599506855011, "learning_rate": 1e-05, "loss": 1.0133, "step": 67465 }, { "epoch": 59.760850310008856, "grad_norm": 0.251139760017395, "learning_rate": 1e-05, "loss": 0.9816, "step": 67470 }, { "epoch": 59.76527900797166, "grad_norm": 0.248135045170784, "learning_rate": 1e-05, "loss": 0.9938, "step": 67475 }, { "epoch": 59.76970770593446, "grad_norm": 0.24345804750919342, "learning_rate": 1e-05, "loss": 1.0309, "step": 67480 }, { "epoch": 59.77413640389725, "grad_norm": 0.2599170506000519, "learning_rate": 1e-05, "loss": 0.9493, "step": 67485 }, { "epoch": 59.77856510186005, "grad_norm": 0.2228216677904129, "learning_rate": 1e-05, "loss": 0.9459, "step": 67490 }, { "epoch": 59.782993799822854, "grad_norm": 0.21924491226673126, "learning_rate": 1e-05, "loss": 0.9784, "step": 67495 }, { "epoch": 59.78742249778565, "grad_norm": 0.2735077440738678, "learning_rate": 1e-05, "loss": 0.9411, "step": 67500 }, { "epoch": 59.79185119574845, "grad_norm": 0.24382410943508148, "learning_rate": 1e-05, "loss": 1.0471, "step": 67505 }, { "epoch": 59.79627989371125, "grad_norm": 0.25825035572052, "learning_rate": 1e-05, "loss": 0.9575, "step": 67510 }, { "epoch": 59.800708591674045, "grad_norm": 0.23420469462871552, "learning_rate": 1e-05, "loss": 0.9405, "step": 67515 }, { "epoch": 59.805137289636846, "grad_norm": 0.24119849503040314, "learning_rate": 1e-05, "loss": 0.9419, "step": 67520 }, { "epoch": 59.80956598759965, "grad_norm": 0.251857191324234, "learning_rate": 1e-05, "loss": 0.9625, "step": 67525 }, { "epoch": 59.81399468556244, "grad_norm": 0.25429603457450867, "learning_rate": 1e-05, "loss": 0.9508, "step": 67530 }, { "epoch": 59.81842338352524, "grad_norm": 0.3338758945465088, "learning_rate": 1e-05, "loss": 0.9815, "step": 67535 }, { "epoch": 59.822852081488044, "grad_norm": 0.2282615602016449, "learning_rate": 1e-05, "loss": 0.9452, "step": 67540 }, { "epoch": 59.82728077945084, "grad_norm": 0.23681332170963287, "learning_rate": 1e-05, "loss": 0.9478, "step": 67545 }, { "epoch": 59.83170947741364, "grad_norm": 0.21596649289131165, "learning_rate": 1e-05, "loss": 0.9484, "step": 67550 }, { "epoch": 59.83613817537644, "grad_norm": 0.25154420733451843, "learning_rate": 1e-05, "loss": 0.9151, "step": 67555 }, { "epoch": 59.84056687333924, "grad_norm": 0.2358541339635849, "learning_rate": 1e-05, "loss": 0.9985, "step": 67560 }, { "epoch": 59.844995571302036, "grad_norm": 0.23139184713363647, "learning_rate": 1e-05, "loss": 0.985, "step": 67565 }, { "epoch": 59.84942426926484, "grad_norm": 0.24705787003040314, "learning_rate": 1e-05, "loss": 0.9435, "step": 67570 }, { "epoch": 59.85385296722764, "grad_norm": 0.2336421012878418, "learning_rate": 1e-05, "loss": 0.9932, "step": 67575 }, { "epoch": 59.85828166519043, "grad_norm": 0.22496020793914795, "learning_rate": 1e-05, "loss": 1.0282, "step": 67580 }, { "epoch": 59.862710363153234, "grad_norm": 0.23790419101715088, "learning_rate": 1e-05, "loss": 0.9393, "step": 67585 }, { "epoch": 59.867139061116035, "grad_norm": 0.23613722622394562, "learning_rate": 1e-05, "loss": 0.9751, "step": 67590 }, { "epoch": 59.87156775907883, "grad_norm": 0.23906050622463226, "learning_rate": 1e-05, "loss": 0.9569, "step": 67595 }, { "epoch": 59.87599645704163, "grad_norm": 0.2269524186849594, "learning_rate": 1e-05, "loss": 0.9429, "step": 67600 }, { "epoch": 59.88042515500443, "grad_norm": 0.22984082996845245, "learning_rate": 1e-05, "loss": 0.9685, "step": 67605 }, { "epoch": 59.884853852967225, "grad_norm": 0.27834585309028625, "learning_rate": 1e-05, "loss": 0.9948, "step": 67610 }, { "epoch": 59.88928255093003, "grad_norm": 0.2387055605649948, "learning_rate": 1e-05, "loss": 0.9453, "step": 67615 }, { "epoch": 59.89371124889283, "grad_norm": 0.23092499375343323, "learning_rate": 1e-05, "loss": 0.9544, "step": 67620 }, { "epoch": 59.89813994685562, "grad_norm": 0.2322911024093628, "learning_rate": 1e-05, "loss": 1.0418, "step": 67625 }, { "epoch": 59.90256864481842, "grad_norm": 0.24539130926132202, "learning_rate": 1e-05, "loss": 0.9682, "step": 67630 }, { "epoch": 59.906997342781224, "grad_norm": 0.27204397320747375, "learning_rate": 1e-05, "loss": 0.9427, "step": 67635 }, { "epoch": 59.91142604074402, "grad_norm": 0.2431749850511551, "learning_rate": 1e-05, "loss": 1.0219, "step": 67640 }, { "epoch": 59.91585473870682, "grad_norm": 0.2722782790660858, "learning_rate": 1e-05, "loss": 1.0117, "step": 67645 }, { "epoch": 59.92028343666962, "grad_norm": 0.271264910697937, "learning_rate": 1e-05, "loss": 0.952, "step": 67650 }, { "epoch": 59.924712134632415, "grad_norm": 0.24249565601348877, "learning_rate": 1e-05, "loss": 1.0019, "step": 67655 }, { "epoch": 59.929140832595216, "grad_norm": 0.23625680804252625, "learning_rate": 1e-05, "loss": 0.9711, "step": 67660 }, { "epoch": 59.93356953055802, "grad_norm": 0.19955313205718994, "learning_rate": 1e-05, "loss": 0.9292, "step": 67665 }, { "epoch": 59.93799822852081, "grad_norm": 0.27409690618515015, "learning_rate": 1e-05, "loss": 0.9913, "step": 67670 }, { "epoch": 59.94242692648361, "grad_norm": 0.2384069412946701, "learning_rate": 1e-05, "loss": 0.9574, "step": 67675 }, { "epoch": 59.946855624446414, "grad_norm": 0.20589803159236908, "learning_rate": 1e-05, "loss": 0.969, "step": 67680 }, { "epoch": 59.951284322409215, "grad_norm": 0.22315742075443268, "learning_rate": 1e-05, "loss": 0.9652, "step": 67685 }, { "epoch": 59.95571302037201, "grad_norm": 0.2215559333562851, "learning_rate": 1e-05, "loss": 0.9989, "step": 67690 }, { "epoch": 59.96014171833481, "grad_norm": 0.21825172007083893, "learning_rate": 1e-05, "loss": 0.9516, "step": 67695 }, { "epoch": 59.96457041629761, "grad_norm": 0.20135079324245453, "learning_rate": 1e-05, "loss": 1.0123, "step": 67700 }, { "epoch": 59.968999114260406, "grad_norm": 0.22235675156116486, "learning_rate": 1e-05, "loss": 1.0107, "step": 67705 }, { "epoch": 59.97342781222321, "grad_norm": 0.27094313502311707, "learning_rate": 1e-05, "loss": 0.987, "step": 67710 }, { "epoch": 59.97785651018601, "grad_norm": 0.2268366515636444, "learning_rate": 1e-05, "loss": 0.9647, "step": 67715 }, { "epoch": 59.9822852081488, "grad_norm": 0.23001061379909515, "learning_rate": 1e-05, "loss": 0.9976, "step": 67720 }, { "epoch": 59.9867139061116, "grad_norm": 0.24745343625545502, "learning_rate": 1e-05, "loss": 1.0217, "step": 67725 }, { "epoch": 59.991142604074405, "grad_norm": 0.24070978164672852, "learning_rate": 1e-05, "loss": 0.9867, "step": 67730 }, { "epoch": 59.9955713020372, "grad_norm": 0.24640832841396332, "learning_rate": 1e-05, "loss": 0.9516, "step": 67735 }, { "epoch": 60.0, "grad_norm": 0.25133851170539856, "learning_rate": 1e-05, "loss": 0.991, "step": 67740 }, { "epoch": 60.0044286979628, "grad_norm": 0.22156170010566711, "learning_rate": 1e-05, "loss": 0.9679, "step": 67745 }, { "epoch": 60.008857395925595, "grad_norm": 0.2612433433532715, "learning_rate": 1e-05, "loss": 0.9405, "step": 67750 }, { "epoch": 60.0132860938884, "grad_norm": 0.22379903495311737, "learning_rate": 1e-05, "loss": 1.022, "step": 67755 }, { "epoch": 60.0177147918512, "grad_norm": 0.2242935597896576, "learning_rate": 1e-05, "loss": 0.9444, "step": 67760 }, { "epoch": 60.02214348981399, "grad_norm": 0.23966464400291443, "learning_rate": 1e-05, "loss": 0.9395, "step": 67765 }, { "epoch": 60.02657218777679, "grad_norm": 0.21759504079818726, "learning_rate": 1e-05, "loss": 0.9361, "step": 67770 }, { "epoch": 60.031000885739594, "grad_norm": 0.20940038561820984, "learning_rate": 1e-05, "loss": 0.9785, "step": 67775 }, { "epoch": 60.03542958370239, "grad_norm": 0.24632173776626587, "learning_rate": 1e-05, "loss": 0.9532, "step": 67780 }, { "epoch": 60.03985828166519, "grad_norm": 0.26936087012290955, "learning_rate": 1e-05, "loss": 0.9482, "step": 67785 }, { "epoch": 60.04428697962799, "grad_norm": 0.23884139955043793, "learning_rate": 1e-05, "loss": 0.9928, "step": 67790 }, { "epoch": 60.048715677590785, "grad_norm": 0.2195722460746765, "learning_rate": 1e-05, "loss": 0.9748, "step": 67795 }, { "epoch": 60.053144375553586, "grad_norm": 0.23998527228832245, "learning_rate": 1e-05, "loss": 0.9443, "step": 67800 }, { "epoch": 60.05757307351639, "grad_norm": 0.24039466679096222, "learning_rate": 1e-05, "loss": 0.9479, "step": 67805 }, { "epoch": 60.06200177147919, "grad_norm": 0.23541930317878723, "learning_rate": 1e-05, "loss": 0.9927, "step": 67810 }, { "epoch": 60.06643046944198, "grad_norm": 0.23188871145248413, "learning_rate": 1e-05, "loss": 0.9891, "step": 67815 }, { "epoch": 60.070859167404784, "grad_norm": 0.2790294289588928, "learning_rate": 1e-05, "loss": 0.9593, "step": 67820 }, { "epoch": 60.075287865367585, "grad_norm": 0.21344634890556335, "learning_rate": 1e-05, "loss": 1.0105, "step": 67825 }, { "epoch": 60.07971656333038, "grad_norm": 0.21285206079483032, "learning_rate": 1e-05, "loss": 0.9331, "step": 67830 }, { "epoch": 60.08414526129318, "grad_norm": 0.2517503798007965, "learning_rate": 1e-05, "loss": 0.9747, "step": 67835 }, { "epoch": 60.08857395925598, "grad_norm": 0.2339160442352295, "learning_rate": 1e-05, "loss": 1.0071, "step": 67840 }, { "epoch": 60.093002657218776, "grad_norm": 0.31511059403419495, "learning_rate": 1e-05, "loss": 0.9885, "step": 67845 }, { "epoch": 60.09743135518158, "grad_norm": 0.24189969897270203, "learning_rate": 1e-05, "loss": 1.0008, "step": 67850 }, { "epoch": 60.10186005314438, "grad_norm": 0.27861279249191284, "learning_rate": 1e-05, "loss": 0.9087, "step": 67855 }, { "epoch": 60.10628875110717, "grad_norm": 0.22302816808223724, "learning_rate": 1e-05, "loss": 0.9881, "step": 67860 }, { "epoch": 60.11071744906997, "grad_norm": 0.23074880242347717, "learning_rate": 1e-05, "loss": 0.9627, "step": 67865 }, { "epoch": 60.115146147032775, "grad_norm": 0.24772585928440094, "learning_rate": 1e-05, "loss": 0.9675, "step": 67870 }, { "epoch": 60.11957484499557, "grad_norm": 0.23156459629535675, "learning_rate": 1e-05, "loss": 0.9749, "step": 67875 }, { "epoch": 60.12400354295837, "grad_norm": 0.3078056871891022, "learning_rate": 1e-05, "loss": 0.9675, "step": 67880 }, { "epoch": 60.12843224092117, "grad_norm": 0.21533367037773132, "learning_rate": 1e-05, "loss": 0.9539, "step": 67885 }, { "epoch": 60.132860938883965, "grad_norm": 0.2413732260465622, "learning_rate": 1e-05, "loss": 0.972, "step": 67890 }, { "epoch": 60.137289636846766, "grad_norm": 0.2541654109954834, "learning_rate": 1e-05, "loss": 0.9484, "step": 67895 }, { "epoch": 60.14171833480957, "grad_norm": 0.30525773763656616, "learning_rate": 1e-05, "loss": 0.9198, "step": 67900 }, { "epoch": 60.14614703277236, "grad_norm": 0.276557981967926, "learning_rate": 1e-05, "loss": 0.9488, "step": 67905 }, { "epoch": 60.15057573073516, "grad_norm": 0.2996844947338104, "learning_rate": 1e-05, "loss": 0.9627, "step": 67910 }, { "epoch": 60.155004428697964, "grad_norm": 0.2943823039531708, "learning_rate": 1e-05, "loss": 0.9316, "step": 67915 }, { "epoch": 60.15943312666076, "grad_norm": 0.2414475530385971, "learning_rate": 1e-05, "loss": 0.9496, "step": 67920 }, { "epoch": 60.16386182462356, "grad_norm": 0.26345542073249817, "learning_rate": 1e-05, "loss": 1.0073, "step": 67925 }, { "epoch": 60.16829052258636, "grad_norm": 0.2540135681629181, "learning_rate": 1e-05, "loss": 0.9692, "step": 67930 }, { "epoch": 60.17271922054916, "grad_norm": 0.2111683338880539, "learning_rate": 1e-05, "loss": 0.9623, "step": 67935 }, { "epoch": 60.177147918511956, "grad_norm": 0.25722554326057434, "learning_rate": 1e-05, "loss": 0.9509, "step": 67940 }, { "epoch": 60.18157661647476, "grad_norm": 0.27004504203796387, "learning_rate": 1e-05, "loss": 0.9606, "step": 67945 }, { "epoch": 60.18600531443756, "grad_norm": 0.21575520932674408, "learning_rate": 1e-05, "loss": 1.0023, "step": 67950 }, { "epoch": 60.19043401240035, "grad_norm": 0.26626771688461304, "learning_rate": 1e-05, "loss": 0.9446, "step": 67955 }, { "epoch": 60.194862710363154, "grad_norm": 0.22579853236675262, "learning_rate": 1e-05, "loss": 0.9028, "step": 67960 }, { "epoch": 60.199291408325955, "grad_norm": 0.22904513776302338, "learning_rate": 1e-05, "loss": 0.932, "step": 67965 }, { "epoch": 60.20372010628875, "grad_norm": 0.2567936182022095, "learning_rate": 1e-05, "loss": 0.9666, "step": 67970 }, { "epoch": 60.20814880425155, "grad_norm": 0.2563650608062744, "learning_rate": 1e-05, "loss": 0.9677, "step": 67975 }, { "epoch": 60.21257750221435, "grad_norm": 0.2461683303117752, "learning_rate": 1e-05, "loss": 0.9785, "step": 67980 }, { "epoch": 60.217006200177146, "grad_norm": 0.22083435952663422, "learning_rate": 1e-05, "loss": 0.9919, "step": 67985 }, { "epoch": 60.22143489813995, "grad_norm": 0.23430413007736206, "learning_rate": 1e-05, "loss": 0.9302, "step": 67990 }, { "epoch": 60.22586359610275, "grad_norm": 0.2729833126068115, "learning_rate": 1e-05, "loss": 0.9598, "step": 67995 }, { "epoch": 60.23029229406554, "grad_norm": 0.237448051571846, "learning_rate": 1e-05, "loss": 0.9724, "step": 68000 }, { "epoch": 60.23472099202834, "grad_norm": 0.2860821783542633, "learning_rate": 1e-05, "loss": 1.0156, "step": 68005 }, { "epoch": 60.239149689991144, "grad_norm": 0.22187405824661255, "learning_rate": 1e-05, "loss": 1.0146, "step": 68010 }, { "epoch": 60.24357838795394, "grad_norm": 0.21306779980659485, "learning_rate": 1e-05, "loss": 0.9778, "step": 68015 }, { "epoch": 60.24800708591674, "grad_norm": 0.22761847078800201, "learning_rate": 1e-05, "loss": 0.9647, "step": 68020 }, { "epoch": 60.25243578387954, "grad_norm": 0.2301272302865982, "learning_rate": 1e-05, "loss": 0.999, "step": 68025 }, { "epoch": 60.256864481842335, "grad_norm": 0.24498267471790314, "learning_rate": 1e-05, "loss": 0.9325, "step": 68030 }, { "epoch": 60.261293179805136, "grad_norm": 0.24395066499710083, "learning_rate": 1e-05, "loss": 0.9907, "step": 68035 }, { "epoch": 60.26572187776794, "grad_norm": 0.27467772364616394, "learning_rate": 1e-05, "loss": 0.9869, "step": 68040 }, { "epoch": 60.27015057573073, "grad_norm": 0.3105911612510681, "learning_rate": 1e-05, "loss": 0.9521, "step": 68045 }, { "epoch": 60.27457927369353, "grad_norm": 0.24254311621189117, "learning_rate": 1e-05, "loss": 0.9894, "step": 68050 }, { "epoch": 60.279007971656334, "grad_norm": 0.2539561688899994, "learning_rate": 1e-05, "loss": 0.9389, "step": 68055 }, { "epoch": 60.283436669619135, "grad_norm": 0.2549138069152832, "learning_rate": 1e-05, "loss": 0.9374, "step": 68060 }, { "epoch": 60.28786536758193, "grad_norm": 0.24409809708595276, "learning_rate": 1e-05, "loss": 1.0497, "step": 68065 }, { "epoch": 60.29229406554473, "grad_norm": 0.22572097182273865, "learning_rate": 1e-05, "loss": 1.0066, "step": 68070 }, { "epoch": 60.29672276350753, "grad_norm": 0.24944044649600983, "learning_rate": 1e-05, "loss": 1.0004, "step": 68075 }, { "epoch": 60.301151461470326, "grad_norm": 0.25153833627700806, "learning_rate": 1e-05, "loss": 0.9864, "step": 68080 }, { "epoch": 60.30558015943313, "grad_norm": 0.24118006229400635, "learning_rate": 1e-05, "loss": 0.9856, "step": 68085 }, { "epoch": 60.31000885739593, "grad_norm": 0.2610335946083069, "learning_rate": 1e-05, "loss": 1.0456, "step": 68090 }, { "epoch": 60.31443755535872, "grad_norm": 0.24294546246528625, "learning_rate": 1e-05, "loss": 1.0065, "step": 68095 }, { "epoch": 60.318866253321524, "grad_norm": 0.2512407898902893, "learning_rate": 1e-05, "loss": 0.9909, "step": 68100 }, { "epoch": 60.323294951284325, "grad_norm": 0.24164848029613495, "learning_rate": 1e-05, "loss": 0.92, "step": 68105 }, { "epoch": 60.32772364924712, "grad_norm": 0.2508971095085144, "learning_rate": 1e-05, "loss": 0.9548, "step": 68110 }, { "epoch": 60.33215234720992, "grad_norm": 0.21086767315864563, "learning_rate": 1e-05, "loss": 0.9344, "step": 68115 }, { "epoch": 60.33658104517272, "grad_norm": 0.2512386739253998, "learning_rate": 1e-05, "loss": 0.9375, "step": 68120 }, { "epoch": 60.341009743135515, "grad_norm": 0.20561937987804413, "learning_rate": 1e-05, "loss": 0.9613, "step": 68125 }, { "epoch": 60.34543844109832, "grad_norm": 0.23540417850017548, "learning_rate": 1e-05, "loss": 0.9384, "step": 68130 }, { "epoch": 60.34986713906112, "grad_norm": 0.2360813468694687, "learning_rate": 1e-05, "loss": 0.9648, "step": 68135 }, { "epoch": 60.35429583702391, "grad_norm": 0.230765238404274, "learning_rate": 1e-05, "loss": 0.9669, "step": 68140 }, { "epoch": 60.35872453498671, "grad_norm": 0.23727911710739136, "learning_rate": 1e-05, "loss": 0.9519, "step": 68145 }, { "epoch": 60.363153232949514, "grad_norm": 0.21590246260166168, "learning_rate": 1e-05, "loss": 0.916, "step": 68150 }, { "epoch": 60.36758193091231, "grad_norm": 0.2584565281867981, "learning_rate": 1e-05, "loss": 0.9852, "step": 68155 }, { "epoch": 60.37201062887511, "grad_norm": 0.24334752559661865, "learning_rate": 1e-05, "loss": 0.9626, "step": 68160 }, { "epoch": 60.37643932683791, "grad_norm": 0.2383350282907486, "learning_rate": 1e-05, "loss": 0.957, "step": 68165 }, { "epoch": 60.380868024800705, "grad_norm": 0.2528057098388672, "learning_rate": 1e-05, "loss": 0.9672, "step": 68170 }, { "epoch": 60.385296722763506, "grad_norm": 0.2439594715833664, "learning_rate": 1e-05, "loss": 0.9973, "step": 68175 }, { "epoch": 60.38972542072631, "grad_norm": 0.2513636648654938, "learning_rate": 1e-05, "loss": 0.9896, "step": 68180 }, { "epoch": 60.39415411868911, "grad_norm": 0.22704513370990753, "learning_rate": 1e-05, "loss": 0.9485, "step": 68185 }, { "epoch": 60.3985828166519, "grad_norm": 0.22684215009212494, "learning_rate": 1e-05, "loss": 0.9403, "step": 68190 }, { "epoch": 60.403011514614704, "grad_norm": 0.22785650193691254, "learning_rate": 1e-05, "loss": 1.0427, "step": 68195 }, { "epoch": 60.407440212577505, "grad_norm": 0.22285433113574982, "learning_rate": 1e-05, "loss": 0.9488, "step": 68200 }, { "epoch": 60.4118689105403, "grad_norm": 0.21892952919006348, "learning_rate": 1e-05, "loss": 0.9112, "step": 68205 }, { "epoch": 60.4162976085031, "grad_norm": 0.2665686011314392, "learning_rate": 1e-05, "loss": 0.946, "step": 68210 }, { "epoch": 60.4207263064659, "grad_norm": 0.22768713533878326, "learning_rate": 1e-05, "loss": 0.9411, "step": 68215 }, { "epoch": 60.425155004428696, "grad_norm": 0.24704597890377045, "learning_rate": 1e-05, "loss": 0.8956, "step": 68220 }, { "epoch": 60.4295837023915, "grad_norm": 0.26176685094833374, "learning_rate": 1e-05, "loss": 0.9851, "step": 68225 }, { "epoch": 60.4340124003543, "grad_norm": 0.23113496601581573, "learning_rate": 1e-05, "loss": 0.9591, "step": 68230 }, { "epoch": 60.43844109831709, "grad_norm": 0.22440707683563232, "learning_rate": 1e-05, "loss": 0.9495, "step": 68235 }, { "epoch": 60.44286979627989, "grad_norm": 0.23483550548553467, "learning_rate": 1e-05, "loss": 0.9753, "step": 68240 }, { "epoch": 60.447298494242695, "grad_norm": 0.24866607785224915, "learning_rate": 1e-05, "loss": 0.965, "step": 68245 }, { "epoch": 60.45172719220549, "grad_norm": 0.2702188789844513, "learning_rate": 1e-05, "loss": 0.9806, "step": 68250 }, { "epoch": 60.45615589016829, "grad_norm": 0.28178319334983826, "learning_rate": 1e-05, "loss": 0.9737, "step": 68255 }, { "epoch": 60.46058458813109, "grad_norm": 0.2174639254808426, "learning_rate": 1e-05, "loss": 0.9462, "step": 68260 }, { "epoch": 60.465013286093885, "grad_norm": 0.2783980071544647, "learning_rate": 1e-05, "loss": 1.0165, "step": 68265 }, { "epoch": 60.46944198405669, "grad_norm": 0.2589445412158966, "learning_rate": 1e-05, "loss": 0.9515, "step": 68270 }, { "epoch": 60.47387068201949, "grad_norm": 0.2268449068069458, "learning_rate": 1e-05, "loss": 0.929, "step": 68275 }, { "epoch": 60.47829937998228, "grad_norm": 0.2240566462278366, "learning_rate": 1e-05, "loss": 0.9515, "step": 68280 }, { "epoch": 60.48272807794508, "grad_norm": 0.207584947347641, "learning_rate": 1e-05, "loss": 0.9585, "step": 68285 }, { "epoch": 60.487156775907884, "grad_norm": 0.23902861773967743, "learning_rate": 1e-05, "loss": 0.9526, "step": 68290 }, { "epoch": 60.491585473870686, "grad_norm": 0.23124824464321136, "learning_rate": 1e-05, "loss": 0.969, "step": 68295 }, { "epoch": 60.49601417183348, "grad_norm": 0.21634815633296967, "learning_rate": 1e-05, "loss": 1.0253, "step": 68300 }, { "epoch": 60.50044286979628, "grad_norm": 0.27311766147613525, "learning_rate": 1e-05, "loss": 1.0293, "step": 68305 }, { "epoch": 60.50487156775908, "grad_norm": 0.23520460724830627, "learning_rate": 1e-05, "loss": 1.0198, "step": 68310 }, { "epoch": 60.509300265721876, "grad_norm": 0.23590585589408875, "learning_rate": 1e-05, "loss": 1.0059, "step": 68315 }, { "epoch": 60.51372896368468, "grad_norm": 0.25773492455482483, "learning_rate": 1e-05, "loss": 0.9644, "step": 68320 }, { "epoch": 60.51815766164748, "grad_norm": 0.2539404034614563, "learning_rate": 1e-05, "loss": 0.9257, "step": 68325 }, { "epoch": 60.52258635961027, "grad_norm": 0.24535350501537323, "learning_rate": 1e-05, "loss": 0.9525, "step": 68330 }, { "epoch": 60.527015057573074, "grad_norm": 0.22788722813129425, "learning_rate": 1e-05, "loss": 0.9547, "step": 68335 }, { "epoch": 60.531443755535875, "grad_norm": 0.227084681391716, "learning_rate": 1e-05, "loss": 0.9964, "step": 68340 }, { "epoch": 60.53587245349867, "grad_norm": 0.21022918820381165, "learning_rate": 1e-05, "loss": 1.007, "step": 68345 }, { "epoch": 60.54030115146147, "grad_norm": 0.27771732211112976, "learning_rate": 1e-05, "loss": 0.9587, "step": 68350 }, { "epoch": 60.54472984942427, "grad_norm": 0.2801872789859772, "learning_rate": 1e-05, "loss": 0.9607, "step": 68355 }, { "epoch": 60.549158547387066, "grad_norm": 0.27351677417755127, "learning_rate": 1e-05, "loss": 1.0152, "step": 68360 }, { "epoch": 60.55358724534987, "grad_norm": 0.2551421821117401, "learning_rate": 1e-05, "loss": 0.9968, "step": 68365 }, { "epoch": 60.55801594331267, "grad_norm": 0.26367899775505066, "learning_rate": 1e-05, "loss": 0.9671, "step": 68370 }, { "epoch": 60.56244464127546, "grad_norm": 0.2702999711036682, "learning_rate": 1e-05, "loss": 1.0168, "step": 68375 }, { "epoch": 60.56687333923826, "grad_norm": 0.21274510025978088, "learning_rate": 1e-05, "loss": 1.0235, "step": 68380 }, { "epoch": 60.571302037201065, "grad_norm": 0.23331323266029358, "learning_rate": 1e-05, "loss": 1.0011, "step": 68385 }, { "epoch": 60.57573073516386, "grad_norm": 0.257802277803421, "learning_rate": 1e-05, "loss": 0.9432, "step": 68390 }, { "epoch": 60.58015943312666, "grad_norm": 0.22249586880207062, "learning_rate": 1e-05, "loss": 0.9586, "step": 68395 }, { "epoch": 60.58458813108946, "grad_norm": 0.24841439723968506, "learning_rate": 1e-05, "loss": 0.9732, "step": 68400 }, { "epoch": 60.589016829052255, "grad_norm": 0.2400336116552353, "learning_rate": 1e-05, "loss": 1.0162, "step": 68405 }, { "epoch": 60.59344552701506, "grad_norm": 0.22133579850196838, "learning_rate": 1e-05, "loss": 0.9622, "step": 68410 }, { "epoch": 60.59787422497786, "grad_norm": 0.2465466558933258, "learning_rate": 1e-05, "loss": 0.9752, "step": 68415 }, { "epoch": 60.60230292294066, "grad_norm": 0.25301554799079895, "learning_rate": 1e-05, "loss": 0.937, "step": 68420 }, { "epoch": 60.60673162090345, "grad_norm": 0.26860591769218445, "learning_rate": 1e-05, "loss": 0.9186, "step": 68425 }, { "epoch": 60.611160318866254, "grad_norm": 0.20792792737483978, "learning_rate": 1e-05, "loss": 0.9858, "step": 68430 }, { "epoch": 60.615589016829055, "grad_norm": 0.22394777834415436, "learning_rate": 1e-05, "loss": 0.9882, "step": 68435 }, { "epoch": 60.62001771479185, "grad_norm": 0.24083735048770905, "learning_rate": 1e-05, "loss": 0.9894, "step": 68440 }, { "epoch": 60.62444641275465, "grad_norm": 0.2446780651807785, "learning_rate": 1e-05, "loss": 0.9705, "step": 68445 }, { "epoch": 60.62887511071745, "grad_norm": 0.22034183144569397, "learning_rate": 1e-05, "loss": 1.0409, "step": 68450 }, { "epoch": 60.633303808680246, "grad_norm": 0.24650482833385468, "learning_rate": 1e-05, "loss": 0.9712, "step": 68455 }, { "epoch": 60.63773250664305, "grad_norm": 0.23790277540683746, "learning_rate": 1e-05, "loss": 0.9568, "step": 68460 }, { "epoch": 60.64216120460585, "grad_norm": 0.32248207926750183, "learning_rate": 1e-05, "loss": 0.9864, "step": 68465 }, { "epoch": 60.64658990256864, "grad_norm": 0.22248025238513947, "learning_rate": 1e-05, "loss": 0.9273, "step": 68470 }, { "epoch": 60.651018600531444, "grad_norm": 0.23740394413471222, "learning_rate": 1e-05, "loss": 0.9594, "step": 68475 }, { "epoch": 60.655447298494245, "grad_norm": 0.250934362411499, "learning_rate": 1e-05, "loss": 0.9676, "step": 68480 }, { "epoch": 60.65987599645704, "grad_norm": 0.24006050825119019, "learning_rate": 1e-05, "loss": 0.9397, "step": 68485 }, { "epoch": 60.66430469441984, "grad_norm": 0.23684562742710114, "learning_rate": 1e-05, "loss": 0.9033, "step": 68490 }, { "epoch": 60.66873339238264, "grad_norm": 0.2845231294631958, "learning_rate": 1e-05, "loss": 0.9626, "step": 68495 }, { "epoch": 60.673162090345436, "grad_norm": 0.25366225838661194, "learning_rate": 1e-05, "loss": 0.9909, "step": 68500 }, { "epoch": 60.67759078830824, "grad_norm": 0.2804090678691864, "learning_rate": 1e-05, "loss": 0.9785, "step": 68505 }, { "epoch": 60.68201948627104, "grad_norm": 0.21341632306575775, "learning_rate": 1e-05, "loss": 0.9463, "step": 68510 }, { "epoch": 60.68644818423383, "grad_norm": 0.23643432557582855, "learning_rate": 1e-05, "loss": 0.9949, "step": 68515 }, { "epoch": 60.69087688219663, "grad_norm": 0.30427131056785583, "learning_rate": 1e-05, "loss": 0.9963, "step": 68520 }, { "epoch": 60.695305580159435, "grad_norm": 0.2572616934776306, "learning_rate": 1e-05, "loss": 0.988, "step": 68525 }, { "epoch": 60.69973427812223, "grad_norm": 0.2615016996860504, "learning_rate": 1e-05, "loss": 0.9487, "step": 68530 }, { "epoch": 60.70416297608503, "grad_norm": 0.2761181592941284, "learning_rate": 1e-05, "loss": 0.9936, "step": 68535 }, { "epoch": 60.70859167404783, "grad_norm": 0.21619199216365814, "learning_rate": 1e-05, "loss": 1.0539, "step": 68540 }, { "epoch": 60.71302037201063, "grad_norm": 0.2630160450935364, "learning_rate": 1e-05, "loss": 0.9936, "step": 68545 }, { "epoch": 60.717449069973426, "grad_norm": 0.21130770444869995, "learning_rate": 1e-05, "loss": 0.9996, "step": 68550 }, { "epoch": 60.72187776793623, "grad_norm": 0.22268280386924744, "learning_rate": 1e-05, "loss": 0.9837, "step": 68555 }, { "epoch": 60.72630646589903, "grad_norm": 0.32420459389686584, "learning_rate": 1e-05, "loss": 1.0032, "step": 68560 }, { "epoch": 60.73073516386182, "grad_norm": 0.24895937740802765, "learning_rate": 1e-05, "loss": 0.9588, "step": 68565 }, { "epoch": 60.735163861824624, "grad_norm": 0.2566019594669342, "learning_rate": 1e-05, "loss": 0.9228, "step": 68570 }, { "epoch": 60.739592559787425, "grad_norm": 0.22634539008140564, "learning_rate": 1e-05, "loss": 1.0061, "step": 68575 }, { "epoch": 60.74402125775022, "grad_norm": 0.26682814955711365, "learning_rate": 1e-05, "loss": 1.0013, "step": 68580 }, { "epoch": 60.74844995571302, "grad_norm": 0.275595098733902, "learning_rate": 1e-05, "loss": 0.9928, "step": 68585 }, { "epoch": 60.75287865367582, "grad_norm": 0.23859696090221405, "learning_rate": 1e-05, "loss": 1.0227, "step": 68590 }, { "epoch": 60.757307351638616, "grad_norm": 0.2459527850151062, "learning_rate": 1e-05, "loss": 1.0083, "step": 68595 }, { "epoch": 60.76173604960142, "grad_norm": 0.24977359175682068, "learning_rate": 1e-05, "loss": 0.9694, "step": 68600 }, { "epoch": 60.76616474756422, "grad_norm": 0.2629532217979431, "learning_rate": 1e-05, "loss": 0.9768, "step": 68605 }, { "epoch": 60.77059344552701, "grad_norm": 0.28251326084136963, "learning_rate": 1e-05, "loss": 0.9251, "step": 68610 }, { "epoch": 60.775022143489814, "grad_norm": 0.26810160279273987, "learning_rate": 1e-05, "loss": 0.9301, "step": 68615 }, { "epoch": 60.779450841452615, "grad_norm": 0.28561556339263916, "learning_rate": 1e-05, "loss": 0.9182, "step": 68620 }, { "epoch": 60.78387953941541, "grad_norm": 0.2667543590068817, "learning_rate": 1e-05, "loss": 0.9677, "step": 68625 }, { "epoch": 60.78830823737821, "grad_norm": 0.22049254179000854, "learning_rate": 1e-05, "loss": 0.9468, "step": 68630 }, { "epoch": 60.79273693534101, "grad_norm": 0.23839691281318665, "learning_rate": 1e-05, "loss": 0.9403, "step": 68635 }, { "epoch": 60.797165633303806, "grad_norm": 0.2234589159488678, "learning_rate": 1e-05, "loss": 0.9617, "step": 68640 }, { "epoch": 60.80159433126661, "grad_norm": 0.21641317009925842, "learning_rate": 1e-05, "loss": 0.9475, "step": 68645 }, { "epoch": 60.80602302922941, "grad_norm": 0.21696758270263672, "learning_rate": 1e-05, "loss": 0.9275, "step": 68650 }, { "epoch": 60.8104517271922, "grad_norm": 0.2315734177827835, "learning_rate": 1e-05, "loss": 0.9571, "step": 68655 }, { "epoch": 60.814880425155, "grad_norm": 0.2406282126903534, "learning_rate": 1e-05, "loss": 0.9411, "step": 68660 }, { "epoch": 60.819309123117804, "grad_norm": 0.22853884100914001, "learning_rate": 1e-05, "loss": 0.9741, "step": 68665 }, { "epoch": 60.823737821080606, "grad_norm": 0.2348943054676056, "learning_rate": 1e-05, "loss": 0.9682, "step": 68670 }, { "epoch": 60.8281665190434, "grad_norm": 0.25485584139823914, "learning_rate": 1e-05, "loss": 0.9601, "step": 68675 }, { "epoch": 60.8325952170062, "grad_norm": 0.2539708614349365, "learning_rate": 1e-05, "loss": 1.0135, "step": 68680 }, { "epoch": 60.837023914969, "grad_norm": 0.26831015944480896, "learning_rate": 1e-05, "loss": 0.9696, "step": 68685 }, { "epoch": 60.841452612931796, "grad_norm": 0.23185327649116516, "learning_rate": 1e-05, "loss": 0.9646, "step": 68690 }, { "epoch": 60.8458813108946, "grad_norm": 0.22609320282936096, "learning_rate": 1e-05, "loss": 1.0083, "step": 68695 }, { "epoch": 60.8503100088574, "grad_norm": 0.21681274473667145, "learning_rate": 1e-05, "loss": 1.0177, "step": 68700 }, { "epoch": 60.85473870682019, "grad_norm": 0.2144000083208084, "learning_rate": 1e-05, "loss": 0.9506, "step": 68705 }, { "epoch": 60.859167404782994, "grad_norm": 0.24931775033473969, "learning_rate": 1e-05, "loss": 1.0157, "step": 68710 }, { "epoch": 60.863596102745795, "grad_norm": 0.28115612268447876, "learning_rate": 1e-05, "loss": 0.9685, "step": 68715 }, { "epoch": 60.86802480070859, "grad_norm": 0.22621887922286987, "learning_rate": 1e-05, "loss": 0.9734, "step": 68720 }, { "epoch": 60.87245349867139, "grad_norm": 0.22621527314186096, "learning_rate": 1e-05, "loss": 0.9835, "step": 68725 }, { "epoch": 60.87688219663419, "grad_norm": 0.26475536823272705, "learning_rate": 1e-05, "loss": 0.9688, "step": 68730 }, { "epoch": 60.881310894596986, "grad_norm": 0.25169700384140015, "learning_rate": 1e-05, "loss": 0.9566, "step": 68735 }, { "epoch": 60.88573959255979, "grad_norm": 0.22355714440345764, "learning_rate": 1e-05, "loss": 1.0279, "step": 68740 }, { "epoch": 60.89016829052259, "grad_norm": 0.23615559935569763, "learning_rate": 1e-05, "loss": 0.9489, "step": 68745 }, { "epoch": 60.89459698848538, "grad_norm": 0.23883506655693054, "learning_rate": 1e-05, "loss": 0.9496, "step": 68750 }, { "epoch": 60.899025686448184, "grad_norm": 0.23506960272789001, "learning_rate": 1e-05, "loss": 0.9455, "step": 68755 }, { "epoch": 60.903454384410985, "grad_norm": 0.27814409136772156, "learning_rate": 1e-05, "loss": 0.9483, "step": 68760 }, { "epoch": 60.90788308237378, "grad_norm": 0.2074914276599884, "learning_rate": 1e-05, "loss": 0.9781, "step": 68765 }, { "epoch": 60.91231178033658, "grad_norm": 0.2358940690755844, "learning_rate": 1e-05, "loss": 0.9515, "step": 68770 }, { "epoch": 60.91674047829938, "grad_norm": 0.21459928154945374, "learning_rate": 1e-05, "loss": 0.9734, "step": 68775 }, { "epoch": 60.921169176262175, "grad_norm": 0.2349170446395874, "learning_rate": 1e-05, "loss": 0.9569, "step": 68780 }, { "epoch": 60.92559787422498, "grad_norm": 0.22285522520542145, "learning_rate": 1e-05, "loss": 1.0223, "step": 68785 }, { "epoch": 60.93002657218778, "grad_norm": 0.23253588378429413, "learning_rate": 1e-05, "loss": 0.9576, "step": 68790 }, { "epoch": 60.93445527015058, "grad_norm": 0.22562578320503235, "learning_rate": 1e-05, "loss": 0.9644, "step": 68795 }, { "epoch": 60.93888396811337, "grad_norm": 0.24191094934940338, "learning_rate": 1e-05, "loss": 1.005, "step": 68800 }, { "epoch": 60.943312666076174, "grad_norm": 0.22991205751895905, "learning_rate": 1e-05, "loss": 0.9777, "step": 68805 }, { "epoch": 60.947741364038976, "grad_norm": 0.22426697611808777, "learning_rate": 1e-05, "loss": 0.9477, "step": 68810 }, { "epoch": 60.95217006200177, "grad_norm": 0.25657710433006287, "learning_rate": 1e-05, "loss": 0.963, "step": 68815 }, { "epoch": 60.95659875996457, "grad_norm": 0.23975779116153717, "learning_rate": 1e-05, "loss": 1.0059, "step": 68820 }, { "epoch": 60.96102745792737, "grad_norm": 0.22793243825435638, "learning_rate": 1e-05, "loss": 0.9656, "step": 68825 }, { "epoch": 60.965456155890166, "grad_norm": 0.24359433352947235, "learning_rate": 1e-05, "loss": 0.9412, "step": 68830 }, { "epoch": 60.96988485385297, "grad_norm": 0.24737828969955444, "learning_rate": 1e-05, "loss": 0.9636, "step": 68835 }, { "epoch": 60.97431355181577, "grad_norm": 0.2817375659942627, "learning_rate": 1e-05, "loss": 0.959, "step": 68840 }, { "epoch": 60.97874224977856, "grad_norm": 0.24856781959533691, "learning_rate": 1e-05, "loss": 0.9658, "step": 68845 }, { "epoch": 60.983170947741364, "grad_norm": 0.253393292427063, "learning_rate": 1e-05, "loss": 0.9881, "step": 68850 }, { "epoch": 60.987599645704165, "grad_norm": 0.2796756327152252, "learning_rate": 1e-05, "loss": 0.931, "step": 68855 }, { "epoch": 60.99202834366696, "grad_norm": 0.2869615852832794, "learning_rate": 1e-05, "loss": 1.0176, "step": 68860 }, { "epoch": 60.99645704162976, "grad_norm": 0.24789300560951233, "learning_rate": 1e-05, "loss": 0.9578, "step": 68865 }, { "epoch": 61.00088573959256, "grad_norm": 0.2543843388557434, "learning_rate": 1e-05, "loss": 1.015, "step": 68870 }, { "epoch": 61.005314437555356, "grad_norm": 0.24900545179843903, "learning_rate": 1e-05, "loss": 0.91, "step": 68875 }, { "epoch": 61.00974313551816, "grad_norm": 0.2447466254234314, "learning_rate": 1e-05, "loss": 0.9721, "step": 68880 }, { "epoch": 61.01417183348096, "grad_norm": 0.21506695449352264, "learning_rate": 1e-05, "loss": 0.9456, "step": 68885 }, { "epoch": 61.01860053144375, "grad_norm": 0.22232094407081604, "learning_rate": 1e-05, "loss": 0.8969, "step": 68890 }, { "epoch": 61.02302922940655, "grad_norm": 0.2680467367172241, "learning_rate": 1e-05, "loss": 0.9456, "step": 68895 }, { "epoch": 61.027457927369355, "grad_norm": 0.2284451574087143, "learning_rate": 1e-05, "loss": 0.9724, "step": 68900 }, { "epoch": 61.03188662533215, "grad_norm": 0.26245033740997314, "learning_rate": 1e-05, "loss": 0.945, "step": 68905 }, { "epoch": 61.03631532329495, "grad_norm": 0.2640277147293091, "learning_rate": 1e-05, "loss": 0.9396, "step": 68910 }, { "epoch": 61.04074402125775, "grad_norm": 0.18897074460983276, "learning_rate": 1e-05, "loss": 1.0063, "step": 68915 }, { "epoch": 61.04517271922055, "grad_norm": 0.2382601499557495, "learning_rate": 1e-05, "loss": 1.0108, "step": 68920 }, { "epoch": 61.04960141718335, "grad_norm": 0.24305549263954163, "learning_rate": 1e-05, "loss": 0.9549, "step": 68925 }, { "epoch": 61.05403011514615, "grad_norm": 0.2528451085090637, "learning_rate": 1e-05, "loss": 0.9468, "step": 68930 }, { "epoch": 61.05845881310895, "grad_norm": 0.22123828530311584, "learning_rate": 1e-05, "loss": 0.9544, "step": 68935 }, { "epoch": 61.06288751107174, "grad_norm": 0.20078840851783752, "learning_rate": 1e-05, "loss": 0.9472, "step": 68940 }, { "epoch": 61.067316209034544, "grad_norm": 0.27237752079963684, "learning_rate": 1e-05, "loss": 0.9465, "step": 68945 }, { "epoch": 61.071744906997345, "grad_norm": 0.24376441538333893, "learning_rate": 1e-05, "loss": 0.9733, "step": 68950 }, { "epoch": 61.07617360496014, "grad_norm": 0.256112664937973, "learning_rate": 1e-05, "loss": 0.9346, "step": 68955 }, { "epoch": 61.08060230292294, "grad_norm": 0.21611861884593964, "learning_rate": 1e-05, "loss": 0.9226, "step": 68960 }, { "epoch": 61.08503100088574, "grad_norm": 0.26536816358566284, "learning_rate": 1e-05, "loss": 0.9573, "step": 68965 }, { "epoch": 61.089459698848536, "grad_norm": 0.27056974172592163, "learning_rate": 1e-05, "loss": 0.9733, "step": 68970 }, { "epoch": 61.09388839681134, "grad_norm": 0.24422673881053925, "learning_rate": 1e-05, "loss": 0.9714, "step": 68975 }, { "epoch": 61.09831709477414, "grad_norm": 0.23578929901123047, "learning_rate": 1e-05, "loss": 0.9881, "step": 68980 }, { "epoch": 61.10274579273693, "grad_norm": 0.22743748128414154, "learning_rate": 1e-05, "loss": 0.9478, "step": 68985 }, { "epoch": 61.107174490699734, "grad_norm": 0.23487642407417297, "learning_rate": 1e-05, "loss": 0.9241, "step": 68990 }, { "epoch": 61.111603188662535, "grad_norm": 0.21708784997463226, "learning_rate": 1e-05, "loss": 0.9648, "step": 68995 }, { "epoch": 61.11603188662533, "grad_norm": 0.24450981616973877, "learning_rate": 1e-05, "loss": 0.9972, "step": 69000 }, { "epoch": 61.12046058458813, "grad_norm": 0.21709676086902618, "learning_rate": 1e-05, "loss": 0.9364, "step": 69005 }, { "epoch": 61.12488928255093, "grad_norm": 0.2173592448234558, "learning_rate": 1e-05, "loss": 0.9833, "step": 69010 }, { "epoch": 61.129317980513726, "grad_norm": 0.2793889045715332, "learning_rate": 1e-05, "loss": 0.9887, "step": 69015 }, { "epoch": 61.13374667847653, "grad_norm": 0.22945021092891693, "learning_rate": 1e-05, "loss": 0.9434, "step": 69020 }, { "epoch": 61.13817537643933, "grad_norm": 0.21406635642051697, "learning_rate": 1e-05, "loss": 1.0035, "step": 69025 }, { "epoch": 61.14260407440213, "grad_norm": 0.21664361655712128, "learning_rate": 1e-05, "loss": 0.9372, "step": 69030 }, { "epoch": 61.14703277236492, "grad_norm": 0.2671864926815033, "learning_rate": 1e-05, "loss": 0.9462, "step": 69035 }, { "epoch": 61.151461470327725, "grad_norm": 0.2216176688671112, "learning_rate": 1e-05, "loss": 0.9455, "step": 69040 }, { "epoch": 61.155890168290526, "grad_norm": 0.2204846739768982, "learning_rate": 1e-05, "loss": 0.9809, "step": 69045 }, { "epoch": 61.16031886625332, "grad_norm": 0.2371831089258194, "learning_rate": 1e-05, "loss": 0.9219, "step": 69050 }, { "epoch": 61.16474756421612, "grad_norm": 0.22620365023612976, "learning_rate": 1e-05, "loss": 1.0386, "step": 69055 }, { "epoch": 61.16917626217892, "grad_norm": 0.22587302327156067, "learning_rate": 1e-05, "loss": 0.9667, "step": 69060 }, { "epoch": 61.173604960141716, "grad_norm": 0.2310447245836258, "learning_rate": 1e-05, "loss": 0.9284, "step": 69065 }, { "epoch": 61.17803365810452, "grad_norm": 0.26117467880249023, "learning_rate": 1e-05, "loss": 0.9797, "step": 69070 }, { "epoch": 61.18246235606732, "grad_norm": 0.28922730684280396, "learning_rate": 1e-05, "loss": 0.943, "step": 69075 }, { "epoch": 61.18689105403011, "grad_norm": 0.267564594745636, "learning_rate": 1e-05, "loss": 1.0065, "step": 69080 }, { "epoch": 61.191319751992914, "grad_norm": 0.24295002222061157, "learning_rate": 1e-05, "loss": 0.9472, "step": 69085 }, { "epoch": 61.195748449955715, "grad_norm": 0.24249619245529175, "learning_rate": 1e-05, "loss": 0.9218, "step": 69090 }, { "epoch": 61.20017714791851, "grad_norm": 0.29141199588775635, "learning_rate": 1e-05, "loss": 0.989, "step": 69095 }, { "epoch": 61.20460584588131, "grad_norm": 0.2403004914522171, "learning_rate": 1e-05, "loss": 0.9949, "step": 69100 }, { "epoch": 61.20903454384411, "grad_norm": 0.24814929068088531, "learning_rate": 1e-05, "loss": 0.9364, "step": 69105 }, { "epoch": 61.213463241806906, "grad_norm": 0.21212254464626312, "learning_rate": 1e-05, "loss": 0.9695, "step": 69110 }, { "epoch": 61.21789193976971, "grad_norm": 0.23725003004074097, "learning_rate": 1e-05, "loss": 0.9535, "step": 69115 }, { "epoch": 61.22232063773251, "grad_norm": 0.2563864290714264, "learning_rate": 1e-05, "loss": 0.9702, "step": 69120 }, { "epoch": 61.2267493356953, "grad_norm": 0.2553084194660187, "learning_rate": 1e-05, "loss": 0.9858, "step": 69125 }, { "epoch": 61.231178033658104, "grad_norm": 0.23428797721862793, "learning_rate": 1e-05, "loss": 1.0243, "step": 69130 }, { "epoch": 61.235606731620905, "grad_norm": 0.23866522312164307, "learning_rate": 1e-05, "loss": 1.0095, "step": 69135 }, { "epoch": 61.2400354295837, "grad_norm": 0.23985204100608826, "learning_rate": 1e-05, "loss": 0.9521, "step": 69140 }, { "epoch": 61.2444641275465, "grad_norm": 0.25277647376060486, "learning_rate": 1e-05, "loss": 0.9872, "step": 69145 }, { "epoch": 61.2488928255093, "grad_norm": 0.2376217246055603, "learning_rate": 1e-05, "loss": 0.9142, "step": 69150 }, { "epoch": 61.2533215234721, "grad_norm": 0.24289919435977936, "learning_rate": 1e-05, "loss": 0.9153, "step": 69155 }, { "epoch": 61.2577502214349, "grad_norm": 0.22350744903087616, "learning_rate": 1e-05, "loss": 0.9527, "step": 69160 }, { "epoch": 61.2621789193977, "grad_norm": 0.23198938369750977, "learning_rate": 1e-05, "loss": 0.9596, "step": 69165 }, { "epoch": 61.2666076173605, "grad_norm": 0.23566776514053345, "learning_rate": 1e-05, "loss": 0.9942, "step": 69170 }, { "epoch": 61.27103631532329, "grad_norm": 0.2958187460899353, "learning_rate": 1e-05, "loss": 0.9711, "step": 69175 }, { "epoch": 61.275465013286095, "grad_norm": 0.22491714358329773, "learning_rate": 1e-05, "loss": 0.9145, "step": 69180 }, { "epoch": 61.279893711248896, "grad_norm": 0.24291083216667175, "learning_rate": 1e-05, "loss": 0.9552, "step": 69185 }, { "epoch": 61.28432240921169, "grad_norm": 0.22924542427062988, "learning_rate": 1e-05, "loss": 0.9344, "step": 69190 }, { "epoch": 61.28875110717449, "grad_norm": 0.25897637009620667, "learning_rate": 1e-05, "loss": 0.963, "step": 69195 }, { "epoch": 61.29317980513729, "grad_norm": 0.25369951128959656, "learning_rate": 1e-05, "loss": 0.962, "step": 69200 }, { "epoch": 61.297608503100086, "grad_norm": 0.22002443671226501, "learning_rate": 1e-05, "loss": 0.9576, "step": 69205 }, { "epoch": 61.30203720106289, "grad_norm": 0.22557134926319122, "learning_rate": 1e-05, "loss": 0.9022, "step": 69210 }, { "epoch": 61.30646589902569, "grad_norm": 0.2160862535238266, "learning_rate": 1e-05, "loss": 0.9386, "step": 69215 }, { "epoch": 61.31089459698848, "grad_norm": 0.21808575093746185, "learning_rate": 1e-05, "loss": 0.9605, "step": 69220 }, { "epoch": 61.315323294951284, "grad_norm": 0.25484880805015564, "learning_rate": 1e-05, "loss": 0.9332, "step": 69225 }, { "epoch": 61.319751992914085, "grad_norm": 0.2583205997943878, "learning_rate": 1e-05, "loss": 0.8996, "step": 69230 }, { "epoch": 61.32418069087688, "grad_norm": 0.23051223158836365, "learning_rate": 1e-05, "loss": 0.9926, "step": 69235 }, { "epoch": 61.32860938883968, "grad_norm": 0.24472104012966156, "learning_rate": 1e-05, "loss": 1.0135, "step": 69240 }, { "epoch": 61.33303808680248, "grad_norm": 0.21966078877449036, "learning_rate": 1e-05, "loss": 0.985, "step": 69245 }, { "epoch": 61.337466784765276, "grad_norm": 0.24408718943595886, "learning_rate": 1e-05, "loss": 0.9667, "step": 69250 }, { "epoch": 61.34189548272808, "grad_norm": 0.24638210237026215, "learning_rate": 1e-05, "loss": 0.9572, "step": 69255 }, { "epoch": 61.34632418069088, "grad_norm": 0.22625134885311127, "learning_rate": 1e-05, "loss": 0.9642, "step": 69260 }, { "epoch": 61.35075287865367, "grad_norm": 0.264849454164505, "learning_rate": 1e-05, "loss": 1.0197, "step": 69265 }, { "epoch": 61.355181576616474, "grad_norm": 0.23788122832775116, "learning_rate": 1e-05, "loss": 0.9913, "step": 69270 }, { "epoch": 61.359610274579275, "grad_norm": 0.28192004561424255, "learning_rate": 1e-05, "loss": 0.9753, "step": 69275 }, { "epoch": 61.364038972542076, "grad_norm": 0.226658433675766, "learning_rate": 1e-05, "loss": 0.9119, "step": 69280 }, { "epoch": 61.36846767050487, "grad_norm": 0.22849087417125702, "learning_rate": 1e-05, "loss": 0.9338, "step": 69285 }, { "epoch": 61.37289636846767, "grad_norm": 0.2611321210861206, "learning_rate": 1e-05, "loss": 0.9465, "step": 69290 }, { "epoch": 61.37732506643047, "grad_norm": 0.2517118752002716, "learning_rate": 1e-05, "loss": 0.9206, "step": 69295 }, { "epoch": 61.38175376439327, "grad_norm": 0.2303445190191269, "learning_rate": 1e-05, "loss": 1.0028, "step": 69300 }, { "epoch": 61.38618246235607, "grad_norm": 0.23463520407676697, "learning_rate": 1e-05, "loss": 0.9564, "step": 69305 }, { "epoch": 61.39061116031887, "grad_norm": 0.23300568759441376, "learning_rate": 1e-05, "loss": 0.9544, "step": 69310 }, { "epoch": 61.39503985828166, "grad_norm": 0.24406839907169342, "learning_rate": 1e-05, "loss": 0.8969, "step": 69315 }, { "epoch": 61.399468556244464, "grad_norm": 0.23840808868408203, "learning_rate": 1e-05, "loss": 0.921, "step": 69320 }, { "epoch": 61.403897254207266, "grad_norm": 0.26248326897621155, "learning_rate": 1e-05, "loss": 0.9238, "step": 69325 }, { "epoch": 61.40832595217006, "grad_norm": 0.2521049380302429, "learning_rate": 1e-05, "loss": 1.005, "step": 69330 }, { "epoch": 61.41275465013286, "grad_norm": 0.24987314641475677, "learning_rate": 1e-05, "loss": 0.9227, "step": 69335 }, { "epoch": 61.41718334809566, "grad_norm": 0.2493465393781662, "learning_rate": 1e-05, "loss": 0.9591, "step": 69340 }, { "epoch": 61.421612046058456, "grad_norm": 0.26827895641326904, "learning_rate": 1e-05, "loss": 0.9572, "step": 69345 }, { "epoch": 61.42604074402126, "grad_norm": 0.22761604189872742, "learning_rate": 1e-05, "loss": 0.9355, "step": 69350 }, { "epoch": 61.43046944198406, "grad_norm": 0.2533360719680786, "learning_rate": 1e-05, "loss": 0.9576, "step": 69355 }, { "epoch": 61.43489813994685, "grad_norm": 0.2639541029930115, "learning_rate": 1e-05, "loss": 0.9361, "step": 69360 }, { "epoch": 61.439326837909654, "grad_norm": 0.24893233180046082, "learning_rate": 1e-05, "loss": 1.0025, "step": 69365 }, { "epoch": 61.443755535872455, "grad_norm": 0.2455807626247406, "learning_rate": 1e-05, "loss": 0.974, "step": 69370 }, { "epoch": 61.44818423383525, "grad_norm": 0.3321240544319153, "learning_rate": 1e-05, "loss": 0.9097, "step": 69375 }, { "epoch": 61.45261293179805, "grad_norm": 0.2909684181213379, "learning_rate": 1e-05, "loss": 0.9479, "step": 69380 }, { "epoch": 61.45704162976085, "grad_norm": 0.2340477854013443, "learning_rate": 1e-05, "loss": 0.9842, "step": 69385 }, { "epoch": 61.461470327723646, "grad_norm": 0.25586262345314026, "learning_rate": 1e-05, "loss": 0.9177, "step": 69390 }, { "epoch": 61.46589902568645, "grad_norm": 0.24378307163715363, "learning_rate": 1e-05, "loss": 0.9764, "step": 69395 }, { "epoch": 61.47032772364925, "grad_norm": 0.25312042236328125, "learning_rate": 1e-05, "loss": 0.9476, "step": 69400 }, { "epoch": 61.47475642161205, "grad_norm": 0.22455553710460663, "learning_rate": 1e-05, "loss": 0.9524, "step": 69405 }, { "epoch": 61.479185119574844, "grad_norm": 0.2228417992591858, "learning_rate": 1e-05, "loss": 0.9756, "step": 69410 }, { "epoch": 61.483613817537645, "grad_norm": 0.23340564966201782, "learning_rate": 1e-05, "loss": 0.9526, "step": 69415 }, { "epoch": 61.488042515500446, "grad_norm": 0.2407195121049881, "learning_rate": 1e-05, "loss": 0.9691, "step": 69420 }, { "epoch": 61.49247121346324, "grad_norm": 0.23649799823760986, "learning_rate": 1e-05, "loss": 0.9579, "step": 69425 }, { "epoch": 61.49689991142604, "grad_norm": 0.2822832763195038, "learning_rate": 1e-05, "loss": 0.9418, "step": 69430 }, { "epoch": 61.50132860938884, "grad_norm": 0.21479719877243042, "learning_rate": 1e-05, "loss": 0.9852, "step": 69435 }, { "epoch": 61.50575730735164, "grad_norm": 0.24879147112369537, "learning_rate": 1e-05, "loss": 0.9851, "step": 69440 }, { "epoch": 61.51018600531444, "grad_norm": 0.2494964450597763, "learning_rate": 1e-05, "loss": 0.956, "step": 69445 }, { "epoch": 61.51461470327724, "grad_norm": 0.26404666900634766, "learning_rate": 1e-05, "loss": 0.9579, "step": 69450 }, { "epoch": 61.51904340124003, "grad_norm": 0.2151639759540558, "learning_rate": 1e-05, "loss": 0.9833, "step": 69455 }, { "epoch": 61.523472099202834, "grad_norm": 0.21767747402191162, "learning_rate": 1e-05, "loss": 0.9741, "step": 69460 }, { "epoch": 61.527900797165636, "grad_norm": 0.26045215129852295, "learning_rate": 1e-05, "loss": 0.9606, "step": 69465 }, { "epoch": 61.53232949512843, "grad_norm": 0.23753869533538818, "learning_rate": 1e-05, "loss": 0.9204, "step": 69470 }, { "epoch": 61.53675819309123, "grad_norm": 0.21348710358142853, "learning_rate": 1e-05, "loss": 0.9573, "step": 69475 }, { "epoch": 61.54118689105403, "grad_norm": 0.22218547761440277, "learning_rate": 1e-05, "loss": 0.941, "step": 69480 }, { "epoch": 61.545615589016826, "grad_norm": 0.23249605298042297, "learning_rate": 1e-05, "loss": 0.9699, "step": 69485 }, { "epoch": 61.55004428697963, "grad_norm": 0.25759604573249817, "learning_rate": 1e-05, "loss": 0.9637, "step": 69490 }, { "epoch": 61.55447298494243, "grad_norm": 0.2337723970413208, "learning_rate": 1e-05, "loss": 1.0013, "step": 69495 }, { "epoch": 61.55890168290522, "grad_norm": 0.20633439719676971, "learning_rate": 1e-05, "loss": 0.9848, "step": 69500 }, { "epoch": 61.563330380868024, "grad_norm": 0.24570763111114502, "learning_rate": 1e-05, "loss": 1.0241, "step": 69505 }, { "epoch": 61.567759078830825, "grad_norm": 0.25317081809043884, "learning_rate": 1e-05, "loss": 0.9843, "step": 69510 }, { "epoch": 61.57218777679362, "grad_norm": 0.2093810886144638, "learning_rate": 1e-05, "loss": 1.0144, "step": 69515 }, { "epoch": 61.57661647475642, "grad_norm": 0.2280571311712265, "learning_rate": 1e-05, "loss": 1.0272, "step": 69520 }, { "epoch": 61.58104517271922, "grad_norm": 0.23558080196380615, "learning_rate": 1e-05, "loss": 0.9989, "step": 69525 }, { "epoch": 61.58547387068202, "grad_norm": 0.24911385774612427, "learning_rate": 1e-05, "loss": 0.912, "step": 69530 }, { "epoch": 61.58990256864482, "grad_norm": 0.24621254205703735, "learning_rate": 1e-05, "loss": 1.0157, "step": 69535 }, { "epoch": 61.59433126660762, "grad_norm": 0.24742159247398376, "learning_rate": 1e-05, "loss": 0.9204, "step": 69540 }, { "epoch": 61.59875996457042, "grad_norm": 0.2405882477760315, "learning_rate": 1e-05, "loss": 1.0087, "step": 69545 }, { "epoch": 61.60318866253321, "grad_norm": 0.24292337894439697, "learning_rate": 1e-05, "loss": 0.9662, "step": 69550 }, { "epoch": 61.607617360496015, "grad_norm": 0.23934440314769745, "learning_rate": 1e-05, "loss": 0.9892, "step": 69555 }, { "epoch": 61.612046058458816, "grad_norm": 0.23483149707317352, "learning_rate": 1e-05, "loss": 1.0088, "step": 69560 }, { "epoch": 61.61647475642161, "grad_norm": 0.23129723966121674, "learning_rate": 1e-05, "loss": 0.9731, "step": 69565 }, { "epoch": 61.62090345438441, "grad_norm": 0.22783353924751282, "learning_rate": 1e-05, "loss": 0.9581, "step": 69570 }, { "epoch": 61.62533215234721, "grad_norm": 0.2411622554063797, "learning_rate": 1e-05, "loss": 0.9297, "step": 69575 }, { "epoch": 61.62976085031001, "grad_norm": 0.23716354370117188, "learning_rate": 1e-05, "loss": 0.9962, "step": 69580 }, { "epoch": 61.63418954827281, "grad_norm": 0.22356528043746948, "learning_rate": 1e-05, "loss": 1.0344, "step": 69585 }, { "epoch": 61.63861824623561, "grad_norm": 0.25518184900283813, "learning_rate": 1e-05, "loss": 1.0172, "step": 69590 }, { "epoch": 61.6430469441984, "grad_norm": 0.20435483753681183, "learning_rate": 1e-05, "loss": 0.9532, "step": 69595 }, { "epoch": 61.647475642161204, "grad_norm": 0.2521303594112396, "learning_rate": 1e-05, "loss": 1.0117, "step": 69600 }, { "epoch": 61.651904340124005, "grad_norm": 0.255447655916214, "learning_rate": 1e-05, "loss": 0.9267, "step": 69605 }, { "epoch": 61.6563330380868, "grad_norm": 0.23234157264232635, "learning_rate": 1e-05, "loss": 0.9788, "step": 69610 }, { "epoch": 61.6607617360496, "grad_norm": 0.30756673216819763, "learning_rate": 1e-05, "loss": 1.0053, "step": 69615 }, { "epoch": 61.6651904340124, "grad_norm": 0.21867334842681885, "learning_rate": 1e-05, "loss": 0.9375, "step": 69620 }, { "epoch": 61.669619131975196, "grad_norm": 0.22728191316127777, "learning_rate": 1e-05, "loss": 0.9761, "step": 69625 }, { "epoch": 61.674047829938, "grad_norm": 0.27508410811424255, "learning_rate": 1e-05, "loss": 0.964, "step": 69630 }, { "epoch": 61.6784765279008, "grad_norm": 0.23752889037132263, "learning_rate": 1e-05, "loss": 0.9043, "step": 69635 }, { "epoch": 61.68290522586359, "grad_norm": 0.2650087773799896, "learning_rate": 1e-05, "loss": 0.9284, "step": 69640 }, { "epoch": 61.687333923826394, "grad_norm": 0.2092810422182083, "learning_rate": 1e-05, "loss": 0.9258, "step": 69645 }, { "epoch": 61.691762621789195, "grad_norm": 0.24645790457725525, "learning_rate": 1e-05, "loss": 0.9405, "step": 69650 }, { "epoch": 61.696191319751996, "grad_norm": 0.2182760089635849, "learning_rate": 1e-05, "loss": 0.9903, "step": 69655 }, { "epoch": 61.70062001771479, "grad_norm": 0.2168237566947937, "learning_rate": 1e-05, "loss": 1.017, "step": 69660 }, { "epoch": 61.70504871567759, "grad_norm": 0.24498319625854492, "learning_rate": 1e-05, "loss": 0.9729, "step": 69665 }, { "epoch": 61.70947741364039, "grad_norm": 0.23338940739631653, "learning_rate": 1e-05, "loss": 0.9444, "step": 69670 }, { "epoch": 61.71390611160319, "grad_norm": 0.2907017469406128, "learning_rate": 1e-05, "loss": 0.9879, "step": 69675 }, { "epoch": 61.71833480956599, "grad_norm": 0.23821742832660675, "learning_rate": 1e-05, "loss": 0.9535, "step": 69680 }, { "epoch": 61.72276350752879, "grad_norm": 0.2678869366645813, "learning_rate": 1e-05, "loss": 0.9497, "step": 69685 }, { "epoch": 61.72719220549158, "grad_norm": 0.22680503129959106, "learning_rate": 1e-05, "loss": 0.942, "step": 69690 }, { "epoch": 61.731620903454385, "grad_norm": 0.18340849876403809, "learning_rate": 1e-05, "loss": 0.9277, "step": 69695 }, { "epoch": 61.736049601417186, "grad_norm": 0.2458120435476303, "learning_rate": 1e-05, "loss": 0.9985, "step": 69700 }, { "epoch": 61.74047829937998, "grad_norm": 0.2705990672111511, "learning_rate": 1e-05, "loss": 1.0319, "step": 69705 }, { "epoch": 61.74490699734278, "grad_norm": 0.24120868742465973, "learning_rate": 1e-05, "loss": 0.9874, "step": 69710 }, { "epoch": 61.74933569530558, "grad_norm": 0.20870280265808105, "learning_rate": 1e-05, "loss": 0.977, "step": 69715 }, { "epoch": 61.753764393268376, "grad_norm": 0.21546325087547302, "learning_rate": 1e-05, "loss": 1.0359, "step": 69720 }, { "epoch": 61.75819309123118, "grad_norm": 0.2226325422525406, "learning_rate": 1e-05, "loss": 0.9434, "step": 69725 }, { "epoch": 61.76262178919398, "grad_norm": 0.250235915184021, "learning_rate": 1e-05, "loss": 0.9773, "step": 69730 }, { "epoch": 61.76705048715677, "grad_norm": 0.24084600806236267, "learning_rate": 1e-05, "loss": 0.9732, "step": 69735 }, { "epoch": 61.771479185119574, "grad_norm": 0.24350860714912415, "learning_rate": 1e-05, "loss": 0.9333, "step": 69740 }, { "epoch": 61.775907883082375, "grad_norm": 0.24917399883270264, "learning_rate": 1e-05, "loss": 0.9761, "step": 69745 }, { "epoch": 61.78033658104517, "grad_norm": 0.23366564512252808, "learning_rate": 1e-05, "loss": 0.9981, "step": 69750 }, { "epoch": 61.78476527900797, "grad_norm": 0.24146410822868347, "learning_rate": 1e-05, "loss": 0.9419, "step": 69755 }, { "epoch": 61.78919397697077, "grad_norm": 0.22105148434638977, "learning_rate": 1e-05, "loss": 0.979, "step": 69760 }, { "epoch": 61.79362267493357, "grad_norm": 0.21910695731639862, "learning_rate": 1e-05, "loss": 0.8971, "step": 69765 }, { "epoch": 61.79805137289637, "grad_norm": 0.22292330861091614, "learning_rate": 1e-05, "loss": 1.0111, "step": 69770 }, { "epoch": 61.80248007085917, "grad_norm": 0.22276896238327026, "learning_rate": 1e-05, "loss": 0.9269, "step": 69775 }, { "epoch": 61.80690876882197, "grad_norm": 0.2110166847705841, "learning_rate": 1e-05, "loss": 0.9382, "step": 69780 }, { "epoch": 61.811337466784764, "grad_norm": 0.22309884428977966, "learning_rate": 1e-05, "loss": 0.9456, "step": 69785 }, { "epoch": 61.815766164747565, "grad_norm": 0.2487981766462326, "learning_rate": 1e-05, "loss": 0.9679, "step": 69790 }, { "epoch": 61.820194862710366, "grad_norm": 0.27073320746421814, "learning_rate": 1e-05, "loss": 0.9549, "step": 69795 }, { "epoch": 61.82462356067316, "grad_norm": 0.22163337469100952, "learning_rate": 1e-05, "loss": 0.9905, "step": 69800 }, { "epoch": 61.82905225863596, "grad_norm": 0.23547673225402832, "learning_rate": 1e-05, "loss": 0.9582, "step": 69805 }, { "epoch": 61.83348095659876, "grad_norm": 0.24921423196792603, "learning_rate": 1e-05, "loss": 0.9126, "step": 69810 }, { "epoch": 61.83790965456156, "grad_norm": 0.24800798296928406, "learning_rate": 1e-05, "loss": 0.978, "step": 69815 }, { "epoch": 61.84233835252436, "grad_norm": 0.24171341955661774, "learning_rate": 1e-05, "loss": 1.0131, "step": 69820 }, { "epoch": 61.84676705048716, "grad_norm": 0.20354153215885162, "learning_rate": 1e-05, "loss": 0.9614, "step": 69825 }, { "epoch": 61.85119574844995, "grad_norm": 0.2333570122718811, "learning_rate": 1e-05, "loss": 0.9505, "step": 69830 }, { "epoch": 61.855624446412754, "grad_norm": 0.2476746290922165, "learning_rate": 1e-05, "loss": 0.9825, "step": 69835 }, { "epoch": 61.860053144375556, "grad_norm": 0.22960172593593597, "learning_rate": 1e-05, "loss": 1.0265, "step": 69840 }, { "epoch": 61.86448184233835, "grad_norm": 0.22044894099235535, "learning_rate": 1e-05, "loss": 0.9454, "step": 69845 }, { "epoch": 61.86891054030115, "grad_norm": 0.20115643739700317, "learning_rate": 1e-05, "loss": 0.9803, "step": 69850 }, { "epoch": 61.87333923826395, "grad_norm": 0.24040508270263672, "learning_rate": 1e-05, "loss": 0.9489, "step": 69855 }, { "epoch": 61.877767936226746, "grad_norm": 0.255266934633255, "learning_rate": 1e-05, "loss": 1.0084, "step": 69860 }, { "epoch": 61.88219663418955, "grad_norm": 0.20590266585350037, "learning_rate": 1e-05, "loss": 0.9302, "step": 69865 }, { "epoch": 61.88662533215235, "grad_norm": 0.24051718413829803, "learning_rate": 1e-05, "loss": 0.9632, "step": 69870 }, { "epoch": 61.89105403011514, "grad_norm": 0.20873750746250153, "learning_rate": 1e-05, "loss": 0.9457, "step": 69875 }, { "epoch": 61.895482728077944, "grad_norm": 0.25857263803482056, "learning_rate": 1e-05, "loss": 0.9564, "step": 69880 }, { "epoch": 61.899911426040745, "grad_norm": 0.2931264340877533, "learning_rate": 1e-05, "loss": 0.9797, "step": 69885 }, { "epoch": 61.90434012400354, "grad_norm": 0.2375732809305191, "learning_rate": 1e-05, "loss": 0.971, "step": 69890 }, { "epoch": 61.90876882196634, "grad_norm": 0.24140547215938568, "learning_rate": 1e-05, "loss": 0.9639, "step": 69895 }, { "epoch": 61.91319751992914, "grad_norm": 0.23719781637191772, "learning_rate": 1e-05, "loss": 0.9538, "step": 69900 }, { "epoch": 61.91762621789194, "grad_norm": 0.22623473405838013, "learning_rate": 1e-05, "loss": 1.0349, "step": 69905 }, { "epoch": 61.92205491585474, "grad_norm": 0.2743055522441864, "learning_rate": 1e-05, "loss": 0.9725, "step": 69910 }, { "epoch": 61.92648361381754, "grad_norm": 0.23880910873413086, "learning_rate": 1e-05, "loss": 0.9569, "step": 69915 }, { "epoch": 61.93091231178034, "grad_norm": 0.2551307678222656, "learning_rate": 1e-05, "loss": 0.9573, "step": 69920 }, { "epoch": 61.935341009743134, "grad_norm": 0.2756955325603485, "learning_rate": 1e-05, "loss": 0.9631, "step": 69925 }, { "epoch": 61.939769707705935, "grad_norm": 0.24531473219394684, "learning_rate": 1e-05, "loss": 0.9316, "step": 69930 }, { "epoch": 61.944198405668736, "grad_norm": 0.2602996230125427, "learning_rate": 1e-05, "loss": 1.0032, "step": 69935 }, { "epoch": 61.94862710363153, "grad_norm": 0.2426404505968094, "learning_rate": 1e-05, "loss": 0.9584, "step": 69940 }, { "epoch": 61.95305580159433, "grad_norm": 0.2642937898635864, "learning_rate": 1e-05, "loss": 0.9618, "step": 69945 }, { "epoch": 61.95748449955713, "grad_norm": 0.2578623294830322, "learning_rate": 1e-05, "loss": 0.9543, "step": 69950 }, { "epoch": 61.96191319751993, "grad_norm": 0.25468024611473083, "learning_rate": 1e-05, "loss": 0.9394, "step": 69955 }, { "epoch": 61.96634189548273, "grad_norm": 0.23923556506633759, "learning_rate": 1e-05, "loss": 1.0009, "step": 69960 }, { "epoch": 61.97077059344553, "grad_norm": 0.250541090965271, "learning_rate": 1e-05, "loss": 0.962, "step": 69965 }, { "epoch": 61.97519929140832, "grad_norm": 0.24246199429035187, "learning_rate": 1e-05, "loss": 0.9583, "step": 69970 }, { "epoch": 61.979627989371124, "grad_norm": 0.23483256995677948, "learning_rate": 1e-05, "loss": 0.9521, "step": 69975 }, { "epoch": 61.984056687333926, "grad_norm": 0.2540358603000641, "learning_rate": 1e-05, "loss": 1.0195, "step": 69980 }, { "epoch": 61.98848538529672, "grad_norm": 0.22133643925189972, "learning_rate": 1e-05, "loss": 0.9752, "step": 69985 }, { "epoch": 61.99291408325952, "grad_norm": 0.23597507178783417, "learning_rate": 1e-05, "loss": 0.9517, "step": 69990 }, { "epoch": 61.99734278122232, "grad_norm": 0.2267870455980301, "learning_rate": 1e-05, "loss": 0.968, "step": 69995 }, { "epoch": 62.001771479185116, "grad_norm": 0.25215357542037964, "learning_rate": 1e-05, "loss": 0.9533, "step": 70000 }, { "epoch": 62.00620017714792, "grad_norm": 0.21305692195892334, "learning_rate": 1e-05, "loss": 0.9656, "step": 70005 }, { "epoch": 62.01062887511072, "grad_norm": 0.2484603077173233, "learning_rate": 1e-05, "loss": 0.9938, "step": 70010 }, { "epoch": 62.01505757307352, "grad_norm": 0.2464698851108551, "learning_rate": 1e-05, "loss": 0.9825, "step": 70015 }, { "epoch": 62.019486271036314, "grad_norm": 0.21229656040668488, "learning_rate": 1e-05, "loss": 0.8915, "step": 70020 }, { "epoch": 62.023914968999115, "grad_norm": 0.250098317861557, "learning_rate": 1e-05, "loss": 0.9616, "step": 70025 }, { "epoch": 62.028343666961916, "grad_norm": 0.23205868899822235, "learning_rate": 1e-05, "loss": 0.9582, "step": 70030 }, { "epoch": 62.03277236492471, "grad_norm": 0.21758829057216644, "learning_rate": 1e-05, "loss": 0.9861, "step": 70035 }, { "epoch": 62.03720106288751, "grad_norm": 0.2773418426513672, "learning_rate": 1e-05, "loss": 0.8937, "step": 70040 }, { "epoch": 62.04162976085031, "grad_norm": 0.2416914701461792, "learning_rate": 1e-05, "loss": 0.9353, "step": 70045 }, { "epoch": 62.04605845881311, "grad_norm": 0.23452645540237427, "learning_rate": 1e-05, "loss": 0.9797, "step": 70050 }, { "epoch": 62.05048715677591, "grad_norm": 0.25741443037986755, "learning_rate": 1e-05, "loss": 0.9817, "step": 70055 }, { "epoch": 62.05491585473871, "grad_norm": 0.23296645283699036, "learning_rate": 1e-05, "loss": 0.9507, "step": 70060 }, { "epoch": 62.0593445527015, "grad_norm": 0.2542116940021515, "learning_rate": 1e-05, "loss": 0.9397, "step": 70065 }, { "epoch": 62.063773250664305, "grad_norm": 0.23072609305381775, "learning_rate": 1e-05, "loss": 0.9922, "step": 70070 }, { "epoch": 62.068201948627106, "grad_norm": 0.2435091882944107, "learning_rate": 1e-05, "loss": 1.0098, "step": 70075 }, { "epoch": 62.0726306465899, "grad_norm": 0.25461915135383606, "learning_rate": 1e-05, "loss": 1.0071, "step": 70080 }, { "epoch": 62.0770593445527, "grad_norm": 0.24691841006278992, "learning_rate": 1e-05, "loss": 0.9905, "step": 70085 }, { "epoch": 62.0814880425155, "grad_norm": 0.23524101078510284, "learning_rate": 1e-05, "loss": 0.9463, "step": 70090 }, { "epoch": 62.0859167404783, "grad_norm": 0.21469426155090332, "learning_rate": 1e-05, "loss": 0.9844, "step": 70095 }, { "epoch": 62.0903454384411, "grad_norm": 0.2569428086280823, "learning_rate": 1e-05, "loss": 0.979, "step": 70100 }, { "epoch": 62.0947741364039, "grad_norm": 0.25578826665878296, "learning_rate": 1e-05, "loss": 0.9436, "step": 70105 }, { "epoch": 62.09920283436669, "grad_norm": 0.2667850852012634, "learning_rate": 1e-05, "loss": 1.0041, "step": 70110 }, { "epoch": 62.103631532329494, "grad_norm": 0.2515510320663452, "learning_rate": 1e-05, "loss": 0.9961, "step": 70115 }, { "epoch": 62.108060230292296, "grad_norm": 0.21684180200099945, "learning_rate": 1e-05, "loss": 1.0015, "step": 70120 }, { "epoch": 62.11248892825509, "grad_norm": 0.25035208463668823, "learning_rate": 1e-05, "loss": 1.0175, "step": 70125 }, { "epoch": 62.11691762621789, "grad_norm": 0.23104338347911835, "learning_rate": 1e-05, "loss": 0.997, "step": 70130 }, { "epoch": 62.12134632418069, "grad_norm": 0.22898367047309875, "learning_rate": 1e-05, "loss": 1.0124, "step": 70135 }, { "epoch": 62.12577502214349, "grad_norm": 0.2066776305437088, "learning_rate": 1e-05, "loss": 0.9862, "step": 70140 }, { "epoch": 62.13020372010629, "grad_norm": 0.2225179374217987, "learning_rate": 1e-05, "loss": 0.9715, "step": 70145 }, { "epoch": 62.13463241806909, "grad_norm": 0.24942712485790253, "learning_rate": 1e-05, "loss": 0.9576, "step": 70150 }, { "epoch": 62.13906111603189, "grad_norm": 0.19825562834739685, "learning_rate": 1e-05, "loss": 0.9187, "step": 70155 }, { "epoch": 62.143489813994684, "grad_norm": 0.2645453214645386, "learning_rate": 1e-05, "loss": 0.9564, "step": 70160 }, { "epoch": 62.147918511957485, "grad_norm": 0.24226155877113342, "learning_rate": 1e-05, "loss": 0.9658, "step": 70165 }, { "epoch": 62.152347209920286, "grad_norm": 0.2127036154270172, "learning_rate": 1e-05, "loss": 0.9177, "step": 70170 }, { "epoch": 62.15677590788308, "grad_norm": 0.22022129595279694, "learning_rate": 1e-05, "loss": 0.9486, "step": 70175 }, { "epoch": 62.16120460584588, "grad_norm": 0.2422579973936081, "learning_rate": 1e-05, "loss": 0.9427, "step": 70180 }, { "epoch": 62.16563330380868, "grad_norm": 0.19733184576034546, "learning_rate": 1e-05, "loss": 0.9304, "step": 70185 }, { "epoch": 62.17006200177148, "grad_norm": 0.2555609941482544, "learning_rate": 1e-05, "loss": 1.0455, "step": 70190 }, { "epoch": 62.17449069973428, "grad_norm": 0.2240137755870819, "learning_rate": 1e-05, "loss": 0.9825, "step": 70195 }, { "epoch": 62.17891939769708, "grad_norm": 0.26627662777900696, "learning_rate": 1e-05, "loss": 0.9706, "step": 70200 }, { "epoch": 62.18334809565987, "grad_norm": 0.31554529070854187, "learning_rate": 1e-05, "loss": 0.9671, "step": 70205 }, { "epoch": 62.187776793622675, "grad_norm": 0.2500566244125366, "learning_rate": 1e-05, "loss": 0.9516, "step": 70210 }, { "epoch": 62.192205491585476, "grad_norm": 0.24197325110435486, "learning_rate": 1e-05, "loss": 0.9668, "step": 70215 }, { "epoch": 62.19663418954827, "grad_norm": 0.26426810026168823, "learning_rate": 1e-05, "loss": 1.027, "step": 70220 }, { "epoch": 62.20106288751107, "grad_norm": 0.2679857611656189, "learning_rate": 1e-05, "loss": 0.9058, "step": 70225 }, { "epoch": 62.20549158547387, "grad_norm": 0.20104993879795074, "learning_rate": 1e-05, "loss": 0.9898, "step": 70230 }, { "epoch": 62.20992028343667, "grad_norm": 0.24380265176296234, "learning_rate": 1e-05, "loss": 0.9622, "step": 70235 }, { "epoch": 62.21434898139947, "grad_norm": 0.22323939204216003, "learning_rate": 1e-05, "loss": 0.9622, "step": 70240 }, { "epoch": 62.21877767936227, "grad_norm": 0.25179633498191833, "learning_rate": 1e-05, "loss": 0.9671, "step": 70245 }, { "epoch": 62.22320637732506, "grad_norm": 0.23339669406414032, "learning_rate": 1e-05, "loss": 0.9409, "step": 70250 }, { "epoch": 62.227635075287864, "grad_norm": 0.26794078946113586, "learning_rate": 1e-05, "loss": 0.96, "step": 70255 }, { "epoch": 62.232063773250665, "grad_norm": 0.24298353493213654, "learning_rate": 1e-05, "loss": 0.9531, "step": 70260 }, { "epoch": 62.23649247121347, "grad_norm": 0.22416622936725616, "learning_rate": 1e-05, "loss": 0.938, "step": 70265 }, { "epoch": 62.24092116917626, "grad_norm": 0.24579066038131714, "learning_rate": 1e-05, "loss": 0.9341, "step": 70270 }, { "epoch": 62.24534986713906, "grad_norm": 0.2506297826766968, "learning_rate": 1e-05, "loss": 0.9898, "step": 70275 }, { "epoch": 62.24977856510186, "grad_norm": 0.24719461798667908, "learning_rate": 1e-05, "loss": 0.9803, "step": 70280 }, { "epoch": 62.25420726306466, "grad_norm": 0.19893841445446014, "learning_rate": 1e-05, "loss": 0.9147, "step": 70285 }, { "epoch": 62.25863596102746, "grad_norm": 0.2632090449333191, "learning_rate": 1e-05, "loss": 0.9282, "step": 70290 }, { "epoch": 62.26306465899026, "grad_norm": 0.27455762028694153, "learning_rate": 1e-05, "loss": 0.9683, "step": 70295 }, { "epoch": 62.267493356953054, "grad_norm": 0.3039446175098419, "learning_rate": 1e-05, "loss": 0.9603, "step": 70300 }, { "epoch": 62.271922054915855, "grad_norm": 0.2396402359008789, "learning_rate": 1e-05, "loss": 0.9899, "step": 70305 }, { "epoch": 62.276350752878656, "grad_norm": 0.2342238873243332, "learning_rate": 1e-05, "loss": 0.9798, "step": 70310 }, { "epoch": 62.28077945084145, "grad_norm": 0.20139279961585999, "learning_rate": 1e-05, "loss": 0.9671, "step": 70315 }, { "epoch": 62.28520814880425, "grad_norm": 0.2541227638721466, "learning_rate": 1e-05, "loss": 0.9997, "step": 70320 }, { "epoch": 62.28963684676705, "grad_norm": 0.26092657446861267, "learning_rate": 1e-05, "loss": 0.9807, "step": 70325 }, { "epoch": 62.29406554472985, "grad_norm": 0.2117588073015213, "learning_rate": 1e-05, "loss": 0.9644, "step": 70330 }, { "epoch": 62.29849424269265, "grad_norm": 0.22186078131198883, "learning_rate": 1e-05, "loss": 1.0051, "step": 70335 }, { "epoch": 62.30292294065545, "grad_norm": 0.2326059639453888, "learning_rate": 1e-05, "loss": 0.9995, "step": 70340 }, { "epoch": 62.30735163861824, "grad_norm": 0.251787006855011, "learning_rate": 1e-05, "loss": 0.9379, "step": 70345 }, { "epoch": 62.311780336581045, "grad_norm": 0.28133392333984375, "learning_rate": 1e-05, "loss": 0.9351, "step": 70350 }, { "epoch": 62.316209034543846, "grad_norm": 0.2618042528629303, "learning_rate": 1e-05, "loss": 0.9615, "step": 70355 }, { "epoch": 62.32063773250664, "grad_norm": 0.21033257246017456, "learning_rate": 1e-05, "loss": 0.9734, "step": 70360 }, { "epoch": 62.32506643046944, "grad_norm": 0.2005743533372879, "learning_rate": 1e-05, "loss": 0.9328, "step": 70365 }, { "epoch": 62.32949512843224, "grad_norm": 0.21412776410579681, "learning_rate": 1e-05, "loss": 0.9732, "step": 70370 }, { "epoch": 62.333923826395036, "grad_norm": 0.24447116255760193, "learning_rate": 1e-05, "loss": 0.9616, "step": 70375 }, { "epoch": 62.33835252435784, "grad_norm": 0.20145437121391296, "learning_rate": 1e-05, "loss": 0.9794, "step": 70380 }, { "epoch": 62.34278122232064, "grad_norm": 0.2918694317340851, "learning_rate": 1e-05, "loss": 0.9808, "step": 70385 }, { "epoch": 62.34720992028344, "grad_norm": 0.25959402322769165, "learning_rate": 1e-05, "loss": 1.0275, "step": 70390 }, { "epoch": 62.351638618246234, "grad_norm": 0.29668527841567993, "learning_rate": 1e-05, "loss": 0.9574, "step": 70395 }, { "epoch": 62.356067316209035, "grad_norm": 0.2272024303674698, "learning_rate": 1e-05, "loss": 0.9405, "step": 70400 }, { "epoch": 62.36049601417184, "grad_norm": 0.23765182495117188, "learning_rate": 1e-05, "loss": 0.9916, "step": 70405 }, { "epoch": 62.36492471213463, "grad_norm": 0.2663049101829529, "learning_rate": 1e-05, "loss": 1.0061, "step": 70410 }, { "epoch": 62.36935341009743, "grad_norm": 0.23255185782909393, "learning_rate": 1e-05, "loss": 0.9973, "step": 70415 }, { "epoch": 62.37378210806023, "grad_norm": 0.27086231112480164, "learning_rate": 1e-05, "loss": 0.9316, "step": 70420 }, { "epoch": 62.37821080602303, "grad_norm": 0.21752439439296722, "learning_rate": 1e-05, "loss": 1.046, "step": 70425 }, { "epoch": 62.38263950398583, "grad_norm": 0.2265576422214508, "learning_rate": 1e-05, "loss": 0.9488, "step": 70430 }, { "epoch": 62.38706820194863, "grad_norm": 0.2743570804595947, "learning_rate": 1e-05, "loss": 0.9682, "step": 70435 }, { "epoch": 62.391496899911424, "grad_norm": 0.26250413060188293, "learning_rate": 1e-05, "loss": 0.9812, "step": 70440 }, { "epoch": 62.395925597874225, "grad_norm": 0.24101440608501434, "learning_rate": 1e-05, "loss": 0.9165, "step": 70445 }, { "epoch": 62.400354295837026, "grad_norm": 0.22953088581562042, "learning_rate": 1e-05, "loss": 0.9021, "step": 70450 }, { "epoch": 62.40478299379982, "grad_norm": 0.32851457595825195, "learning_rate": 1e-05, "loss": 0.9319, "step": 70455 }, { "epoch": 62.40921169176262, "grad_norm": 0.2697694003582001, "learning_rate": 1e-05, "loss": 0.9881, "step": 70460 }, { "epoch": 62.41364038972542, "grad_norm": 0.21063600480556488, "learning_rate": 1e-05, "loss": 0.9363, "step": 70465 }, { "epoch": 62.41806908768822, "grad_norm": 0.21948233246803284, "learning_rate": 1e-05, "loss": 0.9914, "step": 70470 }, { "epoch": 62.42249778565102, "grad_norm": 0.23780766129493713, "learning_rate": 1e-05, "loss": 0.9095, "step": 70475 }, { "epoch": 62.42692648361382, "grad_norm": 0.22337086498737335, "learning_rate": 1e-05, "loss": 0.9607, "step": 70480 }, { "epoch": 62.43135518157661, "grad_norm": 0.24864935874938965, "learning_rate": 1e-05, "loss": 0.9648, "step": 70485 }, { "epoch": 62.435783879539414, "grad_norm": 0.25221261382102966, "learning_rate": 1e-05, "loss": 0.9909, "step": 70490 }, { "epoch": 62.440212577502216, "grad_norm": 0.22788521647453308, "learning_rate": 1e-05, "loss": 0.9401, "step": 70495 }, { "epoch": 62.44464127546502, "grad_norm": 0.23247942328453064, "learning_rate": 1e-05, "loss": 1.0143, "step": 70500 }, { "epoch": 62.44906997342781, "grad_norm": 0.24569058418273926, "learning_rate": 1e-05, "loss": 0.9753, "step": 70505 }, { "epoch": 62.45349867139061, "grad_norm": 0.3016890585422516, "learning_rate": 1e-05, "loss": 1.0195, "step": 70510 }, { "epoch": 62.45792736935341, "grad_norm": 0.24460209906101227, "learning_rate": 1e-05, "loss": 0.9897, "step": 70515 }, { "epoch": 62.46235606731621, "grad_norm": 0.22484734654426575, "learning_rate": 1e-05, "loss": 0.9676, "step": 70520 }, { "epoch": 62.46678476527901, "grad_norm": 0.22628504037857056, "learning_rate": 1e-05, "loss": 1.0206, "step": 70525 }, { "epoch": 62.47121346324181, "grad_norm": 0.2309778332710266, "learning_rate": 1e-05, "loss": 1.019, "step": 70530 }, { "epoch": 62.475642161204604, "grad_norm": 0.23494674265384674, "learning_rate": 1e-05, "loss": 0.9528, "step": 70535 }, { "epoch": 62.480070859167405, "grad_norm": 0.22516237199306488, "learning_rate": 1e-05, "loss": 0.9572, "step": 70540 }, { "epoch": 62.484499557130206, "grad_norm": 0.22986817359924316, "learning_rate": 1e-05, "loss": 0.9732, "step": 70545 }, { "epoch": 62.488928255093, "grad_norm": 0.22091831266880035, "learning_rate": 1e-05, "loss": 0.9519, "step": 70550 }, { "epoch": 62.4933569530558, "grad_norm": 0.21616840362548828, "learning_rate": 1e-05, "loss": 1.0017, "step": 70555 }, { "epoch": 62.4977856510186, "grad_norm": 0.25040966272354126, "learning_rate": 1e-05, "loss": 1.0382, "step": 70560 }, { "epoch": 62.5022143489814, "grad_norm": 0.2095143347978592, "learning_rate": 1e-05, "loss": 0.9619, "step": 70565 }, { "epoch": 62.5066430469442, "grad_norm": 0.21836015582084656, "learning_rate": 1e-05, "loss": 0.984, "step": 70570 }, { "epoch": 62.511071744907, "grad_norm": 0.22668179869651794, "learning_rate": 1e-05, "loss": 0.9029, "step": 70575 }, { "epoch": 62.515500442869794, "grad_norm": 0.23770293593406677, "learning_rate": 1e-05, "loss": 0.9286, "step": 70580 }, { "epoch": 62.519929140832595, "grad_norm": 0.26848098635673523, "learning_rate": 1e-05, "loss": 0.9515, "step": 70585 }, { "epoch": 62.524357838795396, "grad_norm": 0.2318633645772934, "learning_rate": 1e-05, "loss": 1.0048, "step": 70590 }, { "epoch": 62.52878653675819, "grad_norm": 0.2602775990962982, "learning_rate": 1e-05, "loss": 0.9464, "step": 70595 }, { "epoch": 62.53321523472099, "grad_norm": 0.2255481779575348, "learning_rate": 1e-05, "loss": 0.9793, "step": 70600 }, { "epoch": 62.53764393268379, "grad_norm": 0.23722165822982788, "learning_rate": 1e-05, "loss": 0.939, "step": 70605 }, { "epoch": 62.54207263064659, "grad_norm": 0.23507237434387207, "learning_rate": 1e-05, "loss": 0.9639, "step": 70610 }, { "epoch": 62.54650132860939, "grad_norm": 0.24398557841777802, "learning_rate": 1e-05, "loss": 1.0022, "step": 70615 }, { "epoch": 62.55093002657219, "grad_norm": 0.22806020081043243, "learning_rate": 1e-05, "loss": 0.9671, "step": 70620 }, { "epoch": 62.55535872453498, "grad_norm": 0.24399439990520477, "learning_rate": 1e-05, "loss": 1.0006, "step": 70625 }, { "epoch": 62.559787422497784, "grad_norm": 0.21834306418895721, "learning_rate": 1e-05, "loss": 0.9487, "step": 70630 }, { "epoch": 62.564216120460586, "grad_norm": 0.2814664840698242, "learning_rate": 1e-05, "loss": 0.9165, "step": 70635 }, { "epoch": 62.56864481842339, "grad_norm": 0.22061903774738312, "learning_rate": 1e-05, "loss": 0.9158, "step": 70640 }, { "epoch": 62.57307351638618, "grad_norm": 0.22100220620632172, "learning_rate": 1e-05, "loss": 0.9383, "step": 70645 }, { "epoch": 62.57750221434898, "grad_norm": 0.27740535140037537, "learning_rate": 1e-05, "loss": 0.968, "step": 70650 }, { "epoch": 62.58193091231178, "grad_norm": 0.23855076730251312, "learning_rate": 1e-05, "loss": 0.9287, "step": 70655 }, { "epoch": 62.58635961027458, "grad_norm": 0.2657449245452881, "learning_rate": 1e-05, "loss": 0.9724, "step": 70660 }, { "epoch": 62.59078830823738, "grad_norm": 0.28157928586006165, "learning_rate": 1e-05, "loss": 1.0015, "step": 70665 }, { "epoch": 62.59521700620018, "grad_norm": 0.24486863613128662, "learning_rate": 1e-05, "loss": 0.9647, "step": 70670 }, { "epoch": 62.599645704162974, "grad_norm": 0.2499570995569229, "learning_rate": 1e-05, "loss": 0.9704, "step": 70675 }, { "epoch": 62.604074402125775, "grad_norm": 0.25258275866508484, "learning_rate": 1e-05, "loss": 0.9801, "step": 70680 }, { "epoch": 62.608503100088576, "grad_norm": 0.24013161659240723, "learning_rate": 1e-05, "loss": 0.9239, "step": 70685 }, { "epoch": 62.61293179805137, "grad_norm": 0.2438352257013321, "learning_rate": 1e-05, "loss": 1.0117, "step": 70690 }, { "epoch": 62.61736049601417, "grad_norm": 0.25572359561920166, "learning_rate": 1e-05, "loss": 0.968, "step": 70695 }, { "epoch": 62.62178919397697, "grad_norm": 0.22293159365653992, "learning_rate": 1e-05, "loss": 0.9709, "step": 70700 }, { "epoch": 62.62621789193977, "grad_norm": 0.2221287488937378, "learning_rate": 1e-05, "loss": 0.974, "step": 70705 }, { "epoch": 62.63064658990257, "grad_norm": 0.2349444031715393, "learning_rate": 1e-05, "loss": 0.9548, "step": 70710 }, { "epoch": 62.63507528786537, "grad_norm": 0.21984359622001648, "learning_rate": 1e-05, "loss": 0.9551, "step": 70715 }, { "epoch": 62.63950398582816, "grad_norm": 0.22304648160934448, "learning_rate": 1e-05, "loss": 1.0195, "step": 70720 }, { "epoch": 62.643932683790965, "grad_norm": 0.2281741350889206, "learning_rate": 1e-05, "loss": 0.9596, "step": 70725 }, { "epoch": 62.648361381753766, "grad_norm": 0.22168071568012238, "learning_rate": 1e-05, "loss": 1.0511, "step": 70730 }, { "epoch": 62.65279007971656, "grad_norm": 0.20521099865436554, "learning_rate": 1e-05, "loss": 0.9537, "step": 70735 }, { "epoch": 62.65721877767936, "grad_norm": 0.2321094423532486, "learning_rate": 1e-05, "loss": 1.0273, "step": 70740 }, { "epoch": 62.66164747564216, "grad_norm": 0.20615778863430023, "learning_rate": 1e-05, "loss": 0.9742, "step": 70745 }, { "epoch": 62.666076173604964, "grad_norm": 0.23900240659713745, "learning_rate": 1e-05, "loss": 0.9948, "step": 70750 }, { "epoch": 62.67050487156776, "grad_norm": 0.23599305748939514, "learning_rate": 1e-05, "loss": 0.9443, "step": 70755 }, { "epoch": 62.67493356953056, "grad_norm": 0.26711955666542053, "learning_rate": 1e-05, "loss": 0.9717, "step": 70760 }, { "epoch": 62.67936226749336, "grad_norm": 0.22688394784927368, "learning_rate": 1e-05, "loss": 1.0397, "step": 70765 }, { "epoch": 62.683790965456154, "grad_norm": 0.2360047549009323, "learning_rate": 1e-05, "loss": 0.9777, "step": 70770 }, { "epoch": 62.688219663418955, "grad_norm": 0.286018043756485, "learning_rate": 1e-05, "loss": 0.9892, "step": 70775 }, { "epoch": 62.69264836138176, "grad_norm": 0.21145670115947723, "learning_rate": 1e-05, "loss": 0.9658, "step": 70780 }, { "epoch": 62.69707705934455, "grad_norm": 0.2701103389263153, "learning_rate": 1e-05, "loss": 0.966, "step": 70785 }, { "epoch": 62.70150575730735, "grad_norm": 0.2744518518447876, "learning_rate": 1e-05, "loss": 0.9687, "step": 70790 }, { "epoch": 62.70593445527015, "grad_norm": 0.25096407532691956, "learning_rate": 1e-05, "loss": 0.9552, "step": 70795 }, { "epoch": 62.71036315323295, "grad_norm": 0.24646686017513275, "learning_rate": 1e-05, "loss": 0.9566, "step": 70800 }, { "epoch": 62.71479185119575, "grad_norm": 0.2550307810306549, "learning_rate": 1e-05, "loss": 0.9332, "step": 70805 }, { "epoch": 62.71922054915855, "grad_norm": 0.240295872092247, "learning_rate": 1e-05, "loss": 0.951, "step": 70810 }, { "epoch": 62.723649247121344, "grad_norm": 0.2812483608722687, "learning_rate": 1e-05, "loss": 0.9978, "step": 70815 }, { "epoch": 62.728077945084145, "grad_norm": 0.231979638338089, "learning_rate": 1e-05, "loss": 0.9675, "step": 70820 }, { "epoch": 62.732506643046946, "grad_norm": 0.24539978802204132, "learning_rate": 1e-05, "loss": 0.9829, "step": 70825 }, { "epoch": 62.73693534100974, "grad_norm": 0.27017080783843994, "learning_rate": 1e-05, "loss": 1.0021, "step": 70830 }, { "epoch": 62.74136403897254, "grad_norm": 0.2220301777124405, "learning_rate": 1e-05, "loss": 0.9891, "step": 70835 }, { "epoch": 62.74579273693534, "grad_norm": 0.2858640253543854, "learning_rate": 1e-05, "loss": 1.0215, "step": 70840 }, { "epoch": 62.75022143489814, "grad_norm": 0.25268128514289856, "learning_rate": 1e-05, "loss": 0.9577, "step": 70845 }, { "epoch": 62.75465013286094, "grad_norm": 0.21712400019168854, "learning_rate": 1e-05, "loss": 0.9426, "step": 70850 }, { "epoch": 62.75907883082374, "grad_norm": 0.22115670144557953, "learning_rate": 1e-05, "loss": 0.9939, "step": 70855 }, { "epoch": 62.76350752878653, "grad_norm": 0.20956386625766754, "learning_rate": 1e-05, "loss": 0.9478, "step": 70860 }, { "epoch": 62.767936226749335, "grad_norm": 0.22501781582832336, "learning_rate": 1e-05, "loss": 1.0086, "step": 70865 }, { "epoch": 62.772364924712136, "grad_norm": 0.24337220191955566, "learning_rate": 1e-05, "loss": 0.9043, "step": 70870 }, { "epoch": 62.77679362267494, "grad_norm": 0.22969840466976166, "learning_rate": 1e-05, "loss": 0.9206, "step": 70875 }, { "epoch": 62.78122232063773, "grad_norm": 0.22875837981700897, "learning_rate": 1e-05, "loss": 0.9602, "step": 70880 }, { "epoch": 62.78565101860053, "grad_norm": 0.19865119457244873, "learning_rate": 1e-05, "loss": 0.9464, "step": 70885 }, { "epoch": 62.79007971656333, "grad_norm": 0.23736608028411865, "learning_rate": 1e-05, "loss": 0.968, "step": 70890 }, { "epoch": 62.79450841452613, "grad_norm": 0.2708814740180969, "learning_rate": 1e-05, "loss": 0.9407, "step": 70895 }, { "epoch": 62.79893711248893, "grad_norm": 0.22967125475406647, "learning_rate": 1e-05, "loss": 0.945, "step": 70900 }, { "epoch": 62.80336581045173, "grad_norm": 0.22558431327342987, "learning_rate": 1e-05, "loss": 0.9026, "step": 70905 }, { "epoch": 62.807794508414524, "grad_norm": 0.22295479476451874, "learning_rate": 1e-05, "loss": 0.9724, "step": 70910 }, { "epoch": 62.812223206377325, "grad_norm": 0.2839282751083374, "learning_rate": 1e-05, "loss": 0.9266, "step": 70915 }, { "epoch": 62.81665190434013, "grad_norm": 0.23600752651691437, "learning_rate": 1e-05, "loss": 0.9052, "step": 70920 }, { "epoch": 62.82108060230292, "grad_norm": 0.30957773327827454, "learning_rate": 1e-05, "loss": 0.9541, "step": 70925 }, { "epoch": 62.82550930026572, "grad_norm": 0.21585112810134888, "learning_rate": 1e-05, "loss": 0.97, "step": 70930 }, { "epoch": 62.82993799822852, "grad_norm": 0.2283477783203125, "learning_rate": 1e-05, "loss": 0.9643, "step": 70935 }, { "epoch": 62.83436669619132, "grad_norm": 0.21538548171520233, "learning_rate": 1e-05, "loss": 0.9339, "step": 70940 }, { "epoch": 62.83879539415412, "grad_norm": 0.2529657483100891, "learning_rate": 1e-05, "loss": 0.9706, "step": 70945 }, { "epoch": 62.84322409211692, "grad_norm": 0.22416697442531586, "learning_rate": 1e-05, "loss": 0.9678, "step": 70950 }, { "epoch": 62.847652790079714, "grad_norm": 0.2417793720960617, "learning_rate": 1e-05, "loss": 0.9751, "step": 70955 }, { "epoch": 62.852081488042515, "grad_norm": 0.2744355797767639, "learning_rate": 1e-05, "loss": 1.016, "step": 70960 }, { "epoch": 62.856510186005316, "grad_norm": 0.2282475084066391, "learning_rate": 1e-05, "loss": 0.9547, "step": 70965 }, { "epoch": 62.86093888396811, "grad_norm": 0.23291133344173431, "learning_rate": 1e-05, "loss": 0.9326, "step": 70970 }, { "epoch": 62.86536758193091, "grad_norm": 0.2341146320104599, "learning_rate": 1e-05, "loss": 0.997, "step": 70975 }, { "epoch": 62.86979627989371, "grad_norm": 0.2175760120153427, "learning_rate": 1e-05, "loss": 0.9558, "step": 70980 }, { "epoch": 62.87422497785651, "grad_norm": 0.21026121079921722, "learning_rate": 1e-05, "loss": 0.9835, "step": 70985 }, { "epoch": 62.87865367581931, "grad_norm": 0.22480472922325134, "learning_rate": 1e-05, "loss": 0.9979, "step": 70990 }, { "epoch": 62.88308237378211, "grad_norm": 0.22405695915222168, "learning_rate": 1e-05, "loss": 0.9846, "step": 70995 }, { "epoch": 62.88751107174491, "grad_norm": 0.27734410762786865, "learning_rate": 1e-05, "loss": 0.9374, "step": 71000 }, { "epoch": 62.891939769707704, "grad_norm": 0.25987330079078674, "learning_rate": 1e-05, "loss": 0.9505, "step": 71005 }, { "epoch": 62.896368467670506, "grad_norm": 0.2726013660430908, "learning_rate": 1e-05, "loss": 0.9599, "step": 71010 }, { "epoch": 62.90079716563331, "grad_norm": 0.2747352719306946, "learning_rate": 1e-05, "loss": 0.9714, "step": 71015 }, { "epoch": 62.9052258635961, "grad_norm": 0.2267664074897766, "learning_rate": 1e-05, "loss": 0.9586, "step": 71020 }, { "epoch": 62.9096545615589, "grad_norm": 0.24789299070835114, "learning_rate": 1e-05, "loss": 0.9231, "step": 71025 }, { "epoch": 62.9140832595217, "grad_norm": 0.25955846905708313, "learning_rate": 1e-05, "loss": 0.9246, "step": 71030 }, { "epoch": 62.9185119574845, "grad_norm": 0.24115349352359772, "learning_rate": 1e-05, "loss": 0.9409, "step": 71035 }, { "epoch": 62.9229406554473, "grad_norm": 0.2540762424468994, "learning_rate": 1e-05, "loss": 0.9238, "step": 71040 }, { "epoch": 62.9273693534101, "grad_norm": 0.30991625785827637, "learning_rate": 1e-05, "loss": 0.9059, "step": 71045 }, { "epoch": 62.931798051372894, "grad_norm": 0.30098727345466614, "learning_rate": 1e-05, "loss": 0.9742, "step": 71050 }, { "epoch": 62.936226749335695, "grad_norm": 0.2261722981929779, "learning_rate": 1e-05, "loss": 1.001, "step": 71055 }, { "epoch": 62.9406554472985, "grad_norm": 0.2749830484390259, "learning_rate": 1e-05, "loss": 0.9872, "step": 71060 }, { "epoch": 62.94508414526129, "grad_norm": 0.23942245543003082, "learning_rate": 1e-05, "loss": 0.931, "step": 71065 }, { "epoch": 62.94951284322409, "grad_norm": 0.2497217208147049, "learning_rate": 1e-05, "loss": 0.9673, "step": 71070 }, { "epoch": 62.95394154118689, "grad_norm": 0.2423870861530304, "learning_rate": 1e-05, "loss": 0.9962, "step": 71075 }, { "epoch": 62.95837023914969, "grad_norm": 0.23776225745677948, "learning_rate": 1e-05, "loss": 0.9519, "step": 71080 }, { "epoch": 62.96279893711249, "grad_norm": 0.22410358488559723, "learning_rate": 1e-05, "loss": 0.9528, "step": 71085 }, { "epoch": 62.96722763507529, "grad_norm": 0.2592860758304596, "learning_rate": 1e-05, "loss": 0.9796, "step": 71090 }, { "epoch": 62.971656333038084, "grad_norm": 0.2347031831741333, "learning_rate": 1e-05, "loss": 0.9779, "step": 71095 }, { "epoch": 62.976085031000885, "grad_norm": 0.256392240524292, "learning_rate": 1e-05, "loss": 0.9568, "step": 71100 }, { "epoch": 62.980513728963686, "grad_norm": 0.31371161341667175, "learning_rate": 1e-05, "loss": 0.9467, "step": 71105 }, { "epoch": 62.98494242692648, "grad_norm": 0.25879257917404175, "learning_rate": 1e-05, "loss": 0.9643, "step": 71110 }, { "epoch": 62.98937112488928, "grad_norm": 0.26602286100387573, "learning_rate": 1e-05, "loss": 0.9154, "step": 71115 }, { "epoch": 62.99379982285208, "grad_norm": 0.28012028336524963, "learning_rate": 1e-05, "loss": 0.9291, "step": 71120 }, { "epoch": 62.998228520814884, "grad_norm": 0.2472427934408188, "learning_rate": 1e-05, "loss": 0.9765, "step": 71125 }, { "epoch": 63.00265721877768, "grad_norm": 0.21624986827373505, "learning_rate": 1e-05, "loss": 0.9693, "step": 71130 }, { "epoch": 63.00708591674048, "grad_norm": 0.2152177393436432, "learning_rate": 1e-05, "loss": 0.9979, "step": 71135 }, { "epoch": 63.01151461470328, "grad_norm": 0.2457621693611145, "learning_rate": 1e-05, "loss": 0.9729, "step": 71140 }, { "epoch": 63.015943312666074, "grad_norm": 0.21310003101825714, "learning_rate": 1e-05, "loss": 0.923, "step": 71145 }, { "epoch": 63.020372010628876, "grad_norm": 0.2093529999256134, "learning_rate": 1e-05, "loss": 0.9581, "step": 71150 }, { "epoch": 63.02480070859168, "grad_norm": 0.2449687123298645, "learning_rate": 1e-05, "loss": 1.0302, "step": 71155 }, { "epoch": 63.02922940655447, "grad_norm": 0.2772822976112366, "learning_rate": 1e-05, "loss": 0.9894, "step": 71160 }, { "epoch": 63.03365810451727, "grad_norm": 0.23291809856891632, "learning_rate": 1e-05, "loss": 0.9768, "step": 71165 }, { "epoch": 63.03808680248007, "grad_norm": 0.23847796022891998, "learning_rate": 1e-05, "loss": 0.965, "step": 71170 }, { "epoch": 63.04251550044287, "grad_norm": 0.2273930460214615, "learning_rate": 1e-05, "loss": 0.9578, "step": 71175 }, { "epoch": 63.04694419840567, "grad_norm": 0.22644150257110596, "learning_rate": 1e-05, "loss": 0.9807, "step": 71180 }, { "epoch": 63.05137289636847, "grad_norm": 0.25376737117767334, "learning_rate": 1e-05, "loss": 0.9655, "step": 71185 }, { "epoch": 63.055801594331264, "grad_norm": 0.2361001819372177, "learning_rate": 1e-05, "loss": 0.9631, "step": 71190 }, { "epoch": 63.060230292294065, "grad_norm": 0.2948321998119354, "learning_rate": 1e-05, "loss": 0.9524, "step": 71195 }, { "epoch": 63.064658990256866, "grad_norm": 0.23853449523448944, "learning_rate": 1e-05, "loss": 0.9525, "step": 71200 }, { "epoch": 63.06908768821966, "grad_norm": 0.23073089122772217, "learning_rate": 1e-05, "loss": 0.9526, "step": 71205 }, { "epoch": 63.07351638618246, "grad_norm": 0.26085132360458374, "learning_rate": 1e-05, "loss": 0.9692, "step": 71210 }, { "epoch": 63.07794508414526, "grad_norm": 0.24262838065624237, "learning_rate": 1e-05, "loss": 0.9851, "step": 71215 }, { "epoch": 63.08237378210806, "grad_norm": 0.22725196182727814, "learning_rate": 1e-05, "loss": 0.9628, "step": 71220 }, { "epoch": 63.08680248007086, "grad_norm": 0.25621891021728516, "learning_rate": 1e-05, "loss": 0.9439, "step": 71225 }, { "epoch": 63.09123117803366, "grad_norm": 0.20483575761318207, "learning_rate": 1e-05, "loss": 0.9736, "step": 71230 }, { "epoch": 63.09565987599645, "grad_norm": 0.23137125372886658, "learning_rate": 1e-05, "loss": 0.9818, "step": 71235 }, { "epoch": 63.100088573959255, "grad_norm": 0.22602321207523346, "learning_rate": 1e-05, "loss": 0.9752, "step": 71240 }, { "epoch": 63.104517271922056, "grad_norm": 0.2657042443752289, "learning_rate": 1e-05, "loss": 0.9755, "step": 71245 }, { "epoch": 63.10894596988486, "grad_norm": 0.23578548431396484, "learning_rate": 1e-05, "loss": 0.9019, "step": 71250 }, { "epoch": 63.11337466784765, "grad_norm": 0.22353316843509674, "learning_rate": 1e-05, "loss": 0.9911, "step": 71255 }, { "epoch": 63.11780336581045, "grad_norm": 0.2512418329715729, "learning_rate": 1e-05, "loss": 0.9656, "step": 71260 }, { "epoch": 63.122232063773254, "grad_norm": 0.22224655747413635, "learning_rate": 1e-05, "loss": 1.0213, "step": 71265 }, { "epoch": 63.12666076173605, "grad_norm": 0.2689298093318939, "learning_rate": 1e-05, "loss": 0.9402, "step": 71270 }, { "epoch": 63.13108945969885, "grad_norm": 0.2617366313934326, "learning_rate": 1e-05, "loss": 1.002, "step": 71275 }, { "epoch": 63.13551815766165, "grad_norm": 0.3360458016395569, "learning_rate": 1e-05, "loss": 0.9603, "step": 71280 }, { "epoch": 63.139946855624444, "grad_norm": 0.2393290251493454, "learning_rate": 1e-05, "loss": 0.8593, "step": 71285 }, { "epoch": 63.144375553587246, "grad_norm": 0.24505038559436798, "learning_rate": 1e-05, "loss": 1.0136, "step": 71290 }, { "epoch": 63.14880425155005, "grad_norm": 0.27710598707199097, "learning_rate": 1e-05, "loss": 0.9393, "step": 71295 }, { "epoch": 63.15323294951284, "grad_norm": 0.2641274929046631, "learning_rate": 1e-05, "loss": 0.8996, "step": 71300 }, { "epoch": 63.15766164747564, "grad_norm": 0.2361448109149933, "learning_rate": 1e-05, "loss": 0.9644, "step": 71305 }, { "epoch": 63.16209034543844, "grad_norm": 0.2303447425365448, "learning_rate": 1e-05, "loss": 0.9516, "step": 71310 }, { "epoch": 63.16651904340124, "grad_norm": 0.238552525639534, "learning_rate": 1e-05, "loss": 0.995, "step": 71315 }, { "epoch": 63.17094774136404, "grad_norm": 0.23807229101657867, "learning_rate": 1e-05, "loss": 0.9308, "step": 71320 }, { "epoch": 63.17537643932684, "grad_norm": 0.2130032479763031, "learning_rate": 1e-05, "loss": 0.9638, "step": 71325 }, { "epoch": 63.179805137289634, "grad_norm": 0.19494573771953583, "learning_rate": 1e-05, "loss": 0.9844, "step": 71330 }, { "epoch": 63.184233835252435, "grad_norm": 0.24512140452861786, "learning_rate": 1e-05, "loss": 0.9596, "step": 71335 }, { "epoch": 63.188662533215236, "grad_norm": 0.26029345393180847, "learning_rate": 1e-05, "loss": 0.9776, "step": 71340 }, { "epoch": 63.19309123117803, "grad_norm": 0.2286328673362732, "learning_rate": 1e-05, "loss": 0.9415, "step": 71345 }, { "epoch": 63.19751992914083, "grad_norm": 0.23320408165454865, "learning_rate": 1e-05, "loss": 0.95, "step": 71350 }, { "epoch": 63.20194862710363, "grad_norm": 0.2648443579673767, "learning_rate": 1e-05, "loss": 0.9419, "step": 71355 }, { "epoch": 63.20637732506643, "grad_norm": 0.25996845960617065, "learning_rate": 1e-05, "loss": 0.9884, "step": 71360 }, { "epoch": 63.21080602302923, "grad_norm": 0.27111363410949707, "learning_rate": 1e-05, "loss": 0.9696, "step": 71365 }, { "epoch": 63.21523472099203, "grad_norm": 0.2943268418312073, "learning_rate": 1e-05, "loss": 0.987, "step": 71370 }, { "epoch": 63.21966341895483, "grad_norm": 0.25065112113952637, "learning_rate": 1e-05, "loss": 0.9315, "step": 71375 }, { "epoch": 63.224092116917625, "grad_norm": 0.26021096110343933, "learning_rate": 1e-05, "loss": 0.9223, "step": 71380 }, { "epoch": 63.228520814880426, "grad_norm": 0.2268586903810501, "learning_rate": 1e-05, "loss": 0.9633, "step": 71385 }, { "epoch": 63.23294951284323, "grad_norm": 0.23720504343509674, "learning_rate": 1e-05, "loss": 0.9289, "step": 71390 }, { "epoch": 63.23737821080602, "grad_norm": 0.24163025617599487, "learning_rate": 1e-05, "loss": 0.9884, "step": 71395 }, { "epoch": 63.24180690876882, "grad_norm": 0.2378177046775818, "learning_rate": 1e-05, "loss": 0.9726, "step": 71400 }, { "epoch": 63.246235606731624, "grad_norm": 0.24011115729808807, "learning_rate": 1e-05, "loss": 0.9922, "step": 71405 }, { "epoch": 63.25066430469442, "grad_norm": 0.2730252146720886, "learning_rate": 1e-05, "loss": 0.9672, "step": 71410 }, { "epoch": 63.25509300265722, "grad_norm": 0.23889029026031494, "learning_rate": 1e-05, "loss": 0.9217, "step": 71415 }, { "epoch": 63.25952170062002, "grad_norm": 0.7274526953697205, "learning_rate": 1e-05, "loss": 0.9492, "step": 71420 }, { "epoch": 63.263950398582814, "grad_norm": 0.27280551195144653, "learning_rate": 1e-05, "loss": 0.9186, "step": 71425 }, { "epoch": 63.268379096545615, "grad_norm": 0.27427947521209717, "learning_rate": 1e-05, "loss": 0.964, "step": 71430 }, { "epoch": 63.27280779450842, "grad_norm": 0.26814350485801697, "learning_rate": 1e-05, "loss": 1.0079, "step": 71435 }, { "epoch": 63.27723649247121, "grad_norm": 0.2442561835050583, "learning_rate": 1e-05, "loss": 0.9901, "step": 71440 }, { "epoch": 63.28166519043401, "grad_norm": 0.22382545471191406, "learning_rate": 1e-05, "loss": 0.972, "step": 71445 }, { "epoch": 63.28609388839681, "grad_norm": 0.2901572287082672, "learning_rate": 1e-05, "loss": 1.0091, "step": 71450 }, { "epoch": 63.29052258635961, "grad_norm": 0.23307915031909943, "learning_rate": 1e-05, "loss": 0.9901, "step": 71455 }, { "epoch": 63.29495128432241, "grad_norm": 0.24821032583713531, "learning_rate": 1e-05, "loss": 0.9468, "step": 71460 }, { "epoch": 63.29937998228521, "grad_norm": 0.25503966212272644, "learning_rate": 1e-05, "loss": 0.9465, "step": 71465 }, { "epoch": 63.303808680248004, "grad_norm": 0.27275925874710083, "learning_rate": 1e-05, "loss": 0.9475, "step": 71470 }, { "epoch": 63.308237378210805, "grad_norm": 0.2198983132839203, "learning_rate": 1e-05, "loss": 0.9047, "step": 71475 }, { "epoch": 63.312666076173606, "grad_norm": 0.22909466922283173, "learning_rate": 1e-05, "loss": 0.9634, "step": 71480 }, { "epoch": 63.31709477413641, "grad_norm": 0.3188401460647583, "learning_rate": 1e-05, "loss": 0.8985, "step": 71485 }, { "epoch": 63.3215234720992, "grad_norm": 0.2388790398836136, "learning_rate": 1e-05, "loss": 0.9476, "step": 71490 }, { "epoch": 63.325952170062, "grad_norm": 0.21546371281147003, "learning_rate": 1e-05, "loss": 1.0058, "step": 71495 }, { "epoch": 63.330380868024804, "grad_norm": 0.2413410097360611, "learning_rate": 1e-05, "loss": 1.003, "step": 71500 }, { "epoch": 63.3348095659876, "grad_norm": 0.33520326018333435, "learning_rate": 1e-05, "loss": 0.9778, "step": 71505 }, { "epoch": 63.3392382639504, "grad_norm": 0.24245035648345947, "learning_rate": 1e-05, "loss": 0.9683, "step": 71510 }, { "epoch": 63.3436669619132, "grad_norm": 0.24469655752182007, "learning_rate": 1e-05, "loss": 0.9354, "step": 71515 }, { "epoch": 63.348095659875995, "grad_norm": 0.2601113021373749, "learning_rate": 1e-05, "loss": 1.0263, "step": 71520 }, { "epoch": 63.352524357838796, "grad_norm": 0.2387658804655075, "learning_rate": 1e-05, "loss": 0.989, "step": 71525 }, { "epoch": 63.3569530558016, "grad_norm": 0.23423263430595398, "learning_rate": 1e-05, "loss": 0.9403, "step": 71530 }, { "epoch": 63.36138175376439, "grad_norm": 0.24021795392036438, "learning_rate": 1e-05, "loss": 0.9448, "step": 71535 }, { "epoch": 63.36581045172719, "grad_norm": 0.22292114794254303, "learning_rate": 1e-05, "loss": 0.9427, "step": 71540 }, { "epoch": 63.37023914968999, "grad_norm": 0.21965794265270233, "learning_rate": 1e-05, "loss": 0.9494, "step": 71545 }, { "epoch": 63.37466784765279, "grad_norm": 0.2741168737411499, "learning_rate": 1e-05, "loss": 0.9605, "step": 71550 }, { "epoch": 63.37909654561559, "grad_norm": 0.24786996841430664, "learning_rate": 1e-05, "loss": 0.961, "step": 71555 }, { "epoch": 63.38352524357839, "grad_norm": 0.26409363746643066, "learning_rate": 1e-05, "loss": 0.978, "step": 71560 }, { "epoch": 63.387953941541184, "grad_norm": 0.26504209637641907, "learning_rate": 1e-05, "loss": 1.0012, "step": 71565 }, { "epoch": 63.392382639503985, "grad_norm": 0.2318962812423706, "learning_rate": 1e-05, "loss": 0.9594, "step": 71570 }, { "epoch": 63.39681133746679, "grad_norm": 0.2678954601287842, "learning_rate": 1e-05, "loss": 0.985, "step": 71575 }, { "epoch": 63.40124003542958, "grad_norm": 0.23237016797065735, "learning_rate": 1e-05, "loss": 0.9244, "step": 71580 }, { "epoch": 63.40566873339238, "grad_norm": 0.2803463935852051, "learning_rate": 1e-05, "loss": 0.9263, "step": 71585 }, { "epoch": 63.41009743135518, "grad_norm": 0.22890187799930573, "learning_rate": 1e-05, "loss": 1.0073, "step": 71590 }, { "epoch": 63.41452612931798, "grad_norm": 0.23410005867481232, "learning_rate": 1e-05, "loss": 0.9793, "step": 71595 }, { "epoch": 63.41895482728078, "grad_norm": 0.20497246086597443, "learning_rate": 1e-05, "loss": 0.9803, "step": 71600 }, { "epoch": 63.42338352524358, "grad_norm": 0.23170189559459686, "learning_rate": 1e-05, "loss": 0.9829, "step": 71605 }, { "epoch": 63.42781222320638, "grad_norm": 0.2319885641336441, "learning_rate": 1e-05, "loss": 0.9031, "step": 71610 }, { "epoch": 63.432240921169175, "grad_norm": 0.24319502711296082, "learning_rate": 1e-05, "loss": 0.9707, "step": 71615 }, { "epoch": 63.436669619131976, "grad_norm": 0.2243577241897583, "learning_rate": 1e-05, "loss": 0.9292, "step": 71620 }, { "epoch": 63.44109831709478, "grad_norm": 0.22944214940071106, "learning_rate": 1e-05, "loss": 0.9477, "step": 71625 }, { "epoch": 63.44552701505757, "grad_norm": 0.26419004797935486, "learning_rate": 1e-05, "loss": 0.9789, "step": 71630 }, { "epoch": 63.44995571302037, "grad_norm": 0.2424696683883667, "learning_rate": 1e-05, "loss": 0.9399, "step": 71635 }, { "epoch": 63.454384410983174, "grad_norm": 0.29082199931144714, "learning_rate": 1e-05, "loss": 0.9607, "step": 71640 }, { "epoch": 63.45881310894597, "grad_norm": 0.22004655003547668, "learning_rate": 1e-05, "loss": 0.9436, "step": 71645 }, { "epoch": 63.46324180690877, "grad_norm": 0.24860699474811554, "learning_rate": 1e-05, "loss": 1.007, "step": 71650 }, { "epoch": 63.46767050487157, "grad_norm": 0.24170424044132233, "learning_rate": 1e-05, "loss": 0.9828, "step": 71655 }, { "epoch": 63.472099202834364, "grad_norm": 0.2371632158756256, "learning_rate": 1e-05, "loss": 0.9553, "step": 71660 }, { "epoch": 63.476527900797166, "grad_norm": 0.22572371363639832, "learning_rate": 1e-05, "loss": 1.0072, "step": 71665 }, { "epoch": 63.48095659875997, "grad_norm": 0.23342150449752808, "learning_rate": 1e-05, "loss": 0.9682, "step": 71670 }, { "epoch": 63.48538529672276, "grad_norm": 0.21047848463058472, "learning_rate": 1e-05, "loss": 0.963, "step": 71675 }, { "epoch": 63.48981399468556, "grad_norm": 0.37906166911125183, "learning_rate": 1e-05, "loss": 0.9998, "step": 71680 }, { "epoch": 63.49424269264836, "grad_norm": 0.20648927986621857, "learning_rate": 1e-05, "loss": 1.0, "step": 71685 }, { "epoch": 63.49867139061116, "grad_norm": 0.2352408915758133, "learning_rate": 1e-05, "loss": 1.0005, "step": 71690 }, { "epoch": 63.50310008857396, "grad_norm": 0.2716488540172577, "learning_rate": 1e-05, "loss": 1.0003, "step": 71695 }, { "epoch": 63.50752878653676, "grad_norm": 0.2358202040195465, "learning_rate": 1e-05, "loss": 0.9975, "step": 71700 }, { "epoch": 63.511957484499554, "grad_norm": 0.24556708335876465, "learning_rate": 1e-05, "loss": 1.002, "step": 71705 }, { "epoch": 63.516386182462355, "grad_norm": 0.23424102365970612, "learning_rate": 1e-05, "loss": 1.0, "step": 71710 }, { "epoch": 63.520814880425156, "grad_norm": 0.2296905219554901, "learning_rate": 1e-05, "loss": 0.9081, "step": 71715 }, { "epoch": 63.52524357838795, "grad_norm": 0.2563684284687042, "learning_rate": 1e-05, "loss": 0.9178, "step": 71720 }, { "epoch": 63.52967227635075, "grad_norm": 0.26498961448669434, "learning_rate": 1e-05, "loss": 0.9388, "step": 71725 }, { "epoch": 63.53410097431355, "grad_norm": 0.2527771592140198, "learning_rate": 1e-05, "loss": 0.9901, "step": 71730 }, { "epoch": 63.538529672276354, "grad_norm": 0.30983543395996094, "learning_rate": 1e-05, "loss": 0.9556, "step": 71735 }, { "epoch": 63.54295837023915, "grad_norm": 0.2953641712665558, "learning_rate": 1e-05, "loss": 1.0072, "step": 71740 }, { "epoch": 63.54738706820195, "grad_norm": 0.2481699287891388, "learning_rate": 1e-05, "loss": 0.9598, "step": 71745 }, { "epoch": 63.55181576616475, "grad_norm": 0.23895703256130219, "learning_rate": 1e-05, "loss": 0.9357, "step": 71750 }, { "epoch": 63.556244464127545, "grad_norm": 0.24418756365776062, "learning_rate": 1e-05, "loss": 0.9091, "step": 71755 }, { "epoch": 63.560673162090346, "grad_norm": 0.24183018505573273, "learning_rate": 1e-05, "loss": 0.9526, "step": 71760 }, { "epoch": 63.56510186005315, "grad_norm": 0.23627054691314697, "learning_rate": 1e-05, "loss": 1.0355, "step": 71765 }, { "epoch": 63.56953055801594, "grad_norm": 0.2245316207408905, "learning_rate": 1e-05, "loss": 0.9839, "step": 71770 }, { "epoch": 63.57395925597874, "grad_norm": 0.23440031707286835, "learning_rate": 1e-05, "loss": 0.9382, "step": 71775 }, { "epoch": 63.578387953941544, "grad_norm": 0.2233452945947647, "learning_rate": 1e-05, "loss": 0.9233, "step": 71780 }, { "epoch": 63.58281665190434, "grad_norm": 0.28246229887008667, "learning_rate": 1e-05, "loss": 0.9751, "step": 71785 }, { "epoch": 63.58724534986714, "grad_norm": 0.25369954109191895, "learning_rate": 1e-05, "loss": 0.976, "step": 71790 }, { "epoch": 63.59167404782994, "grad_norm": 0.2215196043252945, "learning_rate": 1e-05, "loss": 0.9817, "step": 71795 }, { "epoch": 63.596102745792734, "grad_norm": 0.22083483636379242, "learning_rate": 1e-05, "loss": 0.9927, "step": 71800 }, { "epoch": 63.600531443755536, "grad_norm": 0.2275639921426773, "learning_rate": 1e-05, "loss": 0.9654, "step": 71805 }, { "epoch": 63.60496014171834, "grad_norm": 0.23594975471496582, "learning_rate": 1e-05, "loss": 1.0485, "step": 71810 }, { "epoch": 63.60938883968113, "grad_norm": 0.23736220598220825, "learning_rate": 1e-05, "loss": 0.9409, "step": 71815 }, { "epoch": 63.61381753764393, "grad_norm": 0.23253846168518066, "learning_rate": 1e-05, "loss": 1.0049, "step": 71820 }, { "epoch": 63.61824623560673, "grad_norm": 0.23370932042598724, "learning_rate": 1e-05, "loss": 1.0206, "step": 71825 }, { "epoch": 63.62267493356953, "grad_norm": 0.2638593912124634, "learning_rate": 1e-05, "loss": 0.9725, "step": 71830 }, { "epoch": 63.62710363153233, "grad_norm": 0.2509211301803589, "learning_rate": 1e-05, "loss": 0.9637, "step": 71835 }, { "epoch": 63.63153232949513, "grad_norm": 0.23171059787273407, "learning_rate": 1e-05, "loss": 0.9616, "step": 71840 }, { "epoch": 63.635961027457924, "grad_norm": 0.2203523814678192, "learning_rate": 1e-05, "loss": 0.9445, "step": 71845 }, { "epoch": 63.640389725420725, "grad_norm": 0.2221454530954361, "learning_rate": 1e-05, "loss": 0.9687, "step": 71850 }, { "epoch": 63.644818423383526, "grad_norm": 0.26503869891166687, "learning_rate": 1e-05, "loss": 0.9541, "step": 71855 }, { "epoch": 63.64924712134633, "grad_norm": 0.2604830265045166, "learning_rate": 1e-05, "loss": 0.9532, "step": 71860 }, { "epoch": 63.65367581930912, "grad_norm": 0.257392555475235, "learning_rate": 1e-05, "loss": 0.9799, "step": 71865 }, { "epoch": 63.65810451727192, "grad_norm": 0.257062166929245, "learning_rate": 1e-05, "loss": 0.9899, "step": 71870 }, { "epoch": 63.662533215234724, "grad_norm": 0.23192249238491058, "learning_rate": 1e-05, "loss": 0.972, "step": 71875 }, { "epoch": 63.66696191319752, "grad_norm": 0.2728661000728607, "learning_rate": 1e-05, "loss": 0.9644, "step": 71880 }, { "epoch": 63.67139061116032, "grad_norm": 0.2383316457271576, "learning_rate": 1e-05, "loss": 0.9784, "step": 71885 }, { "epoch": 63.67581930912312, "grad_norm": 0.24647721648216248, "learning_rate": 1e-05, "loss": 1.0405, "step": 71890 }, { "epoch": 63.680248007085915, "grad_norm": 0.18873251974582672, "learning_rate": 1e-05, "loss": 0.9424, "step": 71895 }, { "epoch": 63.684676705048716, "grad_norm": 0.20158159732818604, "learning_rate": 1e-05, "loss": 0.888, "step": 71900 }, { "epoch": 63.68910540301152, "grad_norm": 0.222474604845047, "learning_rate": 1e-05, "loss": 0.9592, "step": 71905 }, { "epoch": 63.69353410097431, "grad_norm": 0.21104910969734192, "learning_rate": 1e-05, "loss": 0.9631, "step": 71910 }, { "epoch": 63.69796279893711, "grad_norm": 0.23700547218322754, "learning_rate": 1e-05, "loss": 1.0, "step": 71915 }, { "epoch": 63.702391496899914, "grad_norm": 0.21648821234703064, "learning_rate": 1e-05, "loss": 0.9784, "step": 71920 }, { "epoch": 63.70682019486271, "grad_norm": 0.23354972898960114, "learning_rate": 1e-05, "loss": 0.9961, "step": 71925 }, { "epoch": 63.71124889282551, "grad_norm": 0.25457149744033813, "learning_rate": 1e-05, "loss": 0.925, "step": 71930 }, { "epoch": 63.71567759078831, "grad_norm": 0.25546082854270935, "learning_rate": 1e-05, "loss": 1.0027, "step": 71935 }, { "epoch": 63.720106288751104, "grad_norm": 0.24389851093292236, "learning_rate": 1e-05, "loss": 0.9539, "step": 71940 }, { "epoch": 63.724534986713905, "grad_norm": 0.27593424916267395, "learning_rate": 1e-05, "loss": 0.9628, "step": 71945 }, { "epoch": 63.72896368467671, "grad_norm": 0.23904871940612793, "learning_rate": 1e-05, "loss": 0.9848, "step": 71950 }, { "epoch": 63.7333923826395, "grad_norm": 0.2235032021999359, "learning_rate": 1e-05, "loss": 0.9591, "step": 71955 }, { "epoch": 63.7378210806023, "grad_norm": 0.2393193393945694, "learning_rate": 1e-05, "loss": 0.9876, "step": 71960 }, { "epoch": 63.7422497785651, "grad_norm": 0.2622387409210205, "learning_rate": 1e-05, "loss": 0.9406, "step": 71965 }, { "epoch": 63.7466784765279, "grad_norm": 0.23345951735973358, "learning_rate": 1e-05, "loss": 1.0003, "step": 71970 }, { "epoch": 63.7511071744907, "grad_norm": 0.28554511070251465, "learning_rate": 1e-05, "loss": 0.9454, "step": 71975 }, { "epoch": 63.7555358724535, "grad_norm": 0.33774247765541077, "learning_rate": 1e-05, "loss": 0.9396, "step": 71980 }, { "epoch": 63.7599645704163, "grad_norm": 0.23593991994857788, "learning_rate": 1e-05, "loss": 0.976, "step": 71985 }, { "epoch": 63.764393268379095, "grad_norm": 0.26405423879623413, "learning_rate": 1e-05, "loss": 0.9733, "step": 71990 }, { "epoch": 63.768821966341896, "grad_norm": 0.26620954275131226, "learning_rate": 1e-05, "loss": 0.9697, "step": 71995 }, { "epoch": 63.7732506643047, "grad_norm": 0.25992855429649353, "learning_rate": 1e-05, "loss": 0.9838, "step": 72000 }, { "epoch": 63.77767936226749, "grad_norm": 0.27696675062179565, "learning_rate": 1e-05, "loss": 0.9805, "step": 72005 }, { "epoch": 63.78210806023029, "grad_norm": 0.263491153717041, "learning_rate": 1e-05, "loss": 0.9693, "step": 72010 }, { "epoch": 63.786536758193094, "grad_norm": 0.26416218280792236, "learning_rate": 1e-05, "loss": 0.9667, "step": 72015 }, { "epoch": 63.79096545615589, "grad_norm": 0.23178862035274506, "learning_rate": 1e-05, "loss": 0.9725, "step": 72020 }, { "epoch": 63.79539415411869, "grad_norm": 0.23109832406044006, "learning_rate": 1e-05, "loss": 0.9981, "step": 72025 }, { "epoch": 63.79982285208149, "grad_norm": 0.2070980817079544, "learning_rate": 1e-05, "loss": 0.9979, "step": 72030 }, { "epoch": 63.804251550044285, "grad_norm": 0.2491668462753296, "learning_rate": 1e-05, "loss": 1.0059, "step": 72035 }, { "epoch": 63.808680248007086, "grad_norm": 0.24766898155212402, "learning_rate": 1e-05, "loss": 0.9443, "step": 72040 }, { "epoch": 63.81310894596989, "grad_norm": 0.21834276616573334, "learning_rate": 1e-05, "loss": 0.9083, "step": 72045 }, { "epoch": 63.81753764393268, "grad_norm": 0.24240577220916748, "learning_rate": 1e-05, "loss": 1.0007, "step": 72050 }, { "epoch": 63.82196634189548, "grad_norm": 0.28192341327667236, "learning_rate": 1e-05, "loss": 1.0055, "step": 72055 }, { "epoch": 63.826395039858284, "grad_norm": 0.2615720331668854, "learning_rate": 1e-05, "loss": 0.9601, "step": 72060 }, { "epoch": 63.83082373782108, "grad_norm": 0.28793010115623474, "learning_rate": 1e-05, "loss": 0.992, "step": 72065 }, { "epoch": 63.83525243578388, "grad_norm": 0.27413439750671387, "learning_rate": 1e-05, "loss": 0.9249, "step": 72070 }, { "epoch": 63.83968113374668, "grad_norm": 0.25113892555236816, "learning_rate": 1e-05, "loss": 1.0043, "step": 72075 }, { "epoch": 63.844109831709474, "grad_norm": 0.21922902762889862, "learning_rate": 1e-05, "loss": 0.9845, "step": 72080 }, { "epoch": 63.848538529672275, "grad_norm": 0.23861823976039886, "learning_rate": 1e-05, "loss": 0.979, "step": 72085 }, { "epoch": 63.85296722763508, "grad_norm": 0.2628633677959442, "learning_rate": 1e-05, "loss": 0.9684, "step": 72090 }, { "epoch": 63.85739592559787, "grad_norm": 0.20280368626117706, "learning_rate": 1e-05, "loss": 0.9009, "step": 72095 }, { "epoch": 63.86182462356067, "grad_norm": 0.22723224759101868, "learning_rate": 1e-05, "loss": 0.9968, "step": 72100 }, { "epoch": 63.86625332152347, "grad_norm": 0.2559661269187927, "learning_rate": 1e-05, "loss": 0.9829, "step": 72105 }, { "epoch": 63.870682019486274, "grad_norm": 0.22751013934612274, "learning_rate": 1e-05, "loss": 0.9013, "step": 72110 }, { "epoch": 63.87511071744907, "grad_norm": 0.2760406732559204, "learning_rate": 1e-05, "loss": 0.9781, "step": 72115 }, { "epoch": 63.87953941541187, "grad_norm": 0.23910555243492126, "learning_rate": 1e-05, "loss": 0.9879, "step": 72120 }, { "epoch": 63.88396811337467, "grad_norm": 0.2394097000360489, "learning_rate": 1e-05, "loss": 0.9257, "step": 72125 }, { "epoch": 63.888396811337465, "grad_norm": 0.2322576940059662, "learning_rate": 1e-05, "loss": 0.9606, "step": 72130 }, { "epoch": 63.892825509300266, "grad_norm": 0.20410165190696716, "learning_rate": 1e-05, "loss": 0.9003, "step": 72135 }, { "epoch": 63.89725420726307, "grad_norm": 0.2347601056098938, "learning_rate": 1e-05, "loss": 0.978, "step": 72140 }, { "epoch": 63.90168290522586, "grad_norm": 0.256761759519577, "learning_rate": 1e-05, "loss": 0.9857, "step": 72145 }, { "epoch": 63.90611160318866, "grad_norm": 0.22375032305717468, "learning_rate": 1e-05, "loss": 0.9116, "step": 72150 }, { "epoch": 63.910540301151464, "grad_norm": 0.23194104433059692, "learning_rate": 1e-05, "loss": 0.9426, "step": 72155 }, { "epoch": 63.91496899911426, "grad_norm": 0.21748337149620056, "learning_rate": 1e-05, "loss": 0.9998, "step": 72160 }, { "epoch": 63.91939769707706, "grad_norm": 0.22062312066555023, "learning_rate": 1e-05, "loss": 1.0071, "step": 72165 }, { "epoch": 63.92382639503986, "grad_norm": 0.30419921875, "learning_rate": 1e-05, "loss": 0.9775, "step": 72170 }, { "epoch": 63.928255093002655, "grad_norm": 0.24396952986717224, "learning_rate": 1e-05, "loss": 1.013, "step": 72175 }, { "epoch": 63.932683790965456, "grad_norm": 0.2516191601753235, "learning_rate": 1e-05, "loss": 1.0284, "step": 72180 }, { "epoch": 63.93711248892826, "grad_norm": 0.23053854703903198, "learning_rate": 1e-05, "loss": 0.9303, "step": 72185 }, { "epoch": 63.94154118689105, "grad_norm": 0.2863028049468994, "learning_rate": 1e-05, "loss": 0.9894, "step": 72190 }, { "epoch": 63.94596988485385, "grad_norm": 0.20314890146255493, "learning_rate": 1e-05, "loss": 0.9841, "step": 72195 }, { "epoch": 63.95039858281665, "grad_norm": 0.20461708307266235, "learning_rate": 1e-05, "loss": 0.942, "step": 72200 }, { "epoch": 63.95482728077945, "grad_norm": 0.26836422085762024, "learning_rate": 1e-05, "loss": 0.9273, "step": 72205 }, { "epoch": 63.95925597874225, "grad_norm": 0.28519341349601746, "learning_rate": 1e-05, "loss": 0.9746, "step": 72210 }, { "epoch": 63.96368467670505, "grad_norm": 0.25519445538520813, "learning_rate": 1e-05, "loss": 0.9763, "step": 72215 }, { "epoch": 63.96811337466785, "grad_norm": 0.315518856048584, "learning_rate": 1e-05, "loss": 0.939, "step": 72220 }, { "epoch": 63.972542072630645, "grad_norm": 0.2512063980102539, "learning_rate": 1e-05, "loss": 1.0419, "step": 72225 }, { "epoch": 63.97697077059345, "grad_norm": 0.2222267985343933, "learning_rate": 1e-05, "loss": 0.9384, "step": 72230 }, { "epoch": 63.98139946855625, "grad_norm": 0.2543501555919647, "learning_rate": 1e-05, "loss": 0.967, "step": 72235 }, { "epoch": 63.98582816651904, "grad_norm": 0.2504511773586273, "learning_rate": 1e-05, "loss": 0.9453, "step": 72240 }, { "epoch": 63.99025686448184, "grad_norm": 0.2905332148075104, "learning_rate": 1e-05, "loss": 0.9329, "step": 72245 }, { "epoch": 63.994685562444644, "grad_norm": 0.2392033487558365, "learning_rate": 1e-05, "loss": 0.9746, "step": 72250 }, { "epoch": 63.99911426040744, "grad_norm": 0.26713433861732483, "learning_rate": 1e-05, "loss": 0.9958, "step": 72255 }, { "epoch": 64.00354295837023, "grad_norm": 0.2496492713689804, "learning_rate": 1e-05, "loss": 0.989, "step": 72260 }, { "epoch": 64.00797165633304, "grad_norm": 0.2492613047361374, "learning_rate": 1e-05, "loss": 0.9591, "step": 72265 }, { "epoch": 64.01240035429583, "grad_norm": 0.230791836977005, "learning_rate": 1e-05, "loss": 0.9902, "step": 72270 }, { "epoch": 64.01682905225863, "grad_norm": 0.21724149584770203, "learning_rate": 1e-05, "loss": 0.9689, "step": 72275 }, { "epoch": 64.02125775022144, "grad_norm": 0.21066518127918243, "learning_rate": 1e-05, "loss": 1.0012, "step": 72280 }, { "epoch": 64.02568644818423, "grad_norm": 0.2353580743074417, "learning_rate": 1e-05, "loss": 1.0026, "step": 72285 }, { "epoch": 64.03011514614704, "grad_norm": 0.2535284757614136, "learning_rate": 1e-05, "loss": 0.943, "step": 72290 }, { "epoch": 64.03454384410983, "grad_norm": 0.2701921761035919, "learning_rate": 1e-05, "loss": 0.9802, "step": 72295 }, { "epoch": 64.03897254207263, "grad_norm": 0.22650481760501862, "learning_rate": 1e-05, "loss": 0.9108, "step": 72300 }, { "epoch": 64.04340124003544, "grad_norm": 0.26875796914100647, "learning_rate": 1e-05, "loss": 0.9626, "step": 72305 }, { "epoch": 64.04782993799823, "grad_norm": 0.27226752042770386, "learning_rate": 1e-05, "loss": 0.9771, "step": 72310 }, { "epoch": 64.05225863596102, "grad_norm": 0.26001182198524475, "learning_rate": 1e-05, "loss": 0.9682, "step": 72315 }, { "epoch": 64.05668733392383, "grad_norm": 0.2839227318763733, "learning_rate": 1e-05, "loss": 0.9334, "step": 72320 }, { "epoch": 64.06111603188663, "grad_norm": 0.2377711981534958, "learning_rate": 1e-05, "loss": 1.0246, "step": 72325 }, { "epoch": 64.06554472984942, "grad_norm": 0.2790060341358185, "learning_rate": 1e-05, "loss": 0.9777, "step": 72330 }, { "epoch": 64.06997342781223, "grad_norm": 0.2965708076953888, "learning_rate": 1e-05, "loss": 0.9868, "step": 72335 }, { "epoch": 64.07440212577502, "grad_norm": 0.2182554006576538, "learning_rate": 1e-05, "loss": 0.9747, "step": 72340 }, { "epoch": 64.07883082373782, "grad_norm": 0.2384357750415802, "learning_rate": 1e-05, "loss": 0.9616, "step": 72345 }, { "epoch": 64.08325952170063, "grad_norm": 0.2502974569797516, "learning_rate": 1e-05, "loss": 0.9445, "step": 72350 }, { "epoch": 64.08768821966342, "grad_norm": 0.24464303255081177, "learning_rate": 1e-05, "loss": 0.949, "step": 72355 }, { "epoch": 64.09211691762621, "grad_norm": 0.3505728244781494, "learning_rate": 1e-05, "loss": 0.9657, "step": 72360 }, { "epoch": 64.09654561558902, "grad_norm": 0.2568131685256958, "learning_rate": 1e-05, "loss": 0.9977, "step": 72365 }, { "epoch": 64.10097431355182, "grad_norm": 0.26593470573425293, "learning_rate": 1e-05, "loss": 0.9631, "step": 72370 }, { "epoch": 64.10540301151461, "grad_norm": 0.24346861243247986, "learning_rate": 1e-05, "loss": 0.9453, "step": 72375 }, { "epoch": 64.10983170947742, "grad_norm": 0.22596806287765503, "learning_rate": 1e-05, "loss": 0.9459, "step": 72380 }, { "epoch": 64.11426040744021, "grad_norm": 0.2355467975139618, "learning_rate": 1e-05, "loss": 1.0159, "step": 72385 }, { "epoch": 64.118689105403, "grad_norm": 0.26663997769355774, "learning_rate": 1e-05, "loss": 0.8946, "step": 72390 }, { "epoch": 64.12311780336582, "grad_norm": 0.2563101053237915, "learning_rate": 1e-05, "loss": 0.9689, "step": 72395 }, { "epoch": 64.12754650132861, "grad_norm": 0.23266133666038513, "learning_rate": 1e-05, "loss": 0.9582, "step": 72400 }, { "epoch": 64.1319751992914, "grad_norm": 0.25268301367759705, "learning_rate": 1e-05, "loss": 0.9839, "step": 72405 }, { "epoch": 64.13640389725421, "grad_norm": 0.27930742502212524, "learning_rate": 1e-05, "loss": 0.8997, "step": 72410 }, { "epoch": 64.140832595217, "grad_norm": 0.2327995002269745, "learning_rate": 1e-05, "loss": 0.9659, "step": 72415 }, { "epoch": 64.1452612931798, "grad_norm": 0.24939550459384918, "learning_rate": 1e-05, "loss": 0.976, "step": 72420 }, { "epoch": 64.14968999114261, "grad_norm": 0.3613887131214142, "learning_rate": 1e-05, "loss": 0.9717, "step": 72425 }, { "epoch": 64.1541186891054, "grad_norm": 0.25087034702301025, "learning_rate": 1e-05, "loss": 0.946, "step": 72430 }, { "epoch": 64.1585473870682, "grad_norm": 0.2715052366256714, "learning_rate": 1e-05, "loss": 0.9826, "step": 72435 }, { "epoch": 64.162976085031, "grad_norm": 0.2986374795436859, "learning_rate": 1e-05, "loss": 0.966, "step": 72440 }, { "epoch": 64.1674047829938, "grad_norm": 0.3153024911880493, "learning_rate": 1e-05, "loss": 0.9326, "step": 72445 }, { "epoch": 64.1718334809566, "grad_norm": 0.2618449032306671, "learning_rate": 1e-05, "loss": 1.0004, "step": 72450 }, { "epoch": 64.1762621789194, "grad_norm": 0.24955107271671295, "learning_rate": 1e-05, "loss": 1.0088, "step": 72455 }, { "epoch": 64.1806908768822, "grad_norm": 0.267992228269577, "learning_rate": 1e-05, "loss": 0.9084, "step": 72460 }, { "epoch": 64.18511957484499, "grad_norm": 0.23598214983940125, "learning_rate": 1e-05, "loss": 0.9894, "step": 72465 }, { "epoch": 64.1895482728078, "grad_norm": 0.262320876121521, "learning_rate": 1e-05, "loss": 0.9876, "step": 72470 }, { "epoch": 64.19397697077059, "grad_norm": 0.20598241686820984, "learning_rate": 1e-05, "loss": 0.9991, "step": 72475 }, { "epoch": 64.19840566873339, "grad_norm": 0.28596749901771545, "learning_rate": 1e-05, "loss": 0.9373, "step": 72480 }, { "epoch": 64.2028343666962, "grad_norm": 0.2384020835161209, "learning_rate": 1e-05, "loss": 1.0016, "step": 72485 }, { "epoch": 64.20726306465899, "grad_norm": 0.27076131105422974, "learning_rate": 1e-05, "loss": 0.9986, "step": 72490 }, { "epoch": 64.21169176262178, "grad_norm": 0.22116747498512268, "learning_rate": 1e-05, "loss": 0.9567, "step": 72495 }, { "epoch": 64.21612046058459, "grad_norm": 0.3124556243419647, "learning_rate": 1e-05, "loss": 0.9923, "step": 72500 }, { "epoch": 64.22054915854739, "grad_norm": 0.2269141525030136, "learning_rate": 1e-05, "loss": 0.9782, "step": 72505 }, { "epoch": 64.22497785651018, "grad_norm": 0.22330693900585175, "learning_rate": 1e-05, "loss": 0.9604, "step": 72510 }, { "epoch": 64.22940655447299, "grad_norm": 0.2259770631790161, "learning_rate": 1e-05, "loss": 1.0324, "step": 72515 }, { "epoch": 64.23383525243578, "grad_norm": 0.2648800015449524, "learning_rate": 1e-05, "loss": 0.9722, "step": 72520 }, { "epoch": 64.23826395039858, "grad_norm": 0.22514502704143524, "learning_rate": 1e-05, "loss": 0.9761, "step": 72525 }, { "epoch": 64.24269264836138, "grad_norm": 0.22584038972854614, "learning_rate": 1e-05, "loss": 0.8964, "step": 72530 }, { "epoch": 64.24712134632418, "grad_norm": 0.21360959112644196, "learning_rate": 1e-05, "loss": 0.96, "step": 72535 }, { "epoch": 64.25155004428699, "grad_norm": 0.23636780679225922, "learning_rate": 1e-05, "loss": 0.9831, "step": 72540 }, { "epoch": 64.25597874224978, "grad_norm": 0.2663382291793823, "learning_rate": 1e-05, "loss": 0.965, "step": 72545 }, { "epoch": 64.26040744021257, "grad_norm": 0.3076880872249603, "learning_rate": 1e-05, "loss": 1.006, "step": 72550 }, { "epoch": 64.26483613817538, "grad_norm": 0.2816512882709503, "learning_rate": 1e-05, "loss": 0.9628, "step": 72555 }, { "epoch": 64.26926483613818, "grad_norm": 0.3166913688182831, "learning_rate": 1e-05, "loss": 0.9813, "step": 72560 }, { "epoch": 64.27369353410097, "grad_norm": 0.26130399107933044, "learning_rate": 1e-05, "loss": 0.9808, "step": 72565 }, { "epoch": 64.27812223206378, "grad_norm": 0.18850895762443542, "learning_rate": 1e-05, "loss": 1.0157, "step": 72570 }, { "epoch": 64.28255093002657, "grad_norm": 0.24359814822673798, "learning_rate": 1e-05, "loss": 0.9674, "step": 72575 }, { "epoch": 64.28697962798937, "grad_norm": 0.21340949833393097, "learning_rate": 1e-05, "loss": 0.9713, "step": 72580 }, { "epoch": 64.29140832595218, "grad_norm": 0.22033889591693878, "learning_rate": 1e-05, "loss": 0.9643, "step": 72585 }, { "epoch": 64.29583702391497, "grad_norm": 0.24071216583251953, "learning_rate": 1e-05, "loss": 0.9852, "step": 72590 }, { "epoch": 64.30026572187776, "grad_norm": 0.21995486319065094, "learning_rate": 1e-05, "loss": 0.9717, "step": 72595 }, { "epoch": 64.30469441984057, "grad_norm": 0.23339150846004486, "learning_rate": 1e-05, "loss": 0.9421, "step": 72600 }, { "epoch": 64.30912311780337, "grad_norm": 0.24846330285072327, "learning_rate": 1e-05, "loss": 0.9608, "step": 72605 }, { "epoch": 64.31355181576616, "grad_norm": 0.21609875559806824, "learning_rate": 1e-05, "loss": 0.9665, "step": 72610 }, { "epoch": 64.31798051372897, "grad_norm": 0.22578540444374084, "learning_rate": 1e-05, "loss": 1.004, "step": 72615 }, { "epoch": 64.32240921169176, "grad_norm": 0.2506599426269531, "learning_rate": 1e-05, "loss": 0.9192, "step": 72620 }, { "epoch": 64.32683790965456, "grad_norm": 0.23522557318210602, "learning_rate": 1e-05, "loss": 0.9675, "step": 72625 }, { "epoch": 64.33126660761737, "grad_norm": 0.2518937587738037, "learning_rate": 1e-05, "loss": 0.9713, "step": 72630 }, { "epoch": 64.33569530558016, "grad_norm": 0.2291484773159027, "learning_rate": 1e-05, "loss": 0.9332, "step": 72635 }, { "epoch": 64.34012400354295, "grad_norm": 0.2440386414527893, "learning_rate": 1e-05, "loss": 0.9997, "step": 72640 }, { "epoch": 64.34455270150576, "grad_norm": 0.24349361658096313, "learning_rate": 1e-05, "loss": 0.9824, "step": 72645 }, { "epoch": 64.34898139946856, "grad_norm": 0.240842804312706, "learning_rate": 1e-05, "loss": 1.0274, "step": 72650 }, { "epoch": 64.35341009743135, "grad_norm": 0.235815167427063, "learning_rate": 1e-05, "loss": 0.9364, "step": 72655 }, { "epoch": 64.35783879539416, "grad_norm": 0.27491965889930725, "learning_rate": 1e-05, "loss": 0.9871, "step": 72660 }, { "epoch": 64.36226749335695, "grad_norm": 0.22957608103752136, "learning_rate": 1e-05, "loss": 1.0316, "step": 72665 }, { "epoch": 64.36669619131975, "grad_norm": 0.2438986599445343, "learning_rate": 1e-05, "loss": 0.9702, "step": 72670 }, { "epoch": 64.37112488928256, "grad_norm": 0.2598194479942322, "learning_rate": 1e-05, "loss": 1.0219, "step": 72675 }, { "epoch": 64.37555358724535, "grad_norm": 0.2896093428134918, "learning_rate": 1e-05, "loss": 0.9326, "step": 72680 }, { "epoch": 64.37998228520814, "grad_norm": 0.2230769246816635, "learning_rate": 1e-05, "loss": 0.9096, "step": 72685 }, { "epoch": 64.38441098317095, "grad_norm": 0.24404607713222504, "learning_rate": 1e-05, "loss": 0.9682, "step": 72690 }, { "epoch": 64.38883968113375, "grad_norm": 0.2649652659893036, "learning_rate": 1e-05, "loss": 0.9389, "step": 72695 }, { "epoch": 64.39326837909654, "grad_norm": 0.25047820806503296, "learning_rate": 1e-05, "loss": 0.9816, "step": 72700 }, { "epoch": 64.39769707705935, "grad_norm": 0.2452995777130127, "learning_rate": 1e-05, "loss": 0.897, "step": 72705 }, { "epoch": 64.40212577502214, "grad_norm": 0.22855930030345917, "learning_rate": 1e-05, "loss": 1.0535, "step": 72710 }, { "epoch": 64.40655447298494, "grad_norm": 0.21578523516654968, "learning_rate": 1e-05, "loss": 0.9146, "step": 72715 }, { "epoch": 64.41098317094774, "grad_norm": 0.20851679146289825, "learning_rate": 1e-05, "loss": 0.965, "step": 72720 }, { "epoch": 64.41541186891054, "grad_norm": 0.22085556387901306, "learning_rate": 1e-05, "loss": 0.9993, "step": 72725 }, { "epoch": 64.41984056687333, "grad_norm": 0.24598106741905212, "learning_rate": 1e-05, "loss": 0.9542, "step": 72730 }, { "epoch": 64.42426926483614, "grad_norm": 0.2659984230995178, "learning_rate": 1e-05, "loss": 0.989, "step": 72735 }, { "epoch": 64.42869796279894, "grad_norm": 0.20612941682338715, "learning_rate": 1e-05, "loss": 0.9758, "step": 72740 }, { "epoch": 64.43312666076173, "grad_norm": 0.21529501676559448, "learning_rate": 1e-05, "loss": 0.9491, "step": 72745 }, { "epoch": 64.43755535872454, "grad_norm": 0.263980507850647, "learning_rate": 1e-05, "loss": 0.9571, "step": 72750 }, { "epoch": 64.44198405668733, "grad_norm": 0.22312942147254944, "learning_rate": 1e-05, "loss": 0.9332, "step": 72755 }, { "epoch": 64.44641275465013, "grad_norm": 0.24249732494354248, "learning_rate": 1e-05, "loss": 0.9604, "step": 72760 }, { "epoch": 64.45084145261293, "grad_norm": 0.21512573957443237, "learning_rate": 1e-05, "loss": 0.9388, "step": 72765 }, { "epoch": 64.45527015057573, "grad_norm": 0.23423995077610016, "learning_rate": 1e-05, "loss": 0.9375, "step": 72770 }, { "epoch": 64.45969884853854, "grad_norm": 0.26808351278305054, "learning_rate": 1e-05, "loss": 0.9727, "step": 72775 }, { "epoch": 64.46412754650133, "grad_norm": 0.2010992467403412, "learning_rate": 1e-05, "loss": 0.953, "step": 72780 }, { "epoch": 64.46855624446412, "grad_norm": 0.26097404956817627, "learning_rate": 1e-05, "loss": 0.9994, "step": 72785 }, { "epoch": 64.47298494242693, "grad_norm": 0.23877692222595215, "learning_rate": 1e-05, "loss": 1.0039, "step": 72790 }, { "epoch": 64.47741364038973, "grad_norm": 0.19634488224983215, "learning_rate": 1e-05, "loss": 1.0108, "step": 72795 }, { "epoch": 64.48184233835252, "grad_norm": 0.21636557579040527, "learning_rate": 1e-05, "loss": 0.945, "step": 72800 }, { "epoch": 64.48627103631533, "grad_norm": 0.2246290147304535, "learning_rate": 1e-05, "loss": 0.9996, "step": 72805 }, { "epoch": 64.49069973427812, "grad_norm": 0.22305701673030853, "learning_rate": 1e-05, "loss": 0.9703, "step": 72810 }, { "epoch": 64.49512843224092, "grad_norm": 0.20937050879001617, "learning_rate": 1e-05, "loss": 0.9275, "step": 72815 }, { "epoch": 64.49955713020373, "grad_norm": 0.22782689332962036, "learning_rate": 1e-05, "loss": 0.9864, "step": 72820 }, { "epoch": 64.50398582816652, "grad_norm": 0.24238668382167816, "learning_rate": 1e-05, "loss": 0.9293, "step": 72825 }, { "epoch": 64.50841452612931, "grad_norm": 0.2320430874824524, "learning_rate": 1e-05, "loss": 0.9327, "step": 72830 }, { "epoch": 64.51284322409212, "grad_norm": 0.31743624806404114, "learning_rate": 1e-05, "loss": 0.9435, "step": 72835 }, { "epoch": 64.51727192205492, "grad_norm": 0.216452956199646, "learning_rate": 1e-05, "loss": 0.9846, "step": 72840 }, { "epoch": 64.52170062001771, "grad_norm": 0.23494507372379303, "learning_rate": 1e-05, "loss": 0.9322, "step": 72845 }, { "epoch": 64.52612931798052, "grad_norm": 0.22299674153327942, "learning_rate": 1e-05, "loss": 0.9257, "step": 72850 }, { "epoch": 64.53055801594331, "grad_norm": 0.22777986526489258, "learning_rate": 1e-05, "loss": 0.9613, "step": 72855 }, { "epoch": 64.53498671390611, "grad_norm": 0.22655801475048065, "learning_rate": 1e-05, "loss": 0.9845, "step": 72860 }, { "epoch": 64.53941541186892, "grad_norm": 0.26625117659568787, "learning_rate": 1e-05, "loss": 0.9447, "step": 72865 }, { "epoch": 64.54384410983171, "grad_norm": 0.22187185287475586, "learning_rate": 1e-05, "loss": 0.9584, "step": 72870 }, { "epoch": 64.5482728077945, "grad_norm": 0.2565276324748993, "learning_rate": 1e-05, "loss": 0.9594, "step": 72875 }, { "epoch": 64.55270150575731, "grad_norm": 0.31812727451324463, "learning_rate": 1e-05, "loss": 0.9842, "step": 72880 }, { "epoch": 64.5571302037201, "grad_norm": 0.30237773060798645, "learning_rate": 1e-05, "loss": 1.0153, "step": 72885 }, { "epoch": 64.5615589016829, "grad_norm": 0.22413788735866547, "learning_rate": 1e-05, "loss": 0.9292, "step": 72890 }, { "epoch": 64.56598759964571, "grad_norm": 0.26362931728363037, "learning_rate": 1e-05, "loss": 0.9485, "step": 72895 }, { "epoch": 64.5704162976085, "grad_norm": 0.22944939136505127, "learning_rate": 1e-05, "loss": 0.9569, "step": 72900 }, { "epoch": 64.5748449955713, "grad_norm": 0.24276171624660492, "learning_rate": 1e-05, "loss": 0.9394, "step": 72905 }, { "epoch": 64.5792736935341, "grad_norm": 0.2938569486141205, "learning_rate": 1e-05, "loss": 0.9798, "step": 72910 }, { "epoch": 64.5837023914969, "grad_norm": 0.2229621559381485, "learning_rate": 1e-05, "loss": 0.9339, "step": 72915 }, { "epoch": 64.5881310894597, "grad_norm": 0.22491954267024994, "learning_rate": 1e-05, "loss": 0.9963, "step": 72920 }, { "epoch": 64.5925597874225, "grad_norm": 0.2031039297580719, "learning_rate": 1e-05, "loss": 0.9383, "step": 72925 }, { "epoch": 64.5969884853853, "grad_norm": 0.23491178452968597, "learning_rate": 1e-05, "loss": 0.9905, "step": 72930 }, { "epoch": 64.60141718334809, "grad_norm": 0.278828501701355, "learning_rate": 1e-05, "loss": 0.9744, "step": 72935 }, { "epoch": 64.6058458813109, "grad_norm": 0.2112453579902649, "learning_rate": 1e-05, "loss": 0.9776, "step": 72940 }, { "epoch": 64.61027457927369, "grad_norm": 0.3014611601829529, "learning_rate": 1e-05, "loss": 0.9956, "step": 72945 }, { "epoch": 64.61470327723649, "grad_norm": 0.22272783517837524, "learning_rate": 1e-05, "loss": 0.9255, "step": 72950 }, { "epoch": 64.6191319751993, "grad_norm": 0.2961048185825348, "learning_rate": 1e-05, "loss": 0.9727, "step": 72955 }, { "epoch": 64.62356067316209, "grad_norm": 0.22155489027500153, "learning_rate": 1e-05, "loss": 0.8963, "step": 72960 }, { "epoch": 64.62798937112488, "grad_norm": 0.22372622787952423, "learning_rate": 1e-05, "loss": 0.9982, "step": 72965 }, { "epoch": 64.63241806908769, "grad_norm": 0.22023390233516693, "learning_rate": 1e-05, "loss": 0.9987, "step": 72970 }, { "epoch": 64.63684676705049, "grad_norm": 0.23097185790538788, "learning_rate": 1e-05, "loss": 0.9288, "step": 72975 }, { "epoch": 64.64127546501328, "grad_norm": 0.2689822316169739, "learning_rate": 1e-05, "loss": 0.9941, "step": 72980 }, { "epoch": 64.64570416297609, "grad_norm": 0.24470609426498413, "learning_rate": 1e-05, "loss": 0.9681, "step": 72985 }, { "epoch": 64.65013286093888, "grad_norm": 0.23677454888820648, "learning_rate": 1e-05, "loss": 0.9985, "step": 72990 }, { "epoch": 64.65456155890168, "grad_norm": 0.19466763734817505, "learning_rate": 1e-05, "loss": 0.9692, "step": 72995 }, { "epoch": 64.65899025686448, "grad_norm": 0.22303299605846405, "learning_rate": 1e-05, "loss": 0.9571, "step": 73000 }, { "epoch": 64.66341895482728, "grad_norm": 0.2066963165998459, "learning_rate": 1e-05, "loss": 0.9659, "step": 73005 }, { "epoch": 64.66784765279007, "grad_norm": 0.23120933771133423, "learning_rate": 1e-05, "loss": 0.9715, "step": 73010 }, { "epoch": 64.67227635075288, "grad_norm": 0.2797692120075226, "learning_rate": 1e-05, "loss": 0.9737, "step": 73015 }, { "epoch": 64.67670504871568, "grad_norm": 0.2526929974555969, "learning_rate": 1e-05, "loss": 1.0176, "step": 73020 }, { "epoch": 64.68113374667848, "grad_norm": 0.2414504438638687, "learning_rate": 1e-05, "loss": 0.9449, "step": 73025 }, { "epoch": 64.68556244464128, "grad_norm": 0.23376959562301636, "learning_rate": 1e-05, "loss": 0.9615, "step": 73030 }, { "epoch": 64.68999114260407, "grad_norm": 0.2564552426338196, "learning_rate": 1e-05, "loss": 0.9964, "step": 73035 }, { "epoch": 64.69441984056688, "grad_norm": 0.2301609367132187, "learning_rate": 1e-05, "loss": 0.9814, "step": 73040 }, { "epoch": 64.69884853852967, "grad_norm": 0.22173574566841125, "learning_rate": 1e-05, "loss": 0.9831, "step": 73045 }, { "epoch": 64.70327723649247, "grad_norm": 0.25225400924682617, "learning_rate": 1e-05, "loss": 0.9503, "step": 73050 }, { "epoch": 64.70770593445528, "grad_norm": 0.21192489564418793, "learning_rate": 1e-05, "loss": 0.9436, "step": 73055 }, { "epoch": 64.71213463241807, "grad_norm": 0.26313936710357666, "learning_rate": 1e-05, "loss": 0.9481, "step": 73060 }, { "epoch": 64.71656333038086, "grad_norm": 0.22775939106941223, "learning_rate": 1e-05, "loss": 0.9778, "step": 73065 }, { "epoch": 64.72099202834367, "grad_norm": 0.22733718156814575, "learning_rate": 1e-05, "loss": 0.9383, "step": 73070 }, { "epoch": 64.72542072630647, "grad_norm": 0.21024346351623535, "learning_rate": 1e-05, "loss": 0.9793, "step": 73075 }, { "epoch": 64.72984942426926, "grad_norm": 0.24041904509067535, "learning_rate": 1e-05, "loss": 0.9312, "step": 73080 }, { "epoch": 64.73427812223207, "grad_norm": 0.20395861566066742, "learning_rate": 1e-05, "loss": 0.9617, "step": 73085 }, { "epoch": 64.73870682019486, "grad_norm": 0.22118213772773743, "learning_rate": 1e-05, "loss": 0.96, "step": 73090 }, { "epoch": 64.74313551815766, "grad_norm": 0.21690507233142853, "learning_rate": 1e-05, "loss": 1.0005, "step": 73095 }, { "epoch": 64.74756421612047, "grad_norm": 0.2162257432937622, "learning_rate": 1e-05, "loss": 1.0369, "step": 73100 }, { "epoch": 64.75199291408326, "grad_norm": 0.20733825862407684, "learning_rate": 1e-05, "loss": 0.9485, "step": 73105 }, { "epoch": 64.75642161204605, "grad_norm": 0.24995101988315582, "learning_rate": 1e-05, "loss": 0.9611, "step": 73110 }, { "epoch": 64.76085031000886, "grad_norm": 0.23254866898059845, "learning_rate": 1e-05, "loss": 0.948, "step": 73115 }, { "epoch": 64.76527900797166, "grad_norm": 0.266143262386322, "learning_rate": 1e-05, "loss": 0.9638, "step": 73120 }, { "epoch": 64.76970770593445, "grad_norm": 0.2318686693906784, "learning_rate": 1e-05, "loss": 0.9609, "step": 73125 }, { "epoch": 64.77413640389726, "grad_norm": 0.20897050201892853, "learning_rate": 1e-05, "loss": 0.9629, "step": 73130 }, { "epoch": 64.77856510186005, "grad_norm": 0.3021281957626343, "learning_rate": 1e-05, "loss": 0.9504, "step": 73135 }, { "epoch": 64.78299379982285, "grad_norm": 0.24280782043933868, "learning_rate": 1e-05, "loss": 0.9482, "step": 73140 }, { "epoch": 64.78742249778566, "grad_norm": 0.2503491938114166, "learning_rate": 1e-05, "loss": 0.9623, "step": 73145 }, { "epoch": 64.79185119574845, "grad_norm": 0.21739760041236877, "learning_rate": 1e-05, "loss": 0.9564, "step": 73150 }, { "epoch": 64.79627989371124, "grad_norm": 0.2679872214794159, "learning_rate": 1e-05, "loss": 0.967, "step": 73155 }, { "epoch": 64.80070859167405, "grad_norm": 0.2514238953590393, "learning_rate": 1e-05, "loss": 0.9681, "step": 73160 }, { "epoch": 64.80513728963685, "grad_norm": 0.24457912147045135, "learning_rate": 1e-05, "loss": 0.973, "step": 73165 }, { "epoch": 64.80956598759964, "grad_norm": 0.23919068276882172, "learning_rate": 1e-05, "loss": 0.9744, "step": 73170 }, { "epoch": 64.81399468556245, "grad_norm": 0.24494221806526184, "learning_rate": 1e-05, "loss": 0.9423, "step": 73175 }, { "epoch": 64.81842338352524, "grad_norm": 0.258117139339447, "learning_rate": 1e-05, "loss": 0.9986, "step": 73180 }, { "epoch": 64.82285208148804, "grad_norm": 0.23422940075397491, "learning_rate": 1e-05, "loss": 0.9259, "step": 73185 }, { "epoch": 64.82728077945085, "grad_norm": 0.2692304253578186, "learning_rate": 1e-05, "loss": 1.0308, "step": 73190 }, { "epoch": 64.83170947741364, "grad_norm": 0.23676486313343048, "learning_rate": 1e-05, "loss": 0.9493, "step": 73195 }, { "epoch": 64.83613817537643, "grad_norm": 0.2411409467458725, "learning_rate": 1e-05, "loss": 0.946, "step": 73200 }, { "epoch": 64.84056687333924, "grad_norm": 0.24172060191631317, "learning_rate": 1e-05, "loss": 0.9606, "step": 73205 }, { "epoch": 64.84499557130204, "grad_norm": 0.24388645589351654, "learning_rate": 1e-05, "loss": 0.9594, "step": 73210 }, { "epoch": 64.84942426926483, "grad_norm": 0.27366477251052856, "learning_rate": 1e-05, "loss": 0.9925, "step": 73215 }, { "epoch": 64.85385296722764, "grad_norm": 0.3080722391605377, "learning_rate": 1e-05, "loss": 0.9535, "step": 73220 }, { "epoch": 64.85828166519043, "grad_norm": 0.2512790858745575, "learning_rate": 1e-05, "loss": 0.9896, "step": 73225 }, { "epoch": 64.86271036315323, "grad_norm": 0.259541392326355, "learning_rate": 1e-05, "loss": 0.9484, "step": 73230 }, { "epoch": 64.86713906111603, "grad_norm": 0.3149668574333191, "learning_rate": 1e-05, "loss": 0.9524, "step": 73235 }, { "epoch": 64.87156775907883, "grad_norm": 0.24256470799446106, "learning_rate": 1e-05, "loss": 0.9163, "step": 73240 }, { "epoch": 64.87599645704162, "grad_norm": 0.2324226349592209, "learning_rate": 1e-05, "loss": 0.975, "step": 73245 }, { "epoch": 64.88042515500443, "grad_norm": 0.2165728211402893, "learning_rate": 1e-05, "loss": 0.9296, "step": 73250 }, { "epoch": 64.88485385296723, "grad_norm": 0.21871446073055267, "learning_rate": 1e-05, "loss": 0.9801, "step": 73255 }, { "epoch": 64.88928255093003, "grad_norm": 0.21416890621185303, "learning_rate": 1e-05, "loss": 1.0059, "step": 73260 }, { "epoch": 64.89371124889283, "grad_norm": 0.2610345780849457, "learning_rate": 1e-05, "loss": 1.0018, "step": 73265 }, { "epoch": 64.89813994685562, "grad_norm": 0.23710957169532776, "learning_rate": 1e-05, "loss": 0.9979, "step": 73270 }, { "epoch": 64.90256864481843, "grad_norm": 0.2321448177099228, "learning_rate": 1e-05, "loss": 0.9541, "step": 73275 }, { "epoch": 64.90699734278122, "grad_norm": 0.23969289660453796, "learning_rate": 1e-05, "loss": 0.9682, "step": 73280 }, { "epoch": 64.91142604074402, "grad_norm": 0.23932261765003204, "learning_rate": 1e-05, "loss": 0.9862, "step": 73285 }, { "epoch": 64.91585473870683, "grad_norm": 0.24695177376270294, "learning_rate": 1e-05, "loss": 0.9254, "step": 73290 }, { "epoch": 64.92028343666962, "grad_norm": 0.21476314961910248, "learning_rate": 1e-05, "loss": 0.9757, "step": 73295 }, { "epoch": 64.92471213463241, "grad_norm": 0.24135638773441315, "learning_rate": 1e-05, "loss": 0.9732, "step": 73300 }, { "epoch": 64.92914083259522, "grad_norm": 0.2539912164211273, "learning_rate": 1e-05, "loss": 0.9439, "step": 73305 }, { "epoch": 64.93356953055802, "grad_norm": 0.2453600913286209, "learning_rate": 1e-05, "loss": 0.9776, "step": 73310 }, { "epoch": 64.93799822852081, "grad_norm": 0.2225649058818817, "learning_rate": 1e-05, "loss": 0.9531, "step": 73315 }, { "epoch": 64.94242692648362, "grad_norm": 0.21839207410812378, "learning_rate": 1e-05, "loss": 0.9125, "step": 73320 }, { "epoch": 64.94685562444641, "grad_norm": 0.2366826832294464, "learning_rate": 1e-05, "loss": 0.9981, "step": 73325 }, { "epoch": 64.95128432240921, "grad_norm": 0.22882409393787384, "learning_rate": 1e-05, "loss": 0.9701, "step": 73330 }, { "epoch": 64.95571302037202, "grad_norm": 0.21799173951148987, "learning_rate": 1e-05, "loss": 0.9873, "step": 73335 }, { "epoch": 64.96014171833481, "grad_norm": 0.23177845776081085, "learning_rate": 1e-05, "loss": 0.9974, "step": 73340 }, { "epoch": 64.9645704162976, "grad_norm": 0.20961230993270874, "learning_rate": 1e-05, "loss": 1.0125, "step": 73345 }, { "epoch": 64.96899911426041, "grad_norm": 0.27216798067092896, "learning_rate": 1e-05, "loss": 1.0064, "step": 73350 }, { "epoch": 64.9734278122232, "grad_norm": 0.20005322992801666, "learning_rate": 1e-05, "loss": 1.0102, "step": 73355 }, { "epoch": 64.977856510186, "grad_norm": 0.22230158746242523, "learning_rate": 1e-05, "loss": 1.048, "step": 73360 }, { "epoch": 64.98228520814881, "grad_norm": 0.2673405706882477, "learning_rate": 1e-05, "loss": 0.9683, "step": 73365 }, { "epoch": 64.9867139061116, "grad_norm": 0.2241121232509613, "learning_rate": 1e-05, "loss": 0.9262, "step": 73370 }, { "epoch": 64.9911426040744, "grad_norm": 0.2412911057472229, "learning_rate": 1e-05, "loss": 0.9884, "step": 73375 }, { "epoch": 64.9955713020372, "grad_norm": 0.2260071337223053, "learning_rate": 1e-05, "loss": 0.9505, "step": 73380 }, { "epoch": 65.0, "grad_norm": 0.2273583561182022, "learning_rate": 1e-05, "loss": 0.9914, "step": 73385 }, { "epoch": 65.0044286979628, "grad_norm": 0.2455538660287857, "learning_rate": 1e-05, "loss": 1.0013, "step": 73390 }, { "epoch": 65.0088573959256, "grad_norm": 0.30271026492118835, "learning_rate": 1e-05, "loss": 0.9545, "step": 73395 }, { "epoch": 65.0132860938884, "grad_norm": 0.2328118085861206, "learning_rate": 1e-05, "loss": 0.9367, "step": 73400 }, { "epoch": 65.01771479185119, "grad_norm": 0.245832160115242, "learning_rate": 1e-05, "loss": 0.9675, "step": 73405 }, { "epoch": 65.022143489814, "grad_norm": 0.26279518008232117, "learning_rate": 1e-05, "loss": 0.965, "step": 73410 }, { "epoch": 65.0265721877768, "grad_norm": 0.2553558051586151, "learning_rate": 1e-05, "loss": 0.9833, "step": 73415 }, { "epoch": 65.03100088573959, "grad_norm": 0.2141643613576889, "learning_rate": 1e-05, "loss": 0.9599, "step": 73420 }, { "epoch": 65.0354295837024, "grad_norm": 0.23492883145809174, "learning_rate": 1e-05, "loss": 0.9531, "step": 73425 }, { "epoch": 65.03985828166519, "grad_norm": 0.2577956020832062, "learning_rate": 1e-05, "loss": 0.9781, "step": 73430 }, { "epoch": 65.04428697962798, "grad_norm": 0.21198050677776337, "learning_rate": 1e-05, "loss": 0.912, "step": 73435 }, { "epoch": 65.04871567759079, "grad_norm": 0.24318081140518188, "learning_rate": 1e-05, "loss": 0.9395, "step": 73440 }, { "epoch": 65.05314437555359, "grad_norm": 0.25270235538482666, "learning_rate": 1e-05, "loss": 0.9558, "step": 73445 }, { "epoch": 65.05757307351638, "grad_norm": 0.23072810471057892, "learning_rate": 1e-05, "loss": 1.0103, "step": 73450 }, { "epoch": 65.06200177147919, "grad_norm": 0.24660111963748932, "learning_rate": 1e-05, "loss": 0.9544, "step": 73455 }, { "epoch": 65.06643046944198, "grad_norm": 0.2252207100391388, "learning_rate": 1e-05, "loss": 1.0153, "step": 73460 }, { "epoch": 65.07085916740478, "grad_norm": 0.25394079089164734, "learning_rate": 1e-05, "loss": 0.9381, "step": 73465 }, { "epoch": 65.07528786536759, "grad_norm": 0.25519198179244995, "learning_rate": 1e-05, "loss": 0.9623, "step": 73470 }, { "epoch": 65.07971656333038, "grad_norm": 0.23888011276721954, "learning_rate": 1e-05, "loss": 0.9708, "step": 73475 }, { "epoch": 65.08414526129317, "grad_norm": 0.2619520425796509, "learning_rate": 1e-05, "loss": 0.9342, "step": 73480 }, { "epoch": 65.08857395925598, "grad_norm": 0.25942203402519226, "learning_rate": 1e-05, "loss": 0.9764, "step": 73485 }, { "epoch": 65.09300265721878, "grad_norm": 0.24151575565338135, "learning_rate": 1e-05, "loss": 0.9224, "step": 73490 }, { "epoch": 65.09743135518157, "grad_norm": 0.23074884712696075, "learning_rate": 1e-05, "loss": 0.967, "step": 73495 }, { "epoch": 65.10186005314438, "grad_norm": 0.28484755754470825, "learning_rate": 1e-05, "loss": 0.9558, "step": 73500 }, { "epoch": 65.10628875110717, "grad_norm": 0.24937203526496887, "learning_rate": 1e-05, "loss": 0.9309, "step": 73505 }, { "epoch": 65.11071744906998, "grad_norm": 0.22056695818901062, "learning_rate": 1e-05, "loss": 0.9183, "step": 73510 }, { "epoch": 65.11514614703277, "grad_norm": 0.2412230223417282, "learning_rate": 1e-05, "loss": 0.9408, "step": 73515 }, { "epoch": 65.11957484499557, "grad_norm": 0.2385357767343521, "learning_rate": 1e-05, "loss": 0.956, "step": 73520 }, { "epoch": 65.12400354295838, "grad_norm": 0.226994127035141, "learning_rate": 1e-05, "loss": 0.9266, "step": 73525 }, { "epoch": 65.12843224092117, "grad_norm": 0.23409655690193176, "learning_rate": 1e-05, "loss": 0.9322, "step": 73530 }, { "epoch": 65.13286093888397, "grad_norm": 0.2716817557811737, "learning_rate": 1e-05, "loss": 0.9926, "step": 73535 }, { "epoch": 65.13728963684677, "grad_norm": 0.22187738120555878, "learning_rate": 1e-05, "loss": 0.9425, "step": 73540 }, { "epoch": 65.14171833480957, "grad_norm": 0.28643107414245605, "learning_rate": 1e-05, "loss": 0.9302, "step": 73545 }, { "epoch": 65.14614703277236, "grad_norm": 0.23184771835803986, "learning_rate": 1e-05, "loss": 0.9762, "step": 73550 }, { "epoch": 65.15057573073517, "grad_norm": 0.2321070432662964, "learning_rate": 1e-05, "loss": 0.954, "step": 73555 }, { "epoch": 65.15500442869796, "grad_norm": 0.23224906623363495, "learning_rate": 1e-05, "loss": 0.9378, "step": 73560 }, { "epoch": 65.15943312666076, "grad_norm": 0.22908441722393036, "learning_rate": 1e-05, "loss": 0.9871, "step": 73565 }, { "epoch": 65.16386182462357, "grad_norm": 0.22927556931972504, "learning_rate": 1e-05, "loss": 0.9872, "step": 73570 }, { "epoch": 65.16829052258636, "grad_norm": 0.26013466715812683, "learning_rate": 1e-05, "loss": 0.9679, "step": 73575 }, { "epoch": 65.17271922054915, "grad_norm": 0.2474551647901535, "learning_rate": 1e-05, "loss": 0.9668, "step": 73580 }, { "epoch": 65.17714791851196, "grad_norm": 0.23870502412319183, "learning_rate": 1e-05, "loss": 1.0293, "step": 73585 }, { "epoch": 65.18157661647476, "grad_norm": 0.24945245683193207, "learning_rate": 1e-05, "loss": 0.969, "step": 73590 }, { "epoch": 65.18600531443755, "grad_norm": 0.21831509470939636, "learning_rate": 1e-05, "loss": 0.9637, "step": 73595 }, { "epoch": 65.19043401240036, "grad_norm": 0.26080167293548584, "learning_rate": 1e-05, "loss": 0.9408, "step": 73600 }, { "epoch": 65.19486271036315, "grad_norm": 0.22626294195652008, "learning_rate": 1e-05, "loss": 0.9908, "step": 73605 }, { "epoch": 65.19929140832595, "grad_norm": 0.22210374474525452, "learning_rate": 1e-05, "loss": 0.9986, "step": 73610 }, { "epoch": 65.20372010628876, "grad_norm": 0.2490384578704834, "learning_rate": 1e-05, "loss": 0.9532, "step": 73615 }, { "epoch": 65.20814880425155, "grad_norm": 0.2724076509475708, "learning_rate": 1e-05, "loss": 0.9467, "step": 73620 }, { "epoch": 65.21257750221434, "grad_norm": 0.26132825016975403, "learning_rate": 1e-05, "loss": 1.0118, "step": 73625 }, { "epoch": 65.21700620017715, "grad_norm": 0.2411799132823944, "learning_rate": 1e-05, "loss": 0.9836, "step": 73630 }, { "epoch": 65.22143489813995, "grad_norm": 0.23282384872436523, "learning_rate": 1e-05, "loss": 1.0179, "step": 73635 }, { "epoch": 65.22586359610274, "grad_norm": 0.2657560110092163, "learning_rate": 1e-05, "loss": 0.9706, "step": 73640 }, { "epoch": 65.23029229406555, "grad_norm": 0.23267632722854614, "learning_rate": 1e-05, "loss": 0.966, "step": 73645 }, { "epoch": 65.23472099202834, "grad_norm": 0.2692844569683075, "learning_rate": 1e-05, "loss": 0.9741, "step": 73650 }, { "epoch": 65.23914968999114, "grad_norm": 0.2364145815372467, "learning_rate": 1e-05, "loss": 0.9551, "step": 73655 }, { "epoch": 65.24357838795395, "grad_norm": 0.24558281898498535, "learning_rate": 1e-05, "loss": 1.0302, "step": 73660 }, { "epoch": 65.24800708591674, "grad_norm": 0.23422978818416595, "learning_rate": 1e-05, "loss": 0.9419, "step": 73665 }, { "epoch": 65.25243578387953, "grad_norm": 0.2597558796405792, "learning_rate": 1e-05, "loss": 1.0274, "step": 73670 }, { "epoch": 65.25686448184234, "grad_norm": 0.22502420842647552, "learning_rate": 1e-05, "loss": 0.9418, "step": 73675 }, { "epoch": 65.26129317980514, "grad_norm": 0.30387818813323975, "learning_rate": 1e-05, "loss": 0.9852, "step": 73680 }, { "epoch": 65.26572187776793, "grad_norm": 0.2563590705394745, "learning_rate": 1e-05, "loss": 0.9991, "step": 73685 }, { "epoch": 65.27015057573074, "grad_norm": 0.22517463564872742, "learning_rate": 1e-05, "loss": 0.9267, "step": 73690 }, { "epoch": 65.27457927369353, "grad_norm": 0.210673525929451, "learning_rate": 1e-05, "loss": 1.0176, "step": 73695 }, { "epoch": 65.27900797165633, "grad_norm": 0.24069519340991974, "learning_rate": 1e-05, "loss": 1.0047, "step": 73700 }, { "epoch": 65.28343666961914, "grad_norm": 0.2506633996963501, "learning_rate": 1e-05, "loss": 0.957, "step": 73705 }, { "epoch": 65.28786536758193, "grad_norm": 0.22733008861541748, "learning_rate": 1e-05, "loss": 0.9927, "step": 73710 }, { "epoch": 65.29229406554472, "grad_norm": 0.21477723121643066, "learning_rate": 1e-05, "loss": 0.9636, "step": 73715 }, { "epoch": 65.29672276350753, "grad_norm": 0.23473884165287018, "learning_rate": 1e-05, "loss": 0.9283, "step": 73720 }, { "epoch": 65.30115146147033, "grad_norm": 0.23256225883960724, "learning_rate": 1e-05, "loss": 0.9832, "step": 73725 }, { "epoch": 65.30558015943312, "grad_norm": 0.2348213940858841, "learning_rate": 1e-05, "loss": 0.9415, "step": 73730 }, { "epoch": 65.31000885739593, "grad_norm": 0.2542742192745209, "learning_rate": 1e-05, "loss": 0.9891, "step": 73735 }, { "epoch": 65.31443755535872, "grad_norm": 0.24554532766342163, "learning_rate": 1e-05, "loss": 0.9693, "step": 73740 }, { "epoch": 65.31886625332152, "grad_norm": 0.24468572437763214, "learning_rate": 1e-05, "loss": 0.9672, "step": 73745 }, { "epoch": 65.32329495128432, "grad_norm": 0.24310672283172607, "learning_rate": 1e-05, "loss": 0.9382, "step": 73750 }, { "epoch": 65.32772364924712, "grad_norm": 0.24722982943058014, "learning_rate": 1e-05, "loss": 1.0166, "step": 73755 }, { "epoch": 65.33215234720993, "grad_norm": 0.23034626245498657, "learning_rate": 1e-05, "loss": 0.9934, "step": 73760 }, { "epoch": 65.33658104517272, "grad_norm": 0.23216694593429565, "learning_rate": 1e-05, "loss": 0.9195, "step": 73765 }, { "epoch": 65.34100974313552, "grad_norm": 0.22310815751552582, "learning_rate": 1e-05, "loss": 0.9149, "step": 73770 }, { "epoch": 65.34543844109832, "grad_norm": 0.246931791305542, "learning_rate": 1e-05, "loss": 0.9287, "step": 73775 }, { "epoch": 65.34986713906112, "grad_norm": 0.23285101354122162, "learning_rate": 1e-05, "loss": 0.9911, "step": 73780 }, { "epoch": 65.35429583702391, "grad_norm": 0.23956657946109772, "learning_rate": 1e-05, "loss": 0.9789, "step": 73785 }, { "epoch": 65.35872453498672, "grad_norm": 0.2419046014547348, "learning_rate": 1e-05, "loss": 0.935, "step": 73790 }, { "epoch": 65.36315323294951, "grad_norm": 0.23861533403396606, "learning_rate": 1e-05, "loss": 0.975, "step": 73795 }, { "epoch": 65.36758193091231, "grad_norm": 0.25837117433547974, "learning_rate": 1e-05, "loss": 0.9257, "step": 73800 }, { "epoch": 65.37201062887512, "grad_norm": 0.2363186776638031, "learning_rate": 1e-05, "loss": 1.0125, "step": 73805 }, { "epoch": 65.37643932683791, "grad_norm": 0.23103666305541992, "learning_rate": 1e-05, "loss": 0.9557, "step": 73810 }, { "epoch": 65.3808680248007, "grad_norm": 0.24365819990634918, "learning_rate": 1e-05, "loss": 0.9857, "step": 73815 }, { "epoch": 65.38529672276351, "grad_norm": 0.2451895922422409, "learning_rate": 1e-05, "loss": 0.9821, "step": 73820 }, { "epoch": 65.38972542072631, "grad_norm": 0.24540302157402039, "learning_rate": 1e-05, "loss": 0.9599, "step": 73825 }, { "epoch": 65.3941541186891, "grad_norm": 0.2697092890739441, "learning_rate": 1e-05, "loss": 0.9472, "step": 73830 }, { "epoch": 65.39858281665191, "grad_norm": 0.2869071364402771, "learning_rate": 1e-05, "loss": 0.9921, "step": 73835 }, { "epoch": 65.4030115146147, "grad_norm": 0.2370927929878235, "learning_rate": 1e-05, "loss": 0.9582, "step": 73840 }, { "epoch": 65.4074402125775, "grad_norm": 0.2766813635826111, "learning_rate": 1e-05, "loss": 0.9592, "step": 73845 }, { "epoch": 65.4118689105403, "grad_norm": 0.25103282928466797, "learning_rate": 1e-05, "loss": 0.9634, "step": 73850 }, { "epoch": 65.4162976085031, "grad_norm": 0.2520640194416046, "learning_rate": 1e-05, "loss": 0.9569, "step": 73855 }, { "epoch": 65.4207263064659, "grad_norm": 0.215667262673378, "learning_rate": 1e-05, "loss": 0.9327, "step": 73860 }, { "epoch": 65.4251550044287, "grad_norm": 0.22285738587379456, "learning_rate": 1e-05, "loss": 0.9207, "step": 73865 }, { "epoch": 65.4295837023915, "grad_norm": 0.2278720736503601, "learning_rate": 1e-05, "loss": 0.9381, "step": 73870 }, { "epoch": 65.43401240035429, "grad_norm": 0.24399523437023163, "learning_rate": 1e-05, "loss": 0.9504, "step": 73875 }, { "epoch": 65.4384410983171, "grad_norm": 0.23723246157169342, "learning_rate": 1e-05, "loss": 0.9589, "step": 73880 }, { "epoch": 65.4428697962799, "grad_norm": 0.2579454481601715, "learning_rate": 1e-05, "loss": 0.9582, "step": 73885 }, { "epoch": 65.44729849424269, "grad_norm": 0.27759817242622375, "learning_rate": 1e-05, "loss": 0.9647, "step": 73890 }, { "epoch": 65.4517271922055, "grad_norm": 0.24446061253547668, "learning_rate": 1e-05, "loss": 0.9855, "step": 73895 }, { "epoch": 65.45615589016829, "grad_norm": 0.26221561431884766, "learning_rate": 1e-05, "loss": 0.9069, "step": 73900 }, { "epoch": 65.46058458813108, "grad_norm": 0.2655339241027832, "learning_rate": 1e-05, "loss": 0.9917, "step": 73905 }, { "epoch": 65.46501328609389, "grad_norm": 0.25286513566970825, "learning_rate": 1e-05, "loss": 0.9602, "step": 73910 }, { "epoch": 65.46944198405669, "grad_norm": 0.22625558078289032, "learning_rate": 1e-05, "loss": 0.9544, "step": 73915 }, { "epoch": 65.47387068201948, "grad_norm": 0.24174721539020538, "learning_rate": 1e-05, "loss": 0.9634, "step": 73920 }, { "epoch": 65.47829937998229, "grad_norm": 0.2104085385799408, "learning_rate": 1e-05, "loss": 0.9286, "step": 73925 }, { "epoch": 65.48272807794508, "grad_norm": 0.21638138592243195, "learning_rate": 1e-05, "loss": 0.9652, "step": 73930 }, { "epoch": 65.48715677590788, "grad_norm": 0.23180367052555084, "learning_rate": 1e-05, "loss": 0.9265, "step": 73935 }, { "epoch": 65.49158547387069, "grad_norm": 0.29226547479629517, "learning_rate": 1e-05, "loss": 0.9475, "step": 73940 }, { "epoch": 65.49601417183348, "grad_norm": 0.2184530347585678, "learning_rate": 1e-05, "loss": 0.9738, "step": 73945 }, { "epoch": 65.50044286979627, "grad_norm": 0.24443991482257843, "learning_rate": 1e-05, "loss": 0.9893, "step": 73950 }, { "epoch": 65.50487156775908, "grad_norm": 0.31451648473739624, "learning_rate": 1e-05, "loss": 1.0302, "step": 73955 }, { "epoch": 65.50930026572188, "grad_norm": 0.2482128143310547, "learning_rate": 1e-05, "loss": 0.9328, "step": 73960 }, { "epoch": 65.51372896368467, "grad_norm": 0.22831083834171295, "learning_rate": 1e-05, "loss": 0.9727, "step": 73965 }, { "epoch": 65.51815766164748, "grad_norm": 0.27633482217788696, "learning_rate": 1e-05, "loss": 0.9472, "step": 73970 }, { "epoch": 65.52258635961027, "grad_norm": 0.250397652387619, "learning_rate": 1e-05, "loss": 0.9337, "step": 73975 }, { "epoch": 65.52701505757307, "grad_norm": 0.2273891717195511, "learning_rate": 1e-05, "loss": 1.0019, "step": 73980 }, { "epoch": 65.53144375553588, "grad_norm": 0.24531710147857666, "learning_rate": 1e-05, "loss": 0.9676, "step": 73985 }, { "epoch": 65.53587245349867, "grad_norm": 0.22834379971027374, "learning_rate": 1e-05, "loss": 0.9731, "step": 73990 }, { "epoch": 65.54030115146146, "grad_norm": 0.28772053122520447, "learning_rate": 1e-05, "loss": 0.9611, "step": 73995 }, { "epoch": 65.54472984942427, "grad_norm": 0.24808190762996674, "learning_rate": 1e-05, "loss": 0.9709, "step": 74000 }, { "epoch": 65.54915854738707, "grad_norm": 0.24969829618930817, "learning_rate": 1e-05, "loss": 0.9636, "step": 74005 }, { "epoch": 65.55358724534987, "grad_norm": 0.2515394985675812, "learning_rate": 1e-05, "loss": 0.9662, "step": 74010 }, { "epoch": 65.55801594331267, "grad_norm": 0.22127506136894226, "learning_rate": 1e-05, "loss": 0.9545, "step": 74015 }, { "epoch": 65.56244464127546, "grad_norm": 0.29235970973968506, "learning_rate": 1e-05, "loss": 0.9623, "step": 74020 }, { "epoch": 65.56687333923827, "grad_norm": 0.21909882128238678, "learning_rate": 1e-05, "loss": 0.9812, "step": 74025 }, { "epoch": 65.57130203720106, "grad_norm": 0.2424968034029007, "learning_rate": 1e-05, "loss": 0.976, "step": 74030 }, { "epoch": 65.57573073516386, "grad_norm": 0.2307959496974945, "learning_rate": 1e-05, "loss": 0.9379, "step": 74035 }, { "epoch": 65.58015943312667, "grad_norm": 0.2197333723306656, "learning_rate": 1e-05, "loss": 0.9278, "step": 74040 }, { "epoch": 65.58458813108946, "grad_norm": 0.2280236929655075, "learning_rate": 1e-05, "loss": 0.9951, "step": 74045 }, { "epoch": 65.58901682905226, "grad_norm": 0.3449203670024872, "learning_rate": 1e-05, "loss": 0.9372, "step": 74050 }, { "epoch": 65.59344552701506, "grad_norm": 0.25828278064727783, "learning_rate": 1e-05, "loss": 0.9331, "step": 74055 }, { "epoch": 65.59787422497786, "grad_norm": 0.29056501388549805, "learning_rate": 1e-05, "loss": 0.971, "step": 74060 }, { "epoch": 65.60230292294065, "grad_norm": 0.2404964417219162, "learning_rate": 1e-05, "loss": 0.9548, "step": 74065 }, { "epoch": 65.60673162090346, "grad_norm": 0.23660553991794586, "learning_rate": 1e-05, "loss": 0.9477, "step": 74070 }, { "epoch": 65.61116031886625, "grad_norm": 0.26986849308013916, "learning_rate": 1e-05, "loss": 0.9688, "step": 74075 }, { "epoch": 65.61558901682905, "grad_norm": 0.27582237124443054, "learning_rate": 1e-05, "loss": 0.9559, "step": 74080 }, { "epoch": 65.62001771479186, "grad_norm": 0.2464091181755066, "learning_rate": 1e-05, "loss": 1.0019, "step": 74085 }, { "epoch": 65.62444641275465, "grad_norm": 0.21993358433246613, "learning_rate": 1e-05, "loss": 0.9981, "step": 74090 }, { "epoch": 65.62887511071744, "grad_norm": 0.23220138251781464, "learning_rate": 1e-05, "loss": 0.996, "step": 74095 }, { "epoch": 65.63330380868025, "grad_norm": 0.1981947273015976, "learning_rate": 1e-05, "loss": 0.9869, "step": 74100 }, { "epoch": 65.63773250664305, "grad_norm": 0.28225255012512207, "learning_rate": 1e-05, "loss": 0.9905, "step": 74105 }, { "epoch": 65.64216120460584, "grad_norm": 0.2839025557041168, "learning_rate": 1e-05, "loss": 0.9334, "step": 74110 }, { "epoch": 65.64658990256865, "grad_norm": 0.2281114161014557, "learning_rate": 1e-05, "loss": 0.9902, "step": 74115 }, { "epoch": 65.65101860053144, "grad_norm": 0.2327207624912262, "learning_rate": 1e-05, "loss": 0.9658, "step": 74120 }, { "epoch": 65.65544729849424, "grad_norm": 0.20485705137252808, "learning_rate": 1e-05, "loss": 0.9642, "step": 74125 }, { "epoch": 65.65987599645705, "grad_norm": 0.21389590203762054, "learning_rate": 1e-05, "loss": 0.9568, "step": 74130 }, { "epoch": 65.66430469441984, "grad_norm": 0.25099921226501465, "learning_rate": 1e-05, "loss": 0.9269, "step": 74135 }, { "epoch": 65.66873339238263, "grad_norm": 0.27750277519226074, "learning_rate": 1e-05, "loss": 0.9565, "step": 74140 }, { "epoch": 65.67316209034544, "grad_norm": 0.25508829951286316, "learning_rate": 1e-05, "loss": 0.9565, "step": 74145 }, { "epoch": 65.67759078830824, "grad_norm": 0.24631275236606598, "learning_rate": 1e-05, "loss": 0.9971, "step": 74150 }, { "epoch": 65.68201948627103, "grad_norm": 0.23914340138435364, "learning_rate": 1e-05, "loss": 0.9648, "step": 74155 }, { "epoch": 65.68644818423384, "grad_norm": 0.24078847467899323, "learning_rate": 1e-05, "loss": 0.9694, "step": 74160 }, { "epoch": 65.69087688219663, "grad_norm": 0.22129502892494202, "learning_rate": 1e-05, "loss": 0.948, "step": 74165 }, { "epoch": 65.69530558015943, "grad_norm": 0.2491893470287323, "learning_rate": 1e-05, "loss": 0.9033, "step": 74170 }, { "epoch": 65.69973427812224, "grad_norm": 0.24785545468330383, "learning_rate": 1e-05, "loss": 0.9313, "step": 74175 }, { "epoch": 65.70416297608503, "grad_norm": 0.2541751265525818, "learning_rate": 1e-05, "loss": 0.9448, "step": 74180 }, { "epoch": 65.70859167404782, "grad_norm": 0.22128938138484955, "learning_rate": 1e-05, "loss": 0.9829, "step": 74185 }, { "epoch": 65.71302037201063, "grad_norm": 0.21123871207237244, "learning_rate": 1e-05, "loss": 0.9217, "step": 74190 }, { "epoch": 65.71744906997343, "grad_norm": 0.2514908015727997, "learning_rate": 1e-05, "loss": 0.9797, "step": 74195 }, { "epoch": 65.72187776793622, "grad_norm": 0.26354873180389404, "learning_rate": 1e-05, "loss": 0.9628, "step": 74200 }, { "epoch": 65.72630646589903, "grad_norm": 0.2506631016731262, "learning_rate": 1e-05, "loss": 0.9639, "step": 74205 }, { "epoch": 65.73073516386182, "grad_norm": 0.2458077222108841, "learning_rate": 1e-05, "loss": 0.9744, "step": 74210 }, { "epoch": 65.73516386182462, "grad_norm": 0.22851774096488953, "learning_rate": 1e-05, "loss": 0.9649, "step": 74215 }, { "epoch": 65.73959255978743, "grad_norm": 0.25505608320236206, "learning_rate": 1e-05, "loss": 0.9918, "step": 74220 }, { "epoch": 65.74402125775022, "grad_norm": 0.263139933347702, "learning_rate": 1e-05, "loss": 0.9638, "step": 74225 }, { "epoch": 65.74844995571301, "grad_norm": 0.20865462720394135, "learning_rate": 1e-05, "loss": 0.9865, "step": 74230 }, { "epoch": 65.75287865367582, "grad_norm": 0.21958093345165253, "learning_rate": 1e-05, "loss": 0.995, "step": 74235 }, { "epoch": 65.75730735163862, "grad_norm": 0.24629531800746918, "learning_rate": 1e-05, "loss": 0.9777, "step": 74240 }, { "epoch": 65.76173604960141, "grad_norm": 0.25311487913131714, "learning_rate": 1e-05, "loss": 0.9585, "step": 74245 }, { "epoch": 65.76616474756422, "grad_norm": 0.26450374722480774, "learning_rate": 1e-05, "loss": 0.943, "step": 74250 }, { "epoch": 65.77059344552701, "grad_norm": 0.273154616355896, "learning_rate": 1e-05, "loss": 0.9723, "step": 74255 }, { "epoch": 65.77502214348982, "grad_norm": 0.2843685448169708, "learning_rate": 1e-05, "loss": 0.9911, "step": 74260 }, { "epoch": 65.77945084145261, "grad_norm": 0.24004797637462616, "learning_rate": 1e-05, "loss": 0.9957, "step": 74265 }, { "epoch": 65.78387953941541, "grad_norm": 0.21119549870491028, "learning_rate": 1e-05, "loss": 0.9808, "step": 74270 }, { "epoch": 65.78830823737822, "grad_norm": 0.24425192177295685, "learning_rate": 1e-05, "loss": 1.0246, "step": 74275 }, { "epoch": 65.79273693534101, "grad_norm": 0.2254219800233841, "learning_rate": 1e-05, "loss": 0.9598, "step": 74280 }, { "epoch": 65.7971656333038, "grad_norm": 0.22379523515701294, "learning_rate": 1e-05, "loss": 0.9324, "step": 74285 }, { "epoch": 65.80159433126661, "grad_norm": 0.23435257375240326, "learning_rate": 1e-05, "loss": 0.962, "step": 74290 }, { "epoch": 65.80602302922941, "grad_norm": 0.2412867397069931, "learning_rate": 1e-05, "loss": 0.8612, "step": 74295 }, { "epoch": 65.8104517271922, "grad_norm": 0.25451773405075073, "learning_rate": 1e-05, "loss": 1.0113, "step": 74300 }, { "epoch": 65.81488042515501, "grad_norm": 0.24115265905857086, "learning_rate": 1e-05, "loss": 0.9739, "step": 74305 }, { "epoch": 65.8193091231178, "grad_norm": 0.21350601315498352, "learning_rate": 1e-05, "loss": 0.9875, "step": 74310 }, { "epoch": 65.8237378210806, "grad_norm": 0.22150246798992157, "learning_rate": 1e-05, "loss": 0.9304, "step": 74315 }, { "epoch": 65.8281665190434, "grad_norm": 0.22074013948440552, "learning_rate": 1e-05, "loss": 0.9157, "step": 74320 }, { "epoch": 65.8325952170062, "grad_norm": 0.30737680196762085, "learning_rate": 1e-05, "loss": 0.9397, "step": 74325 }, { "epoch": 65.837023914969, "grad_norm": 0.23562325537204742, "learning_rate": 1e-05, "loss": 0.929, "step": 74330 }, { "epoch": 65.8414526129318, "grad_norm": 0.1977197527885437, "learning_rate": 1e-05, "loss": 1.0242, "step": 74335 }, { "epoch": 65.8458813108946, "grad_norm": 0.23536312580108643, "learning_rate": 1e-05, "loss": 1.0081, "step": 74340 }, { "epoch": 65.85031000885739, "grad_norm": 0.2220110446214676, "learning_rate": 1e-05, "loss": 0.983, "step": 74345 }, { "epoch": 65.8547387068202, "grad_norm": 0.23775538802146912, "learning_rate": 1e-05, "loss": 0.9658, "step": 74350 }, { "epoch": 65.859167404783, "grad_norm": 0.24388448894023895, "learning_rate": 1e-05, "loss": 0.9963, "step": 74355 }, { "epoch": 65.86359610274579, "grad_norm": 0.23526248335838318, "learning_rate": 1e-05, "loss": 0.9882, "step": 74360 }, { "epoch": 65.8680248007086, "grad_norm": 0.23477354645729065, "learning_rate": 1e-05, "loss": 1.003, "step": 74365 }, { "epoch": 65.87245349867139, "grad_norm": 0.26307356357574463, "learning_rate": 1e-05, "loss": 0.9285, "step": 74370 }, { "epoch": 65.87688219663418, "grad_norm": 0.27177801728248596, "learning_rate": 1e-05, "loss": 0.9175, "step": 74375 }, { "epoch": 65.881310894597, "grad_norm": 0.23924562335014343, "learning_rate": 1e-05, "loss": 0.9584, "step": 74380 }, { "epoch": 65.88573959255979, "grad_norm": 0.25265198945999146, "learning_rate": 1e-05, "loss": 0.9224, "step": 74385 }, { "epoch": 65.89016829052258, "grad_norm": 0.273100346326828, "learning_rate": 1e-05, "loss": 0.9261, "step": 74390 }, { "epoch": 65.89459698848539, "grad_norm": 0.2339247465133667, "learning_rate": 1e-05, "loss": 0.976, "step": 74395 }, { "epoch": 65.89902568644818, "grad_norm": 0.22921106219291687, "learning_rate": 1e-05, "loss": 0.9735, "step": 74400 }, { "epoch": 65.90345438441098, "grad_norm": 0.22646678984165192, "learning_rate": 1e-05, "loss": 0.9842, "step": 74405 }, { "epoch": 65.90788308237379, "grad_norm": 0.25305068492889404, "learning_rate": 1e-05, "loss": 0.9381, "step": 74410 }, { "epoch": 65.91231178033658, "grad_norm": 0.2483273148536682, "learning_rate": 1e-05, "loss": 0.9475, "step": 74415 }, { "epoch": 65.91674047829937, "grad_norm": 0.22778117656707764, "learning_rate": 1e-05, "loss": 0.9813, "step": 74420 }, { "epoch": 65.92116917626218, "grad_norm": 0.2253522127866745, "learning_rate": 1e-05, "loss": 1.0101, "step": 74425 }, { "epoch": 65.92559787422498, "grad_norm": 0.1965600550174713, "learning_rate": 1e-05, "loss": 0.9455, "step": 74430 }, { "epoch": 65.93002657218777, "grad_norm": 0.2007874697446823, "learning_rate": 1e-05, "loss": 0.9573, "step": 74435 }, { "epoch": 65.93445527015058, "grad_norm": 0.22544962167739868, "learning_rate": 1e-05, "loss": 0.9901, "step": 74440 }, { "epoch": 65.93888396811337, "grad_norm": 0.2848958373069763, "learning_rate": 1e-05, "loss": 1.016, "step": 74445 }, { "epoch": 65.94331266607617, "grad_norm": 0.2699938714504242, "learning_rate": 1e-05, "loss": 0.9533, "step": 74450 }, { "epoch": 65.94774136403898, "grad_norm": 0.2770007252693176, "learning_rate": 1e-05, "loss": 0.9649, "step": 74455 }, { "epoch": 65.95217006200177, "grad_norm": 0.2443130612373352, "learning_rate": 1e-05, "loss": 0.9812, "step": 74460 }, { "epoch": 65.95659875996456, "grad_norm": 0.24658843874931335, "learning_rate": 1e-05, "loss": 0.9998, "step": 74465 }, { "epoch": 65.96102745792737, "grad_norm": 0.22709037363529205, "learning_rate": 1e-05, "loss": 0.9417, "step": 74470 }, { "epoch": 65.96545615589017, "grad_norm": 0.2640805244445801, "learning_rate": 1e-05, "loss": 0.9524, "step": 74475 }, { "epoch": 65.96988485385296, "grad_norm": 0.21873131394386292, "learning_rate": 1e-05, "loss": 0.9995, "step": 74480 }, { "epoch": 65.97431355181577, "grad_norm": 0.21051909029483795, "learning_rate": 1e-05, "loss": 0.9747, "step": 74485 }, { "epoch": 65.97874224977856, "grad_norm": 0.28755322098731995, "learning_rate": 1e-05, "loss": 0.9352, "step": 74490 }, { "epoch": 65.98317094774137, "grad_norm": 0.2537829279899597, "learning_rate": 1e-05, "loss": 0.9446, "step": 74495 }, { "epoch": 65.98759964570417, "grad_norm": 0.25761479139328003, "learning_rate": 1e-05, "loss": 0.9239, "step": 74500 }, { "epoch": 65.99202834366696, "grad_norm": 0.23359157145023346, "learning_rate": 1e-05, "loss": 0.9532, "step": 74505 }, { "epoch": 65.99645704162977, "grad_norm": 0.2516773045063019, "learning_rate": 1e-05, "loss": 0.9787, "step": 74510 }, { "epoch": 66.00088573959256, "grad_norm": 0.28383520245552063, "learning_rate": 1e-05, "loss": 0.9178, "step": 74515 }, { "epoch": 66.00531443755536, "grad_norm": 0.20141509175300598, "learning_rate": 1e-05, "loss": 0.9867, "step": 74520 }, { "epoch": 66.00974313551816, "grad_norm": 0.23093950748443604, "learning_rate": 1e-05, "loss": 0.9854, "step": 74525 }, { "epoch": 66.01417183348096, "grad_norm": 0.21200443804264069, "learning_rate": 1e-05, "loss": 0.9774, "step": 74530 }, { "epoch": 66.01860053144375, "grad_norm": 0.2544075846672058, "learning_rate": 1e-05, "loss": 0.9565, "step": 74535 }, { "epoch": 66.02302922940656, "grad_norm": 0.26948198676109314, "learning_rate": 1e-05, "loss": 0.9597, "step": 74540 }, { "epoch": 66.02745792736935, "grad_norm": 0.2253427356481552, "learning_rate": 1e-05, "loss": 1.0045, "step": 74545 }, { "epoch": 66.03188662533215, "grad_norm": 0.2516725957393646, "learning_rate": 1e-05, "loss": 0.9932, "step": 74550 }, { "epoch": 66.03631532329496, "grad_norm": 0.25191354751586914, "learning_rate": 1e-05, "loss": 0.9868, "step": 74555 }, { "epoch": 66.04074402125775, "grad_norm": 0.21623797714710236, "learning_rate": 1e-05, "loss": 0.9871, "step": 74560 }, { "epoch": 66.04517271922055, "grad_norm": 0.28452759981155396, "learning_rate": 1e-05, "loss": 0.9098, "step": 74565 }, { "epoch": 66.04960141718335, "grad_norm": 0.23730280995368958, "learning_rate": 1e-05, "loss": 0.9247, "step": 74570 }, { "epoch": 66.05403011514615, "grad_norm": 0.2643267810344696, "learning_rate": 1e-05, "loss": 0.931, "step": 74575 }, { "epoch": 66.05845881310894, "grad_norm": 0.2191995084285736, "learning_rate": 1e-05, "loss": 0.9153, "step": 74580 }, { "epoch": 66.06288751107175, "grad_norm": 0.24467483162879944, "learning_rate": 1e-05, "loss": 1.003, "step": 74585 }, { "epoch": 66.06731620903454, "grad_norm": 0.25363361835479736, "learning_rate": 1e-05, "loss": 0.9386, "step": 74590 }, { "epoch": 66.07174490699734, "grad_norm": 0.29141151905059814, "learning_rate": 1e-05, "loss": 0.9711, "step": 74595 }, { "epoch": 66.07617360496015, "grad_norm": 0.2630162537097931, "learning_rate": 1e-05, "loss": 0.9808, "step": 74600 }, { "epoch": 66.08060230292294, "grad_norm": 0.2733655869960785, "learning_rate": 1e-05, "loss": 1.013, "step": 74605 }, { "epoch": 66.08503100088573, "grad_norm": 0.2453538179397583, "learning_rate": 1e-05, "loss": 0.939, "step": 74610 }, { "epoch": 66.08945969884854, "grad_norm": 0.2916133999824524, "learning_rate": 1e-05, "loss": 0.9228, "step": 74615 }, { "epoch": 66.09388839681134, "grad_norm": 0.23237910866737366, "learning_rate": 1e-05, "loss": 0.9476, "step": 74620 }, { "epoch": 66.09831709477413, "grad_norm": 0.2668355703353882, "learning_rate": 1e-05, "loss": 0.9769, "step": 74625 }, { "epoch": 66.10274579273694, "grad_norm": 0.23668146133422852, "learning_rate": 1e-05, "loss": 0.9526, "step": 74630 }, { "epoch": 66.10717449069973, "grad_norm": 0.24643099308013916, "learning_rate": 1e-05, "loss": 0.9971, "step": 74635 }, { "epoch": 66.11160318866253, "grad_norm": 0.22631680965423584, "learning_rate": 1e-05, "loss": 0.9443, "step": 74640 }, { "epoch": 66.11603188662534, "grad_norm": 0.23542432487010956, "learning_rate": 1e-05, "loss": 0.9653, "step": 74645 }, { "epoch": 66.12046058458813, "grad_norm": 0.28409436345100403, "learning_rate": 1e-05, "loss": 0.9837, "step": 74650 }, { "epoch": 66.12488928255092, "grad_norm": 0.23772531747817993, "learning_rate": 1e-05, "loss": 0.9288, "step": 74655 }, { "epoch": 66.12931798051373, "grad_norm": 0.2425490915775299, "learning_rate": 1e-05, "loss": 0.977, "step": 74660 }, { "epoch": 66.13374667847653, "grad_norm": 0.26845383644104004, "learning_rate": 1e-05, "loss": 0.9841, "step": 74665 }, { "epoch": 66.13817537643932, "grad_norm": 0.22238360345363617, "learning_rate": 1e-05, "loss": 0.9609, "step": 74670 }, { "epoch": 66.14260407440213, "grad_norm": 0.2718292474746704, "learning_rate": 1e-05, "loss": 0.9723, "step": 74675 }, { "epoch": 66.14703277236492, "grad_norm": 0.25698161125183105, "learning_rate": 1e-05, "loss": 0.9493, "step": 74680 }, { "epoch": 66.15146147032772, "grad_norm": 0.2645103931427002, "learning_rate": 1e-05, "loss": 0.9856, "step": 74685 }, { "epoch": 66.15589016829053, "grad_norm": 0.2501746714115143, "learning_rate": 1e-05, "loss": 0.9416, "step": 74690 }, { "epoch": 66.16031886625332, "grad_norm": 0.23271210491657257, "learning_rate": 1e-05, "loss": 0.9254, "step": 74695 }, { "epoch": 66.16474756421611, "grad_norm": 0.22491124272346497, "learning_rate": 1e-05, "loss": 0.9497, "step": 74700 }, { "epoch": 66.16917626217892, "grad_norm": 0.2692507803440094, "learning_rate": 1e-05, "loss": 0.9468, "step": 74705 }, { "epoch": 66.17360496014172, "grad_norm": 0.22798876464366913, "learning_rate": 1e-05, "loss": 0.9402, "step": 74710 }, { "epoch": 66.17803365810451, "grad_norm": 0.2707555294036865, "learning_rate": 1e-05, "loss": 0.9853, "step": 74715 }, { "epoch": 66.18246235606732, "grad_norm": 0.2852475345134735, "learning_rate": 1e-05, "loss": 1.0018, "step": 74720 }, { "epoch": 66.18689105403011, "grad_norm": 0.24982333183288574, "learning_rate": 1e-05, "loss": 0.9301, "step": 74725 }, { "epoch": 66.1913197519929, "grad_norm": 0.23818860948085785, "learning_rate": 1e-05, "loss": 0.9561, "step": 74730 }, { "epoch": 66.19574844995572, "grad_norm": 0.24792850017547607, "learning_rate": 1e-05, "loss": 0.9352, "step": 74735 }, { "epoch": 66.20017714791851, "grad_norm": 0.26514294743537903, "learning_rate": 1e-05, "loss": 1.0094, "step": 74740 }, { "epoch": 66.20460584588132, "grad_norm": 0.28952962160110474, "learning_rate": 1e-05, "loss": 0.984, "step": 74745 }, { "epoch": 66.20903454384411, "grad_norm": 0.2227969616651535, "learning_rate": 1e-05, "loss": 0.9413, "step": 74750 }, { "epoch": 66.2134632418069, "grad_norm": 0.21819466352462769, "learning_rate": 1e-05, "loss": 0.9696, "step": 74755 }, { "epoch": 66.21789193976971, "grad_norm": 0.2691785991191864, "learning_rate": 1e-05, "loss": 0.9713, "step": 74760 }, { "epoch": 66.22232063773251, "grad_norm": 0.25031018257141113, "learning_rate": 1e-05, "loss": 0.9207, "step": 74765 }, { "epoch": 66.2267493356953, "grad_norm": 0.21965476870536804, "learning_rate": 1e-05, "loss": 0.9139, "step": 74770 }, { "epoch": 66.23117803365811, "grad_norm": 0.22157379984855652, "learning_rate": 1e-05, "loss": 0.9954, "step": 74775 }, { "epoch": 66.2356067316209, "grad_norm": 0.21819068491458893, "learning_rate": 1e-05, "loss": 0.9434, "step": 74780 }, { "epoch": 66.2400354295837, "grad_norm": 0.24777469038963318, "learning_rate": 1e-05, "loss": 0.9225, "step": 74785 }, { "epoch": 66.24446412754651, "grad_norm": 0.28822726011276245, "learning_rate": 1e-05, "loss": 0.9795, "step": 74790 }, { "epoch": 66.2488928255093, "grad_norm": 0.2769969701766968, "learning_rate": 1e-05, "loss": 0.9373, "step": 74795 }, { "epoch": 66.2533215234721, "grad_norm": 0.22472482919692993, "learning_rate": 1e-05, "loss": 0.9644, "step": 74800 }, { "epoch": 66.2577502214349, "grad_norm": 0.2421802580356598, "learning_rate": 1e-05, "loss": 0.9545, "step": 74805 }, { "epoch": 66.2621789193977, "grad_norm": 0.2716982364654541, "learning_rate": 1e-05, "loss": 0.9746, "step": 74810 }, { "epoch": 66.26660761736049, "grad_norm": 0.2902645170688629, "learning_rate": 1e-05, "loss": 0.9893, "step": 74815 }, { "epoch": 66.2710363153233, "grad_norm": 0.23928608000278473, "learning_rate": 1e-05, "loss": 0.9808, "step": 74820 }, { "epoch": 66.2754650132861, "grad_norm": 0.24107012152671814, "learning_rate": 1e-05, "loss": 0.936, "step": 74825 }, { "epoch": 66.27989371124889, "grad_norm": 0.23326340317726135, "learning_rate": 1e-05, "loss": 0.9546, "step": 74830 }, { "epoch": 66.2843224092117, "grad_norm": 0.22210094332695007, "learning_rate": 1e-05, "loss": 0.9364, "step": 74835 }, { "epoch": 66.28875110717449, "grad_norm": 0.2259248048067093, "learning_rate": 1e-05, "loss": 1.0066, "step": 74840 }, { "epoch": 66.29317980513729, "grad_norm": 0.2693815529346466, "learning_rate": 1e-05, "loss": 0.9968, "step": 74845 }, { "epoch": 66.2976085031001, "grad_norm": 0.2788546085357666, "learning_rate": 1e-05, "loss": 0.9632, "step": 74850 }, { "epoch": 66.30203720106289, "grad_norm": 0.22876811027526855, "learning_rate": 1e-05, "loss": 0.9631, "step": 74855 }, { "epoch": 66.30646589902568, "grad_norm": 0.24928165972232819, "learning_rate": 1e-05, "loss": 1.0002, "step": 74860 }, { "epoch": 66.31089459698849, "grad_norm": 0.2944789528846741, "learning_rate": 1e-05, "loss": 0.9789, "step": 74865 }, { "epoch": 66.31532329495128, "grad_norm": 0.2194368839263916, "learning_rate": 1e-05, "loss": 0.981, "step": 74870 }, { "epoch": 66.31975199291408, "grad_norm": 0.2220890074968338, "learning_rate": 1e-05, "loss": 0.9386, "step": 74875 }, { "epoch": 66.32418069087689, "grad_norm": 0.2729151248931885, "learning_rate": 1e-05, "loss": 0.9583, "step": 74880 }, { "epoch": 66.32860938883968, "grad_norm": 0.22160911560058594, "learning_rate": 1e-05, "loss": 0.9809, "step": 74885 }, { "epoch": 66.33303808680247, "grad_norm": 0.22314119338989258, "learning_rate": 1e-05, "loss": 0.984, "step": 74890 }, { "epoch": 66.33746678476528, "grad_norm": 0.22863273322582245, "learning_rate": 1e-05, "loss": 0.9601, "step": 74895 }, { "epoch": 66.34189548272808, "grad_norm": 0.24421074986457825, "learning_rate": 1e-05, "loss": 0.9435, "step": 74900 }, { "epoch": 66.34632418069087, "grad_norm": 0.22514158487319946, "learning_rate": 1e-05, "loss": 0.9362, "step": 74905 }, { "epoch": 66.35075287865368, "grad_norm": 0.2802341878414154, "learning_rate": 1e-05, "loss": 0.9735, "step": 74910 }, { "epoch": 66.35518157661647, "grad_norm": 0.24391338229179382, "learning_rate": 1e-05, "loss": 0.9087, "step": 74915 }, { "epoch": 66.35961027457927, "grad_norm": 0.2581331133842468, "learning_rate": 1e-05, "loss": 0.9386, "step": 74920 }, { "epoch": 66.36403897254208, "grad_norm": 0.2371310293674469, "learning_rate": 1e-05, "loss": 0.9125, "step": 74925 }, { "epoch": 66.36846767050487, "grad_norm": 0.27201002836227417, "learning_rate": 1e-05, "loss": 0.9729, "step": 74930 }, { "epoch": 66.37289636846766, "grad_norm": 0.2739558517932892, "learning_rate": 1e-05, "loss": 0.945, "step": 74935 }, { "epoch": 66.37732506643047, "grad_norm": 0.24453838169574738, "learning_rate": 1e-05, "loss": 1.0419, "step": 74940 }, { "epoch": 66.38175376439327, "grad_norm": 0.2607939839363098, "learning_rate": 1e-05, "loss": 0.9606, "step": 74945 }, { "epoch": 66.38618246235606, "grad_norm": 0.22902780771255493, "learning_rate": 1e-05, "loss": 0.9703, "step": 74950 }, { "epoch": 66.39061116031887, "grad_norm": 0.22381523251533508, "learning_rate": 1e-05, "loss": 0.9726, "step": 74955 }, { "epoch": 66.39503985828166, "grad_norm": 0.2658435106277466, "learning_rate": 1e-05, "loss": 0.9744, "step": 74960 }, { "epoch": 66.39946855624446, "grad_norm": 0.22766417264938354, "learning_rate": 1e-05, "loss": 0.956, "step": 74965 }, { "epoch": 66.40389725420727, "grad_norm": 0.21936286985874176, "learning_rate": 1e-05, "loss": 0.9357, "step": 74970 }, { "epoch": 66.40832595217006, "grad_norm": 0.24941982328891754, "learning_rate": 1e-05, "loss": 0.9784, "step": 74975 }, { "epoch": 66.41275465013285, "grad_norm": 0.25433406233787537, "learning_rate": 1e-05, "loss": 0.9564, "step": 74980 }, { "epoch": 66.41718334809566, "grad_norm": 0.23422107100486755, "learning_rate": 1e-05, "loss": 0.9696, "step": 74985 }, { "epoch": 66.42161204605846, "grad_norm": 0.2508363425731659, "learning_rate": 1e-05, "loss": 0.9505, "step": 74990 }, { "epoch": 66.42604074402126, "grad_norm": 0.2616310119628906, "learning_rate": 1e-05, "loss": 0.9079, "step": 74995 }, { "epoch": 66.43046944198406, "grad_norm": 0.2855851650238037, "learning_rate": 1e-05, "loss": 0.9739, "step": 75000 }, { "epoch": 66.43489813994685, "grad_norm": 0.2324899435043335, "learning_rate": 1e-05, "loss": 0.9614, "step": 75005 }, { "epoch": 66.43932683790966, "grad_norm": 0.2708640396595001, "learning_rate": 1e-05, "loss": 1.0039, "step": 75010 }, { "epoch": 66.44375553587246, "grad_norm": 0.24579563736915588, "learning_rate": 1e-05, "loss": 0.9873, "step": 75015 }, { "epoch": 66.44818423383525, "grad_norm": 0.23982743918895721, "learning_rate": 1e-05, "loss": 0.9569, "step": 75020 }, { "epoch": 66.45261293179806, "grad_norm": 0.2698413133621216, "learning_rate": 1e-05, "loss": 0.9602, "step": 75025 }, { "epoch": 66.45704162976085, "grad_norm": 0.2634207308292389, "learning_rate": 1e-05, "loss": 0.9238, "step": 75030 }, { "epoch": 66.46147032772365, "grad_norm": 0.2523522973060608, "learning_rate": 1e-05, "loss": 1.0056, "step": 75035 }, { "epoch": 66.46589902568645, "grad_norm": 0.20769092440605164, "learning_rate": 1e-05, "loss": 0.9858, "step": 75040 }, { "epoch": 66.47032772364925, "grad_norm": 0.23730620741844177, "learning_rate": 1e-05, "loss": 1.0083, "step": 75045 }, { "epoch": 66.47475642161204, "grad_norm": 0.2567179501056671, "learning_rate": 1e-05, "loss": 0.9293, "step": 75050 }, { "epoch": 66.47918511957485, "grad_norm": 0.23126822710037231, "learning_rate": 1e-05, "loss": 0.9622, "step": 75055 }, { "epoch": 66.48361381753764, "grad_norm": 0.23821425437927246, "learning_rate": 1e-05, "loss": 0.9931, "step": 75060 }, { "epoch": 66.48804251550044, "grad_norm": 0.25249359011650085, "learning_rate": 1e-05, "loss": 0.9027, "step": 75065 }, { "epoch": 66.49247121346325, "grad_norm": 0.28909602761268616, "learning_rate": 1e-05, "loss": 0.9654, "step": 75070 }, { "epoch": 66.49689991142604, "grad_norm": 0.2202627956867218, "learning_rate": 1e-05, "loss": 0.9607, "step": 75075 }, { "epoch": 66.50132860938884, "grad_norm": 0.24920712411403656, "learning_rate": 1e-05, "loss": 0.9829, "step": 75080 }, { "epoch": 66.50575730735164, "grad_norm": 0.22108668088912964, "learning_rate": 1e-05, "loss": 1.0182, "step": 75085 }, { "epoch": 66.51018600531444, "grad_norm": 0.26375913619995117, "learning_rate": 1e-05, "loss": 0.9043, "step": 75090 }, { "epoch": 66.51461470327723, "grad_norm": 0.266618013381958, "learning_rate": 1e-05, "loss": 0.9664, "step": 75095 }, { "epoch": 66.51904340124004, "grad_norm": 0.24958953261375427, "learning_rate": 1e-05, "loss": 0.9787, "step": 75100 }, { "epoch": 66.52347209920283, "grad_norm": 0.21478323638439178, "learning_rate": 1e-05, "loss": 0.971, "step": 75105 }, { "epoch": 66.52790079716563, "grad_norm": 0.2612699568271637, "learning_rate": 1e-05, "loss": 0.9714, "step": 75110 }, { "epoch": 66.53232949512844, "grad_norm": 0.20067405700683594, "learning_rate": 1e-05, "loss": 0.9859, "step": 75115 }, { "epoch": 66.53675819309123, "grad_norm": 0.20420116186141968, "learning_rate": 1e-05, "loss": 1.0206, "step": 75120 }, { "epoch": 66.54118689105402, "grad_norm": 0.2514948844909668, "learning_rate": 1e-05, "loss": 0.9769, "step": 75125 }, { "epoch": 66.54561558901683, "grad_norm": 0.24784861505031586, "learning_rate": 1e-05, "loss": 0.9242, "step": 75130 }, { "epoch": 66.55004428697963, "grad_norm": 0.1898975521326065, "learning_rate": 1e-05, "loss": 1.006, "step": 75135 }, { "epoch": 66.55447298494242, "grad_norm": 0.2076498419046402, "learning_rate": 1e-05, "loss": 0.9581, "step": 75140 }, { "epoch": 66.55890168290523, "grad_norm": 0.2726189196109772, "learning_rate": 1e-05, "loss": 0.9568, "step": 75145 }, { "epoch": 66.56333038086802, "grad_norm": 0.2076188027858734, "learning_rate": 1e-05, "loss": 1.0038, "step": 75150 }, { "epoch": 66.56775907883082, "grad_norm": 0.21906183660030365, "learning_rate": 1e-05, "loss": 0.9503, "step": 75155 }, { "epoch": 66.57218777679363, "grad_norm": 0.20580622553825378, "learning_rate": 1e-05, "loss": 0.9095, "step": 75160 }, { "epoch": 66.57661647475642, "grad_norm": 0.23688721656799316, "learning_rate": 1e-05, "loss": 0.9392, "step": 75165 }, { "epoch": 66.58104517271921, "grad_norm": 0.24313907325267792, "learning_rate": 1e-05, "loss": 0.9534, "step": 75170 }, { "epoch": 66.58547387068202, "grad_norm": 0.23236240446567535, "learning_rate": 1e-05, "loss": 0.9332, "step": 75175 }, { "epoch": 66.58990256864482, "grad_norm": 0.26118573546409607, "learning_rate": 1e-05, "loss": 0.9992, "step": 75180 }, { "epoch": 66.59433126660761, "grad_norm": 0.2630217373371124, "learning_rate": 1e-05, "loss": 0.9855, "step": 75185 }, { "epoch": 66.59875996457042, "grad_norm": 0.2817946970462799, "learning_rate": 1e-05, "loss": 0.9343, "step": 75190 }, { "epoch": 66.60318866253321, "grad_norm": 0.2811495363712311, "learning_rate": 1e-05, "loss": 0.9379, "step": 75195 }, { "epoch": 66.60761736049601, "grad_norm": 0.319832444190979, "learning_rate": 1e-05, "loss": 0.971, "step": 75200 }, { "epoch": 66.61204605845882, "grad_norm": 0.23089855909347534, "learning_rate": 1e-05, "loss": 0.9399, "step": 75205 }, { "epoch": 66.61647475642161, "grad_norm": 0.22274315357208252, "learning_rate": 1e-05, "loss": 1.0017, "step": 75210 }, { "epoch": 66.6209034543844, "grad_norm": 0.24172724783420563, "learning_rate": 1e-05, "loss": 0.9241, "step": 75215 }, { "epoch": 66.62533215234721, "grad_norm": 0.22229234874248505, "learning_rate": 1e-05, "loss": 0.9148, "step": 75220 }, { "epoch": 66.62976085031, "grad_norm": 0.23254527151584625, "learning_rate": 1e-05, "loss": 0.9665, "step": 75225 }, { "epoch": 66.63418954827281, "grad_norm": 0.22574812173843384, "learning_rate": 1e-05, "loss": 0.918, "step": 75230 }, { "epoch": 66.63861824623561, "grad_norm": 0.2288178652524948, "learning_rate": 1e-05, "loss": 0.9386, "step": 75235 }, { "epoch": 66.6430469441984, "grad_norm": 0.241951584815979, "learning_rate": 1e-05, "loss": 0.961, "step": 75240 }, { "epoch": 66.64747564216121, "grad_norm": 0.2685646712779999, "learning_rate": 1e-05, "loss": 0.9782, "step": 75245 }, { "epoch": 66.651904340124, "grad_norm": 0.2204943150281906, "learning_rate": 1e-05, "loss": 0.9375, "step": 75250 }, { "epoch": 66.6563330380868, "grad_norm": 0.23781593143939972, "learning_rate": 1e-05, "loss": 0.9394, "step": 75255 }, { "epoch": 66.66076173604961, "grad_norm": 0.2906077802181244, "learning_rate": 1e-05, "loss": 0.9623, "step": 75260 }, { "epoch": 66.6651904340124, "grad_norm": 0.22208988666534424, "learning_rate": 1e-05, "loss": 0.9315, "step": 75265 }, { "epoch": 66.6696191319752, "grad_norm": 0.27872180938720703, "learning_rate": 1e-05, "loss": 0.9645, "step": 75270 }, { "epoch": 66.674047829938, "grad_norm": 0.20481392741203308, "learning_rate": 1e-05, "loss": 0.9967, "step": 75275 }, { "epoch": 66.6784765279008, "grad_norm": 0.25637343525886536, "learning_rate": 1e-05, "loss": 0.9395, "step": 75280 }, { "epoch": 66.68290522586359, "grad_norm": 0.25928816199302673, "learning_rate": 1e-05, "loss": 1.0125, "step": 75285 }, { "epoch": 66.6873339238264, "grad_norm": 0.19369734823703766, "learning_rate": 1e-05, "loss": 0.9623, "step": 75290 }, { "epoch": 66.6917626217892, "grad_norm": 0.22767478227615356, "learning_rate": 1e-05, "loss": 0.9538, "step": 75295 }, { "epoch": 66.69619131975199, "grad_norm": 0.21035458147525787, "learning_rate": 1e-05, "loss": 0.9944, "step": 75300 }, { "epoch": 66.7006200177148, "grad_norm": 0.22688359022140503, "learning_rate": 1e-05, "loss": 0.9901, "step": 75305 }, { "epoch": 66.70504871567759, "grad_norm": 0.26498523354530334, "learning_rate": 1e-05, "loss": 0.948, "step": 75310 }, { "epoch": 66.70947741364039, "grad_norm": 0.23060598969459534, "learning_rate": 1e-05, "loss": 0.945, "step": 75315 }, { "epoch": 66.7139061116032, "grad_norm": 0.22982072830200195, "learning_rate": 1e-05, "loss": 0.928, "step": 75320 }, { "epoch": 66.71833480956599, "grad_norm": 0.2353755235671997, "learning_rate": 1e-05, "loss": 0.9356, "step": 75325 }, { "epoch": 66.72276350752878, "grad_norm": 0.25840964913368225, "learning_rate": 1e-05, "loss": 0.9848, "step": 75330 }, { "epoch": 66.72719220549159, "grad_norm": 0.235965758562088, "learning_rate": 1e-05, "loss": 0.9068, "step": 75335 }, { "epoch": 66.73162090345438, "grad_norm": 0.21959485113620758, "learning_rate": 1e-05, "loss": 0.9741, "step": 75340 }, { "epoch": 66.73604960141718, "grad_norm": 0.2299426794052124, "learning_rate": 1e-05, "loss": 0.9764, "step": 75345 }, { "epoch": 66.74047829937999, "grad_norm": 0.24419432878494263, "learning_rate": 1e-05, "loss": 1.026, "step": 75350 }, { "epoch": 66.74490699734278, "grad_norm": 0.24157685041427612, "learning_rate": 1e-05, "loss": 0.9721, "step": 75355 }, { "epoch": 66.74933569530558, "grad_norm": 0.2289917767047882, "learning_rate": 1e-05, "loss": 1.0021, "step": 75360 }, { "epoch": 66.75376439326838, "grad_norm": 0.2568418085575104, "learning_rate": 1e-05, "loss": 0.9933, "step": 75365 }, { "epoch": 66.75819309123118, "grad_norm": 0.23512817919254303, "learning_rate": 1e-05, "loss": 0.9925, "step": 75370 }, { "epoch": 66.76262178919397, "grad_norm": 0.24344559013843536, "learning_rate": 1e-05, "loss": 0.9666, "step": 75375 }, { "epoch": 66.76705048715678, "grad_norm": 0.22590403258800507, "learning_rate": 1e-05, "loss": 0.9459, "step": 75380 }, { "epoch": 66.77147918511957, "grad_norm": 0.2334146499633789, "learning_rate": 1e-05, "loss": 0.9837, "step": 75385 }, { "epoch": 66.77590788308237, "grad_norm": 0.25481337308883667, "learning_rate": 1e-05, "loss": 0.9684, "step": 75390 }, { "epoch": 66.78033658104518, "grad_norm": 0.2354278862476349, "learning_rate": 1e-05, "loss": 0.9859, "step": 75395 }, { "epoch": 66.78476527900797, "grad_norm": 0.2461446076631546, "learning_rate": 1e-05, "loss": 1.0025, "step": 75400 }, { "epoch": 66.78919397697076, "grad_norm": 0.25934159755706787, "learning_rate": 1e-05, "loss": 0.9347, "step": 75405 }, { "epoch": 66.79362267493357, "grad_norm": 0.2537505030632019, "learning_rate": 1e-05, "loss": 1.0068, "step": 75410 }, { "epoch": 66.79805137289637, "grad_norm": 0.27161723375320435, "learning_rate": 1e-05, "loss": 0.9576, "step": 75415 }, { "epoch": 66.80248007085916, "grad_norm": 0.21545828878879547, "learning_rate": 1e-05, "loss": 0.9792, "step": 75420 }, { "epoch": 66.80690876882197, "grad_norm": 0.23820339143276215, "learning_rate": 1e-05, "loss": 0.9637, "step": 75425 }, { "epoch": 66.81133746678476, "grad_norm": 0.3309570252895355, "learning_rate": 1e-05, "loss": 0.9842, "step": 75430 }, { "epoch": 66.81576616474756, "grad_norm": 0.2800322473049164, "learning_rate": 1e-05, "loss": 0.9802, "step": 75435 }, { "epoch": 66.82019486271037, "grad_norm": 0.30508455634117126, "learning_rate": 1e-05, "loss": 0.9963, "step": 75440 }, { "epoch": 66.82462356067316, "grad_norm": 0.27522146701812744, "learning_rate": 1e-05, "loss": 1.0242, "step": 75445 }, { "epoch": 66.82905225863595, "grad_norm": 0.22513563930988312, "learning_rate": 1e-05, "loss": 1.0128, "step": 75450 }, { "epoch": 66.83348095659876, "grad_norm": 0.22156572341918945, "learning_rate": 1e-05, "loss": 0.9739, "step": 75455 }, { "epoch": 66.83790965456156, "grad_norm": 0.2650841772556305, "learning_rate": 1e-05, "loss": 0.9384, "step": 75460 }, { "epoch": 66.84233835252435, "grad_norm": 0.24405457079410553, "learning_rate": 1e-05, "loss": 1.0047, "step": 75465 }, { "epoch": 66.84676705048716, "grad_norm": 0.23092027008533478, "learning_rate": 1e-05, "loss": 0.9984, "step": 75470 }, { "epoch": 66.85119574844995, "grad_norm": 0.23895317316055298, "learning_rate": 1e-05, "loss": 0.9254, "step": 75475 }, { "epoch": 66.85562444641276, "grad_norm": 0.24698390066623688, "learning_rate": 1e-05, "loss": 0.9918, "step": 75480 }, { "epoch": 66.86005314437556, "grad_norm": 0.25817641615867615, "learning_rate": 1e-05, "loss": 0.9666, "step": 75485 }, { "epoch": 66.86448184233835, "grad_norm": 0.2691112160682678, "learning_rate": 1e-05, "loss": 0.9779, "step": 75490 }, { "epoch": 66.86891054030116, "grad_norm": 0.21688330173492432, "learning_rate": 1e-05, "loss": 0.9531, "step": 75495 }, { "epoch": 66.87333923826395, "grad_norm": 0.2268580198287964, "learning_rate": 1e-05, "loss": 0.9912, "step": 75500 }, { "epoch": 66.87776793622675, "grad_norm": 0.24221055209636688, "learning_rate": 1e-05, "loss": 0.926, "step": 75505 }, { "epoch": 66.88219663418955, "grad_norm": 0.23131303489208221, "learning_rate": 1e-05, "loss": 0.986, "step": 75510 }, { "epoch": 66.88662533215235, "grad_norm": 0.2452724277973175, "learning_rate": 1e-05, "loss": 0.9762, "step": 75515 }, { "epoch": 66.89105403011514, "grad_norm": 0.23758156597614288, "learning_rate": 1e-05, "loss": 0.9077, "step": 75520 }, { "epoch": 66.89548272807795, "grad_norm": 0.21675114333629608, "learning_rate": 1e-05, "loss": 0.9754, "step": 75525 }, { "epoch": 66.89991142604075, "grad_norm": 0.23800943791866302, "learning_rate": 1e-05, "loss": 0.9462, "step": 75530 }, { "epoch": 66.90434012400354, "grad_norm": 0.23256763815879822, "learning_rate": 1e-05, "loss": 0.9861, "step": 75535 }, { "epoch": 66.90876882196635, "grad_norm": 0.26479703187942505, "learning_rate": 1e-05, "loss": 1.0034, "step": 75540 }, { "epoch": 66.91319751992914, "grad_norm": 0.23046721518039703, "learning_rate": 1e-05, "loss": 0.974, "step": 75545 }, { "epoch": 66.91762621789194, "grad_norm": 0.22760146856307983, "learning_rate": 1e-05, "loss": 1.0068, "step": 75550 }, { "epoch": 66.92205491585474, "grad_norm": 0.21838697791099548, "learning_rate": 1e-05, "loss": 0.8722, "step": 75555 }, { "epoch": 66.92648361381754, "grad_norm": 0.19916194677352905, "learning_rate": 1e-05, "loss": 0.8918, "step": 75560 }, { "epoch": 66.93091231178033, "grad_norm": 0.21423226594924927, "learning_rate": 1e-05, "loss": 0.9847, "step": 75565 }, { "epoch": 66.93534100974314, "grad_norm": 0.21948851644992828, "learning_rate": 1e-05, "loss": 0.9602, "step": 75570 }, { "epoch": 66.93976970770593, "grad_norm": 0.22058556973934174, "learning_rate": 1e-05, "loss": 0.962, "step": 75575 }, { "epoch": 66.94419840566873, "grad_norm": 0.3051481544971466, "learning_rate": 1e-05, "loss": 0.9541, "step": 75580 }, { "epoch": 66.94862710363154, "grad_norm": 0.2431422770023346, "learning_rate": 1e-05, "loss": 0.9691, "step": 75585 }, { "epoch": 66.95305580159433, "grad_norm": 0.24674725532531738, "learning_rate": 1e-05, "loss": 0.9634, "step": 75590 }, { "epoch": 66.95748449955713, "grad_norm": 0.2501239776611328, "learning_rate": 1e-05, "loss": 0.9657, "step": 75595 }, { "epoch": 66.96191319751993, "grad_norm": 0.21950183808803558, "learning_rate": 1e-05, "loss": 0.9991, "step": 75600 }, { "epoch": 66.96634189548273, "grad_norm": 0.27658769488334656, "learning_rate": 1e-05, "loss": 0.9139, "step": 75605 }, { "epoch": 66.97077059344552, "grad_norm": 0.19335128366947174, "learning_rate": 1e-05, "loss": 0.9972, "step": 75610 }, { "epoch": 66.97519929140833, "grad_norm": 0.23977304995059967, "learning_rate": 1e-05, "loss": 0.9912, "step": 75615 }, { "epoch": 66.97962798937112, "grad_norm": 0.20509642362594604, "learning_rate": 1e-05, "loss": 1.0011, "step": 75620 }, { "epoch": 66.98405668733392, "grad_norm": 0.2605193853378296, "learning_rate": 1e-05, "loss": 1.001, "step": 75625 }, { "epoch": 66.98848538529673, "grad_norm": 0.2361188679933548, "learning_rate": 1e-05, "loss": 0.9237, "step": 75630 }, { "epoch": 66.99291408325952, "grad_norm": 0.22738058865070343, "learning_rate": 1e-05, "loss": 0.9594, "step": 75635 }, { "epoch": 66.99734278122232, "grad_norm": 0.2395820915699005, "learning_rate": 1e-05, "loss": 1.0295, "step": 75640 }, { "epoch": 67.00177147918512, "grad_norm": 0.22118353843688965, "learning_rate": 1e-05, "loss": 0.9469, "step": 75645 }, { "epoch": 67.00620017714792, "grad_norm": 0.2724268138408661, "learning_rate": 1e-05, "loss": 0.9401, "step": 75650 }, { "epoch": 67.01062887511071, "grad_norm": 0.2503476142883301, "learning_rate": 1e-05, "loss": 0.965, "step": 75655 }, { "epoch": 67.01505757307352, "grad_norm": 0.26182955503463745, "learning_rate": 1e-05, "loss": 0.9541, "step": 75660 }, { "epoch": 67.01948627103631, "grad_norm": 0.22933301329612732, "learning_rate": 1e-05, "loss": 0.9583, "step": 75665 }, { "epoch": 67.02391496899911, "grad_norm": 0.23889955878257751, "learning_rate": 1e-05, "loss": 0.9183, "step": 75670 }, { "epoch": 67.02834366696192, "grad_norm": 0.26819315552711487, "learning_rate": 1e-05, "loss": 0.9487, "step": 75675 }, { "epoch": 67.03277236492471, "grad_norm": 0.26547348499298096, "learning_rate": 1e-05, "loss": 0.9957, "step": 75680 }, { "epoch": 67.0372010628875, "grad_norm": 0.26852715015411377, "learning_rate": 1e-05, "loss": 0.9702, "step": 75685 }, { "epoch": 67.04162976085031, "grad_norm": 0.24702468514442444, "learning_rate": 1e-05, "loss": 0.968, "step": 75690 }, { "epoch": 67.0460584588131, "grad_norm": 0.22735893726348877, "learning_rate": 1e-05, "loss": 1.0001, "step": 75695 }, { "epoch": 67.0504871567759, "grad_norm": 0.2753344178199768, "learning_rate": 1e-05, "loss": 1.0473, "step": 75700 }, { "epoch": 67.05491585473871, "grad_norm": 0.25895124673843384, "learning_rate": 1e-05, "loss": 0.9827, "step": 75705 }, { "epoch": 67.0593445527015, "grad_norm": 0.23908720910549164, "learning_rate": 1e-05, "loss": 0.9829, "step": 75710 }, { "epoch": 67.0637732506643, "grad_norm": 0.2247406542301178, "learning_rate": 1e-05, "loss": 0.948, "step": 75715 }, { "epoch": 67.0682019486271, "grad_norm": 0.22783435881137848, "learning_rate": 1e-05, "loss": 1.0127, "step": 75720 }, { "epoch": 67.0726306465899, "grad_norm": 0.26499298214912415, "learning_rate": 1e-05, "loss": 0.934, "step": 75725 }, { "epoch": 67.07705934455271, "grad_norm": 0.23504024744033813, "learning_rate": 1e-05, "loss": 0.9887, "step": 75730 }, { "epoch": 67.0814880425155, "grad_norm": 0.25943422317504883, "learning_rate": 1e-05, "loss": 0.9538, "step": 75735 }, { "epoch": 67.0859167404783, "grad_norm": 0.21212315559387207, "learning_rate": 1e-05, "loss": 0.9552, "step": 75740 }, { "epoch": 67.0903454384411, "grad_norm": 0.243398517370224, "learning_rate": 1e-05, "loss": 0.9612, "step": 75745 }, { "epoch": 67.0947741364039, "grad_norm": 0.22973883152008057, "learning_rate": 1e-05, "loss": 1.0122, "step": 75750 }, { "epoch": 67.0992028343667, "grad_norm": 0.25598523020744324, "learning_rate": 1e-05, "loss": 0.9221, "step": 75755 }, { "epoch": 67.1036315323295, "grad_norm": 0.21338117122650146, "learning_rate": 1e-05, "loss": 0.9666, "step": 75760 }, { "epoch": 67.1080602302923, "grad_norm": 0.2169879674911499, "learning_rate": 1e-05, "loss": 0.9366, "step": 75765 }, { "epoch": 67.11248892825509, "grad_norm": 0.26075443625450134, "learning_rate": 1e-05, "loss": 0.9384, "step": 75770 }, { "epoch": 67.1169176262179, "grad_norm": 0.2342262864112854, "learning_rate": 1e-05, "loss": 0.9307, "step": 75775 }, { "epoch": 67.12134632418069, "grad_norm": 0.2636463940143585, "learning_rate": 1e-05, "loss": 0.9561, "step": 75780 }, { "epoch": 67.12577502214349, "grad_norm": 0.24479275941848755, "learning_rate": 1e-05, "loss": 0.9623, "step": 75785 }, { "epoch": 67.1302037201063, "grad_norm": 0.2589849829673767, "learning_rate": 1e-05, "loss": 0.9909, "step": 75790 }, { "epoch": 67.13463241806909, "grad_norm": 0.22806964814662933, "learning_rate": 1e-05, "loss": 0.9291, "step": 75795 }, { "epoch": 67.13906111603188, "grad_norm": 0.2187642753124237, "learning_rate": 1e-05, "loss": 0.9509, "step": 75800 }, { "epoch": 67.14348981399469, "grad_norm": 0.31209951639175415, "learning_rate": 1e-05, "loss": 0.9563, "step": 75805 }, { "epoch": 67.14791851195749, "grad_norm": 0.33480408787727356, "learning_rate": 1e-05, "loss": 1.0076, "step": 75810 }, { "epoch": 67.15234720992028, "grad_norm": 0.28045713901519775, "learning_rate": 1e-05, "loss": 0.9384, "step": 75815 }, { "epoch": 67.15677590788309, "grad_norm": 0.1930042952299118, "learning_rate": 1e-05, "loss": 1.0108, "step": 75820 }, { "epoch": 67.16120460584588, "grad_norm": 0.22959069907665253, "learning_rate": 1e-05, "loss": 0.9529, "step": 75825 }, { "epoch": 67.16563330380868, "grad_norm": 0.23556780815124512, "learning_rate": 1e-05, "loss": 0.9459, "step": 75830 }, { "epoch": 67.17006200177148, "grad_norm": 0.232491135597229, "learning_rate": 1e-05, "loss": 0.949, "step": 75835 }, { "epoch": 67.17449069973428, "grad_norm": 0.2298961579799652, "learning_rate": 1e-05, "loss": 0.9672, "step": 75840 }, { "epoch": 67.17891939769707, "grad_norm": 0.2644525170326233, "learning_rate": 1e-05, "loss": 1.0036, "step": 75845 }, { "epoch": 67.18334809565988, "grad_norm": 0.2615306079387665, "learning_rate": 1e-05, "loss": 0.9678, "step": 75850 }, { "epoch": 67.18777679362267, "grad_norm": 0.26570606231689453, "learning_rate": 1e-05, "loss": 0.9631, "step": 75855 }, { "epoch": 67.19220549158547, "grad_norm": 0.25981390476226807, "learning_rate": 1e-05, "loss": 0.9693, "step": 75860 }, { "epoch": 67.19663418954828, "grad_norm": 0.2342883199453354, "learning_rate": 1e-05, "loss": 0.9496, "step": 75865 }, { "epoch": 67.20106288751107, "grad_norm": 0.25465264916419983, "learning_rate": 1e-05, "loss": 0.9804, "step": 75870 }, { "epoch": 67.20549158547387, "grad_norm": 0.23989440500736237, "learning_rate": 1e-05, "loss": 0.9687, "step": 75875 }, { "epoch": 67.20992028343667, "grad_norm": 0.2354646474123001, "learning_rate": 1e-05, "loss": 0.9837, "step": 75880 }, { "epoch": 67.21434898139947, "grad_norm": 0.21341370046138763, "learning_rate": 1e-05, "loss": 1.023, "step": 75885 }, { "epoch": 67.21877767936226, "grad_norm": 0.27982768416404724, "learning_rate": 1e-05, "loss": 1.0196, "step": 75890 }, { "epoch": 67.22320637732507, "grad_norm": 0.2206200212240219, "learning_rate": 1e-05, "loss": 0.9471, "step": 75895 }, { "epoch": 67.22763507528786, "grad_norm": 0.2396899163722992, "learning_rate": 1e-05, "loss": 1.0184, "step": 75900 }, { "epoch": 67.23206377325066, "grad_norm": 0.23123286664485931, "learning_rate": 1e-05, "loss": 0.93, "step": 75905 }, { "epoch": 67.23649247121347, "grad_norm": 0.288381963968277, "learning_rate": 1e-05, "loss": 1.0049, "step": 75910 }, { "epoch": 67.24092116917626, "grad_norm": 0.29143276810646057, "learning_rate": 1e-05, "loss": 0.948, "step": 75915 }, { "epoch": 67.24534986713905, "grad_norm": 0.23502743244171143, "learning_rate": 1e-05, "loss": 1.025, "step": 75920 }, { "epoch": 67.24977856510186, "grad_norm": 0.24762378633022308, "learning_rate": 1e-05, "loss": 0.964, "step": 75925 }, { "epoch": 67.25420726306466, "grad_norm": 0.23010413348674774, "learning_rate": 1e-05, "loss": 0.9808, "step": 75930 }, { "epoch": 67.25863596102745, "grad_norm": 0.281925767660141, "learning_rate": 1e-05, "loss": 0.9322, "step": 75935 }, { "epoch": 67.26306465899026, "grad_norm": 0.26682430505752563, "learning_rate": 1e-05, "loss": 0.9394, "step": 75940 }, { "epoch": 67.26749335695305, "grad_norm": 0.2477923333644867, "learning_rate": 1e-05, "loss": 0.9688, "step": 75945 }, { "epoch": 67.27192205491585, "grad_norm": 0.21757370233535767, "learning_rate": 1e-05, "loss": 0.8989, "step": 75950 }, { "epoch": 67.27635075287866, "grad_norm": 0.26036861538887024, "learning_rate": 1e-05, "loss": 1.0361, "step": 75955 }, { "epoch": 67.28077945084145, "grad_norm": 0.22255021333694458, "learning_rate": 1e-05, "loss": 1.0157, "step": 75960 }, { "epoch": 67.28520814880426, "grad_norm": 0.2614385187625885, "learning_rate": 1e-05, "loss": 0.9447, "step": 75965 }, { "epoch": 67.28963684676705, "grad_norm": 0.2175007164478302, "learning_rate": 1e-05, "loss": 0.9834, "step": 75970 }, { "epoch": 67.29406554472985, "grad_norm": 0.2614530324935913, "learning_rate": 1e-05, "loss": 1.01, "step": 75975 }, { "epoch": 67.29849424269266, "grad_norm": 0.23791691660881042, "learning_rate": 1e-05, "loss": 0.9957, "step": 75980 }, { "epoch": 67.30292294065545, "grad_norm": 0.2422555536031723, "learning_rate": 1e-05, "loss": 1.0253, "step": 75985 }, { "epoch": 67.30735163861824, "grad_norm": 0.22131690382957458, "learning_rate": 1e-05, "loss": 0.9866, "step": 75990 }, { "epoch": 67.31178033658105, "grad_norm": 0.24223895370960236, "learning_rate": 1e-05, "loss": 0.9854, "step": 75995 }, { "epoch": 67.31620903454385, "grad_norm": 0.2355361133813858, "learning_rate": 1e-05, "loss": 0.9029, "step": 76000 }, { "epoch": 67.32063773250664, "grad_norm": 0.18825343251228333, "learning_rate": 1e-05, "loss": 0.9883, "step": 76005 }, { "epoch": 67.32506643046945, "grad_norm": 0.21182124316692352, "learning_rate": 1e-05, "loss": 0.9987, "step": 76010 }, { "epoch": 67.32949512843224, "grad_norm": 0.24794258177280426, "learning_rate": 1e-05, "loss": 1.0012, "step": 76015 }, { "epoch": 67.33392382639504, "grad_norm": 0.24796223640441895, "learning_rate": 1e-05, "loss": 0.9524, "step": 76020 }, { "epoch": 67.33835252435784, "grad_norm": 0.23214022815227509, "learning_rate": 1e-05, "loss": 0.9697, "step": 76025 }, { "epoch": 67.34278122232064, "grad_norm": 0.22341832518577576, "learning_rate": 1e-05, "loss": 0.9829, "step": 76030 }, { "epoch": 67.34720992028343, "grad_norm": 0.2317054718732834, "learning_rate": 1e-05, "loss": 0.9784, "step": 76035 }, { "epoch": 67.35163861824624, "grad_norm": 0.27360010147094727, "learning_rate": 1e-05, "loss": 0.9604, "step": 76040 }, { "epoch": 67.35606731620904, "grad_norm": 0.2764146029949188, "learning_rate": 1e-05, "loss": 0.9564, "step": 76045 }, { "epoch": 67.36049601417183, "grad_norm": 0.2772946357727051, "learning_rate": 1e-05, "loss": 0.961, "step": 76050 }, { "epoch": 67.36492471213464, "grad_norm": 0.2732570469379425, "learning_rate": 1e-05, "loss": 0.9783, "step": 76055 }, { "epoch": 67.36935341009743, "grad_norm": 0.2558162212371826, "learning_rate": 1e-05, "loss": 1.0112, "step": 76060 }, { "epoch": 67.37378210806023, "grad_norm": 0.2680899202823639, "learning_rate": 1e-05, "loss": 0.9635, "step": 76065 }, { "epoch": 67.37821080602303, "grad_norm": 0.2657979428768158, "learning_rate": 1e-05, "loss": 0.9799, "step": 76070 }, { "epoch": 67.38263950398583, "grad_norm": 0.24710823595523834, "learning_rate": 1e-05, "loss": 0.9208, "step": 76075 }, { "epoch": 67.38706820194862, "grad_norm": 0.24437837302684784, "learning_rate": 1e-05, "loss": 0.9954, "step": 76080 }, { "epoch": 67.39149689991143, "grad_norm": 0.26440921425819397, "learning_rate": 1e-05, "loss": 1.0412, "step": 76085 }, { "epoch": 67.39592559787422, "grad_norm": 0.2714998424053192, "learning_rate": 1e-05, "loss": 0.9723, "step": 76090 }, { "epoch": 67.40035429583702, "grad_norm": 0.22913290560245514, "learning_rate": 1e-05, "loss": 0.9444, "step": 76095 }, { "epoch": 67.40478299379983, "grad_norm": 0.20261049270629883, "learning_rate": 1e-05, "loss": 0.9543, "step": 76100 }, { "epoch": 67.40921169176262, "grad_norm": 0.27024707198143005, "learning_rate": 1e-05, "loss": 0.9453, "step": 76105 }, { "epoch": 67.41364038972542, "grad_norm": 0.2339015007019043, "learning_rate": 1e-05, "loss": 0.971, "step": 76110 }, { "epoch": 67.41806908768822, "grad_norm": 0.22890833020210266, "learning_rate": 1e-05, "loss": 0.9397, "step": 76115 }, { "epoch": 67.42249778565102, "grad_norm": 0.2372104525566101, "learning_rate": 1e-05, "loss": 0.9434, "step": 76120 }, { "epoch": 67.42692648361381, "grad_norm": 0.21062377095222473, "learning_rate": 1e-05, "loss": 0.9647, "step": 76125 }, { "epoch": 67.43135518157662, "grad_norm": 0.22386609017848969, "learning_rate": 1e-05, "loss": 0.9572, "step": 76130 }, { "epoch": 67.43578387953941, "grad_norm": 0.21555323898792267, "learning_rate": 1e-05, "loss": 0.9623, "step": 76135 }, { "epoch": 67.44021257750221, "grad_norm": 0.23043711483478546, "learning_rate": 1e-05, "loss": 0.9764, "step": 76140 }, { "epoch": 67.44464127546502, "grad_norm": 0.23496712744235992, "learning_rate": 1e-05, "loss": 0.947, "step": 76145 }, { "epoch": 67.44906997342781, "grad_norm": 0.21874217689037323, "learning_rate": 1e-05, "loss": 0.9423, "step": 76150 }, { "epoch": 67.4534986713906, "grad_norm": 0.242909237742424, "learning_rate": 1e-05, "loss": 0.9506, "step": 76155 }, { "epoch": 67.45792736935341, "grad_norm": 0.25705698132514954, "learning_rate": 1e-05, "loss": 0.9368, "step": 76160 }, { "epoch": 67.46235606731621, "grad_norm": 0.24262379109859467, "learning_rate": 1e-05, "loss": 0.9449, "step": 76165 }, { "epoch": 67.466784765279, "grad_norm": 0.2792976200580597, "learning_rate": 1e-05, "loss": 0.9503, "step": 76170 }, { "epoch": 67.47121346324181, "grad_norm": 0.2542523741722107, "learning_rate": 1e-05, "loss": 0.9019, "step": 76175 }, { "epoch": 67.4756421612046, "grad_norm": 0.2473069280385971, "learning_rate": 1e-05, "loss": 0.9746, "step": 76180 }, { "epoch": 67.4800708591674, "grad_norm": 0.2294023185968399, "learning_rate": 1e-05, "loss": 1.0494, "step": 76185 }, { "epoch": 67.4844995571302, "grad_norm": 0.2472751885652542, "learning_rate": 1e-05, "loss": 0.9852, "step": 76190 }, { "epoch": 67.488928255093, "grad_norm": 0.24871501326560974, "learning_rate": 1e-05, "loss": 0.9581, "step": 76195 }, { "epoch": 67.4933569530558, "grad_norm": 0.29277753829956055, "learning_rate": 1e-05, "loss": 1.0161, "step": 76200 }, { "epoch": 67.4977856510186, "grad_norm": 0.219540074467659, "learning_rate": 1e-05, "loss": 0.9393, "step": 76205 }, { "epoch": 67.5022143489814, "grad_norm": 0.2593120336532593, "learning_rate": 1e-05, "loss": 0.9884, "step": 76210 }, { "epoch": 67.5066430469442, "grad_norm": 0.25804784893989563, "learning_rate": 1e-05, "loss": 0.9874, "step": 76215 }, { "epoch": 67.511071744907, "grad_norm": 0.26734939217567444, "learning_rate": 1e-05, "loss": 0.9226, "step": 76220 }, { "epoch": 67.5155004428698, "grad_norm": 0.22027775645256042, "learning_rate": 1e-05, "loss": 0.9665, "step": 76225 }, { "epoch": 67.5199291408326, "grad_norm": 0.21133993566036224, "learning_rate": 1e-05, "loss": 0.9362, "step": 76230 }, { "epoch": 67.5243578387954, "grad_norm": 0.19820855557918549, "learning_rate": 1e-05, "loss": 0.992, "step": 76235 }, { "epoch": 67.52878653675819, "grad_norm": 0.2640177011489868, "learning_rate": 1e-05, "loss": 0.9851, "step": 76240 }, { "epoch": 67.533215234721, "grad_norm": 0.2408740520477295, "learning_rate": 1e-05, "loss": 0.9157, "step": 76245 }, { "epoch": 67.53764393268379, "grad_norm": 0.25717005133628845, "learning_rate": 1e-05, "loss": 0.9614, "step": 76250 }, { "epoch": 67.54207263064659, "grad_norm": 0.22637727856636047, "learning_rate": 1e-05, "loss": 0.926, "step": 76255 }, { "epoch": 67.5465013286094, "grad_norm": 0.24869249761104584, "learning_rate": 1e-05, "loss": 0.9807, "step": 76260 }, { "epoch": 67.55093002657219, "grad_norm": 0.23160229623317719, "learning_rate": 1e-05, "loss": 0.9204, "step": 76265 }, { "epoch": 67.55535872453498, "grad_norm": 0.2552292048931122, "learning_rate": 1e-05, "loss": 0.9291, "step": 76270 }, { "epoch": 67.55978742249779, "grad_norm": 0.286464661359787, "learning_rate": 1e-05, "loss": 0.9468, "step": 76275 }, { "epoch": 67.56421612046059, "grad_norm": 0.3222089111804962, "learning_rate": 1e-05, "loss": 0.9552, "step": 76280 }, { "epoch": 67.56864481842338, "grad_norm": 0.23707778751850128, "learning_rate": 1e-05, "loss": 0.9544, "step": 76285 }, { "epoch": 67.57307351638619, "grad_norm": 0.25252413749694824, "learning_rate": 1e-05, "loss": 0.9903, "step": 76290 }, { "epoch": 67.57750221434898, "grad_norm": 0.21557208895683289, "learning_rate": 1e-05, "loss": 0.9521, "step": 76295 }, { "epoch": 67.58193091231178, "grad_norm": 0.26272785663604736, "learning_rate": 1e-05, "loss": 0.9066, "step": 76300 }, { "epoch": 67.58635961027458, "grad_norm": 0.2887038290500641, "learning_rate": 1e-05, "loss": 0.9003, "step": 76305 }, { "epoch": 67.59078830823738, "grad_norm": 0.24808894097805023, "learning_rate": 1e-05, "loss": 0.9989, "step": 76310 }, { "epoch": 67.59521700620017, "grad_norm": 0.22453254461288452, "learning_rate": 1e-05, "loss": 0.959, "step": 76315 }, { "epoch": 67.59964570416298, "grad_norm": 0.2560519278049469, "learning_rate": 1e-05, "loss": 1.0081, "step": 76320 }, { "epoch": 67.60407440212578, "grad_norm": 0.2581895887851715, "learning_rate": 1e-05, "loss": 0.9535, "step": 76325 }, { "epoch": 67.60850310008857, "grad_norm": 0.25403323769569397, "learning_rate": 1e-05, "loss": 1.0012, "step": 76330 }, { "epoch": 67.61293179805138, "grad_norm": 0.2543439567089081, "learning_rate": 1e-05, "loss": 0.9702, "step": 76335 }, { "epoch": 67.61736049601417, "grad_norm": 0.2541045844554901, "learning_rate": 1e-05, "loss": 0.9822, "step": 76340 }, { "epoch": 67.62178919397697, "grad_norm": 0.2141459584236145, "learning_rate": 1e-05, "loss": 0.9271, "step": 76345 }, { "epoch": 67.62621789193977, "grad_norm": 0.21260610222816467, "learning_rate": 1e-05, "loss": 0.9533, "step": 76350 }, { "epoch": 67.63064658990257, "grad_norm": 0.24872179329395294, "learning_rate": 1e-05, "loss": 0.9872, "step": 76355 }, { "epoch": 67.63507528786536, "grad_norm": 0.23809215426445007, "learning_rate": 1e-05, "loss": 0.9327, "step": 76360 }, { "epoch": 67.63950398582817, "grad_norm": 0.2212904691696167, "learning_rate": 1e-05, "loss": 0.9553, "step": 76365 }, { "epoch": 67.64393268379096, "grad_norm": 0.22988073527812958, "learning_rate": 1e-05, "loss": 0.9645, "step": 76370 }, { "epoch": 67.64836138175376, "grad_norm": 0.2459574192762375, "learning_rate": 1e-05, "loss": 0.9829, "step": 76375 }, { "epoch": 67.65279007971657, "grad_norm": 0.24165329337120056, "learning_rate": 1e-05, "loss": 0.96, "step": 76380 }, { "epoch": 67.65721877767936, "grad_norm": 0.25526028871536255, "learning_rate": 1e-05, "loss": 0.9637, "step": 76385 }, { "epoch": 67.66164747564216, "grad_norm": 0.23187051713466644, "learning_rate": 1e-05, "loss": 0.9916, "step": 76390 }, { "epoch": 67.66607617360496, "grad_norm": 0.25210532546043396, "learning_rate": 1e-05, "loss": 0.9335, "step": 76395 }, { "epoch": 67.67050487156776, "grad_norm": 0.22754041850566864, "learning_rate": 1e-05, "loss": 0.9513, "step": 76400 }, { "epoch": 67.67493356953055, "grad_norm": 0.2311403751373291, "learning_rate": 1e-05, "loss": 0.9985, "step": 76405 }, { "epoch": 67.67936226749336, "grad_norm": 0.21909789741039276, "learning_rate": 1e-05, "loss": 0.9757, "step": 76410 }, { "epoch": 67.68379096545615, "grad_norm": 0.21831664443016052, "learning_rate": 1e-05, "loss": 0.9762, "step": 76415 }, { "epoch": 67.68821966341895, "grad_norm": 0.2344667911529541, "learning_rate": 1e-05, "loss": 0.9946, "step": 76420 }, { "epoch": 67.69264836138176, "grad_norm": 0.2096155881881714, "learning_rate": 1e-05, "loss": 0.9158, "step": 76425 }, { "epoch": 67.69707705934455, "grad_norm": 0.27816441655158997, "learning_rate": 1e-05, "loss": 0.928, "step": 76430 }, { "epoch": 67.70150575730734, "grad_norm": 0.2850865423679352, "learning_rate": 1e-05, "loss": 0.998, "step": 76435 }, { "epoch": 67.70593445527015, "grad_norm": 0.2764822840690613, "learning_rate": 1e-05, "loss": 0.9747, "step": 76440 }, { "epoch": 67.71036315323295, "grad_norm": 0.22478429973125458, "learning_rate": 1e-05, "loss": 0.9665, "step": 76445 }, { "epoch": 67.71479185119574, "grad_norm": 0.24560759961605072, "learning_rate": 1e-05, "loss": 0.944, "step": 76450 }, { "epoch": 67.71922054915855, "grad_norm": 0.24513061344623566, "learning_rate": 1e-05, "loss": 0.9381, "step": 76455 }, { "epoch": 67.72364924712134, "grad_norm": 0.2477315217256546, "learning_rate": 1e-05, "loss": 0.9397, "step": 76460 }, { "epoch": 67.72807794508415, "grad_norm": 0.2127884030342102, "learning_rate": 1e-05, "loss": 0.9859, "step": 76465 }, { "epoch": 67.73250664304695, "grad_norm": 0.21078099310398102, "learning_rate": 1e-05, "loss": 0.9575, "step": 76470 }, { "epoch": 67.73693534100974, "grad_norm": 0.2255050539970398, "learning_rate": 1e-05, "loss": 1.015, "step": 76475 }, { "epoch": 67.74136403897255, "grad_norm": 0.2513887882232666, "learning_rate": 1e-05, "loss": 0.9186, "step": 76480 }, { "epoch": 67.74579273693534, "grad_norm": 0.24259239435195923, "learning_rate": 1e-05, "loss": 0.991, "step": 76485 }, { "epoch": 67.75022143489814, "grad_norm": 0.2723296284675598, "learning_rate": 1e-05, "loss": 1.0132, "step": 76490 }, { "epoch": 67.75465013286095, "grad_norm": 0.2045954316854477, "learning_rate": 1e-05, "loss": 0.9607, "step": 76495 }, { "epoch": 67.75907883082374, "grad_norm": 0.21716976165771484, "learning_rate": 1e-05, "loss": 0.9927, "step": 76500 }, { "epoch": 67.76350752878653, "grad_norm": 0.24256424605846405, "learning_rate": 1e-05, "loss": 1.0052, "step": 76505 }, { "epoch": 67.76793622674934, "grad_norm": 0.2862918972969055, "learning_rate": 1e-05, "loss": 1.005, "step": 76510 }, { "epoch": 67.77236492471214, "grad_norm": 0.2170039415359497, "learning_rate": 1e-05, "loss": 1.0352, "step": 76515 }, { "epoch": 67.77679362267493, "grad_norm": 0.22306427359580994, "learning_rate": 1e-05, "loss": 0.9119, "step": 76520 }, { "epoch": 67.78122232063774, "grad_norm": 0.2219875603914261, "learning_rate": 1e-05, "loss": 0.9463, "step": 76525 }, { "epoch": 67.78565101860053, "grad_norm": 0.23259595036506653, "learning_rate": 1e-05, "loss": 0.9213, "step": 76530 }, { "epoch": 67.79007971656333, "grad_norm": 0.21992826461791992, "learning_rate": 1e-05, "loss": 1.0191, "step": 76535 }, { "epoch": 67.79450841452613, "grad_norm": 0.3031964898109436, "learning_rate": 1e-05, "loss": 0.9496, "step": 76540 }, { "epoch": 67.79893711248893, "grad_norm": 0.22539545595645905, "learning_rate": 1e-05, "loss": 0.9305, "step": 76545 }, { "epoch": 67.80336581045172, "grad_norm": 0.2568967044353485, "learning_rate": 1e-05, "loss": 0.9332, "step": 76550 }, { "epoch": 67.80779450841453, "grad_norm": 0.21805106103420258, "learning_rate": 1e-05, "loss": 0.9531, "step": 76555 }, { "epoch": 67.81222320637733, "grad_norm": 0.2425696849822998, "learning_rate": 1e-05, "loss": 0.9627, "step": 76560 }, { "epoch": 67.81665190434012, "grad_norm": 0.233871191740036, "learning_rate": 1e-05, "loss": 0.9602, "step": 76565 }, { "epoch": 67.82108060230293, "grad_norm": 0.2168416976928711, "learning_rate": 1e-05, "loss": 0.9868, "step": 76570 }, { "epoch": 67.82550930026572, "grad_norm": 0.2798667550086975, "learning_rate": 1e-05, "loss": 0.9746, "step": 76575 }, { "epoch": 67.82993799822852, "grad_norm": 0.2331400215625763, "learning_rate": 1e-05, "loss": 1.0151, "step": 76580 }, { "epoch": 67.83436669619132, "grad_norm": 0.21549779176712036, "learning_rate": 1e-05, "loss": 0.9136, "step": 76585 }, { "epoch": 67.83879539415412, "grad_norm": 0.2231917679309845, "learning_rate": 1e-05, "loss": 1.0246, "step": 76590 }, { "epoch": 67.84322409211691, "grad_norm": 0.3170589506626129, "learning_rate": 1e-05, "loss": 0.9753, "step": 76595 }, { "epoch": 67.84765279007972, "grad_norm": 0.2921904921531677, "learning_rate": 1e-05, "loss": 0.8998, "step": 76600 }, { "epoch": 67.85208148804251, "grad_norm": 0.2759345471858978, "learning_rate": 1e-05, "loss": 0.9566, "step": 76605 }, { "epoch": 67.85651018600531, "grad_norm": 0.24915920197963715, "learning_rate": 1e-05, "loss": 0.9608, "step": 76610 }, { "epoch": 67.86093888396812, "grad_norm": 0.2416379153728485, "learning_rate": 1e-05, "loss": 0.9739, "step": 76615 }, { "epoch": 67.86536758193091, "grad_norm": 0.24495647847652435, "learning_rate": 1e-05, "loss": 0.9551, "step": 76620 }, { "epoch": 67.8697962798937, "grad_norm": 0.2333097606897354, "learning_rate": 1e-05, "loss": 0.9605, "step": 76625 }, { "epoch": 67.87422497785651, "grad_norm": 0.22763358056545258, "learning_rate": 1e-05, "loss": 0.9865, "step": 76630 }, { "epoch": 67.87865367581931, "grad_norm": 0.209817036986351, "learning_rate": 1e-05, "loss": 0.9575, "step": 76635 }, { "epoch": 67.8830823737821, "grad_norm": 0.24745959043502808, "learning_rate": 1e-05, "loss": 0.9355, "step": 76640 }, { "epoch": 67.88751107174491, "grad_norm": 0.24498023092746735, "learning_rate": 1e-05, "loss": 0.9618, "step": 76645 }, { "epoch": 67.8919397697077, "grad_norm": 0.23769810795783997, "learning_rate": 1e-05, "loss": 0.9351, "step": 76650 }, { "epoch": 67.8963684676705, "grad_norm": 0.2656491696834564, "learning_rate": 1e-05, "loss": 0.9292, "step": 76655 }, { "epoch": 67.9007971656333, "grad_norm": 0.30382663011550903, "learning_rate": 1e-05, "loss": 0.9541, "step": 76660 }, { "epoch": 67.9052258635961, "grad_norm": 0.263300359249115, "learning_rate": 1e-05, "loss": 0.9694, "step": 76665 }, { "epoch": 67.9096545615589, "grad_norm": 0.25266414880752563, "learning_rate": 1e-05, "loss": 0.9507, "step": 76670 }, { "epoch": 67.9140832595217, "grad_norm": 0.27038756012916565, "learning_rate": 1e-05, "loss": 0.9653, "step": 76675 }, { "epoch": 67.9185119574845, "grad_norm": 0.219991996884346, "learning_rate": 1e-05, "loss": 0.9765, "step": 76680 }, { "epoch": 67.92294065544729, "grad_norm": 0.2520654499530792, "learning_rate": 1e-05, "loss": 0.9226, "step": 76685 }, { "epoch": 67.9273693534101, "grad_norm": 0.25472912192344666, "learning_rate": 1e-05, "loss": 0.9826, "step": 76690 }, { "epoch": 67.9317980513729, "grad_norm": 0.21687844395637512, "learning_rate": 1e-05, "loss": 1.0202, "step": 76695 }, { "epoch": 67.9362267493357, "grad_norm": 0.24386608600616455, "learning_rate": 1e-05, "loss": 0.9778, "step": 76700 }, { "epoch": 67.9406554472985, "grad_norm": 0.29538071155548096, "learning_rate": 1e-05, "loss": 1.0055, "step": 76705 }, { "epoch": 67.94508414526129, "grad_norm": 0.27116674184799194, "learning_rate": 1e-05, "loss": 0.9936, "step": 76710 }, { "epoch": 67.9495128432241, "grad_norm": 0.24953193962574005, "learning_rate": 1e-05, "loss": 0.9678, "step": 76715 }, { "epoch": 67.9539415411869, "grad_norm": 0.27773234248161316, "learning_rate": 1e-05, "loss": 0.9394, "step": 76720 }, { "epoch": 67.95837023914969, "grad_norm": 0.23349931836128235, "learning_rate": 1e-05, "loss": 1.0117, "step": 76725 }, { "epoch": 67.9627989371125, "grad_norm": 0.2621541917324066, "learning_rate": 1e-05, "loss": 0.9153, "step": 76730 }, { "epoch": 67.96722763507529, "grad_norm": 0.2179231345653534, "learning_rate": 1e-05, "loss": 0.9342, "step": 76735 }, { "epoch": 67.97165633303808, "grad_norm": 0.24042630195617676, "learning_rate": 1e-05, "loss": 1.0274, "step": 76740 }, { "epoch": 67.97608503100089, "grad_norm": 0.22015798091888428, "learning_rate": 1e-05, "loss": 0.9526, "step": 76745 }, { "epoch": 67.98051372896369, "grad_norm": 0.24946820735931396, "learning_rate": 1e-05, "loss": 0.9247, "step": 76750 }, { "epoch": 67.98494242692648, "grad_norm": 0.22434455156326294, "learning_rate": 1e-05, "loss": 0.9574, "step": 76755 }, { "epoch": 67.98937112488929, "grad_norm": 0.2252250462770462, "learning_rate": 1e-05, "loss": 1.0068, "step": 76760 }, { "epoch": 67.99379982285208, "grad_norm": 0.21331030130386353, "learning_rate": 1e-05, "loss": 0.9555, "step": 76765 }, { "epoch": 67.99822852081488, "grad_norm": 0.25047460198402405, "learning_rate": 1e-05, "loss": 0.9368, "step": 76770 }, { "epoch": 68.00265721877768, "grad_norm": 0.25584807991981506, "learning_rate": 1e-05, "loss": 1.0007, "step": 76775 }, { "epoch": 68.00708591674048, "grad_norm": 0.23728016018867493, "learning_rate": 1e-05, "loss": 0.9378, "step": 76780 }, { "epoch": 68.01151461470327, "grad_norm": 0.22622765600681305, "learning_rate": 1e-05, "loss": 0.9955, "step": 76785 }, { "epoch": 68.01594331266608, "grad_norm": 0.2121357023715973, "learning_rate": 1e-05, "loss": 0.9863, "step": 76790 }, { "epoch": 68.02037201062888, "grad_norm": 0.25021448731422424, "learning_rate": 1e-05, "loss": 0.9359, "step": 76795 }, { "epoch": 68.02480070859167, "grad_norm": 0.22793389856815338, "learning_rate": 1e-05, "loss": 0.8953, "step": 76800 }, { "epoch": 68.02922940655448, "grad_norm": 0.23099902272224426, "learning_rate": 1e-05, "loss": 1.0125, "step": 76805 }, { "epoch": 68.03365810451727, "grad_norm": 0.27883380651474, "learning_rate": 1e-05, "loss": 0.9198, "step": 76810 }, { "epoch": 68.03808680248007, "grad_norm": 0.2255604863166809, "learning_rate": 1e-05, "loss": 1.0052, "step": 76815 }, { "epoch": 68.04251550044287, "grad_norm": 0.22914519906044006, "learning_rate": 1e-05, "loss": 0.9591, "step": 76820 }, { "epoch": 68.04694419840567, "grad_norm": 0.21722693741321564, "learning_rate": 1e-05, "loss": 0.9825, "step": 76825 }, { "epoch": 68.05137289636846, "grad_norm": 0.22584277391433716, "learning_rate": 1e-05, "loss": 0.9439, "step": 76830 }, { "epoch": 68.05580159433127, "grad_norm": 0.2503102123737335, "learning_rate": 1e-05, "loss": 0.9558, "step": 76835 }, { "epoch": 68.06023029229407, "grad_norm": 0.30725279450416565, "learning_rate": 1e-05, "loss": 0.9486, "step": 76840 }, { "epoch": 68.06465899025686, "grad_norm": 0.24035684764385223, "learning_rate": 1e-05, "loss": 0.9682, "step": 76845 }, { "epoch": 68.06908768821967, "grad_norm": 0.3344380557537079, "learning_rate": 1e-05, "loss": 0.9876, "step": 76850 }, { "epoch": 68.07351638618246, "grad_norm": 0.2741321325302124, "learning_rate": 1e-05, "loss": 0.8868, "step": 76855 }, { "epoch": 68.07794508414526, "grad_norm": 0.2542209327220917, "learning_rate": 1e-05, "loss": 0.8985, "step": 76860 }, { "epoch": 68.08237378210806, "grad_norm": 0.21474997699260712, "learning_rate": 1e-05, "loss": 1.025, "step": 76865 }, { "epoch": 68.08680248007086, "grad_norm": 0.2442864626646042, "learning_rate": 1e-05, "loss": 0.9598, "step": 76870 }, { "epoch": 68.09123117803365, "grad_norm": 0.23459120094776154, "learning_rate": 1e-05, "loss": 0.9889, "step": 76875 }, { "epoch": 68.09565987599646, "grad_norm": 0.24858005344867706, "learning_rate": 1e-05, "loss": 0.9517, "step": 76880 }, { "epoch": 68.10008857395925, "grad_norm": 0.2764456570148468, "learning_rate": 1e-05, "loss": 0.8921, "step": 76885 }, { "epoch": 68.10451727192205, "grad_norm": 0.2397177517414093, "learning_rate": 1e-05, "loss": 0.9011, "step": 76890 }, { "epoch": 68.10894596988486, "grad_norm": 0.19807660579681396, "learning_rate": 1e-05, "loss": 0.938, "step": 76895 }, { "epoch": 68.11337466784765, "grad_norm": 0.2176160216331482, "learning_rate": 1e-05, "loss": 1.005, "step": 76900 }, { "epoch": 68.11780336581045, "grad_norm": 0.19930031895637512, "learning_rate": 1e-05, "loss": 0.9836, "step": 76905 }, { "epoch": 68.12223206377325, "grad_norm": 0.24108199775218964, "learning_rate": 1e-05, "loss": 0.9708, "step": 76910 }, { "epoch": 68.12666076173605, "grad_norm": 0.26400989294052124, "learning_rate": 1e-05, "loss": 0.9646, "step": 76915 }, { "epoch": 68.13108945969884, "grad_norm": 0.2549172341823578, "learning_rate": 1e-05, "loss": 0.9472, "step": 76920 }, { "epoch": 68.13551815766165, "grad_norm": 0.2952554523944855, "learning_rate": 1e-05, "loss": 0.9615, "step": 76925 }, { "epoch": 68.13994685562444, "grad_norm": 0.2596129775047302, "learning_rate": 1e-05, "loss": 0.9707, "step": 76930 }, { "epoch": 68.14437555358724, "grad_norm": 0.23718851804733276, "learning_rate": 1e-05, "loss": 0.9431, "step": 76935 }, { "epoch": 68.14880425155005, "grad_norm": 0.25222161412239075, "learning_rate": 1e-05, "loss": 0.9866, "step": 76940 }, { "epoch": 68.15323294951284, "grad_norm": 0.24107126891613007, "learning_rate": 1e-05, "loss": 0.9548, "step": 76945 }, { "epoch": 68.15766164747565, "grad_norm": 0.23306317627429962, "learning_rate": 1e-05, "loss": 1.0087, "step": 76950 }, { "epoch": 68.16209034543844, "grad_norm": 0.21922940015792847, "learning_rate": 1e-05, "loss": 0.9484, "step": 76955 }, { "epoch": 68.16651904340124, "grad_norm": 0.2458428144454956, "learning_rate": 1e-05, "loss": 0.9657, "step": 76960 }, { "epoch": 68.17094774136405, "grad_norm": 0.23581838607788086, "learning_rate": 1e-05, "loss": 1.0041, "step": 76965 }, { "epoch": 68.17537643932684, "grad_norm": 0.24623973667621613, "learning_rate": 1e-05, "loss": 0.9815, "step": 76970 }, { "epoch": 68.17980513728963, "grad_norm": 0.2370588779449463, "learning_rate": 1e-05, "loss": 0.9276, "step": 76975 }, { "epoch": 68.18423383525244, "grad_norm": 0.2924964725971222, "learning_rate": 1e-05, "loss": 0.9765, "step": 76980 }, { "epoch": 68.18866253321524, "grad_norm": 0.23916597664356232, "learning_rate": 1e-05, "loss": 0.9679, "step": 76985 }, { "epoch": 68.19309123117803, "grad_norm": 0.27028536796569824, "learning_rate": 1e-05, "loss": 0.9973, "step": 76990 }, { "epoch": 68.19751992914084, "grad_norm": 0.27892959117889404, "learning_rate": 1e-05, "loss": 0.9153, "step": 76995 }, { "epoch": 68.20194862710363, "grad_norm": 0.2842993140220642, "learning_rate": 1e-05, "loss": 0.9825, "step": 77000 }, { "epoch": 68.20637732506643, "grad_norm": 0.31390640139579773, "learning_rate": 1e-05, "loss": 0.9745, "step": 77005 }, { "epoch": 68.21080602302924, "grad_norm": 0.2571030855178833, "learning_rate": 1e-05, "loss": 0.948, "step": 77010 }, { "epoch": 68.21523472099203, "grad_norm": 0.22763369977474213, "learning_rate": 1e-05, "loss": 1.0053, "step": 77015 }, { "epoch": 68.21966341895482, "grad_norm": 0.20157192647457123, "learning_rate": 1e-05, "loss": 1.0086, "step": 77020 }, { "epoch": 68.22409211691763, "grad_norm": 0.23055174946784973, "learning_rate": 1e-05, "loss": 1.016, "step": 77025 }, { "epoch": 68.22852081488043, "grad_norm": 0.22462643682956696, "learning_rate": 1e-05, "loss": 0.9385, "step": 77030 }, { "epoch": 68.23294951284322, "grad_norm": 0.2218571901321411, "learning_rate": 1e-05, "loss": 0.909, "step": 77035 }, { "epoch": 68.23737821080603, "grad_norm": 0.23939426243305206, "learning_rate": 1e-05, "loss": 0.9974, "step": 77040 }, { "epoch": 68.24180690876882, "grad_norm": 0.2975097596645355, "learning_rate": 1e-05, "loss": 1.0153, "step": 77045 }, { "epoch": 68.24623560673162, "grad_norm": 0.2307479828596115, "learning_rate": 1e-05, "loss": 0.9736, "step": 77050 }, { "epoch": 68.25066430469442, "grad_norm": 0.25362545251846313, "learning_rate": 1e-05, "loss": 1.0118, "step": 77055 }, { "epoch": 68.25509300265722, "grad_norm": 0.2312287986278534, "learning_rate": 1e-05, "loss": 0.9144, "step": 77060 }, { "epoch": 68.25952170062001, "grad_norm": 0.21733015775680542, "learning_rate": 1e-05, "loss": 1.0118, "step": 77065 }, { "epoch": 68.26395039858282, "grad_norm": 0.2657482326030731, "learning_rate": 1e-05, "loss": 0.9913, "step": 77070 }, { "epoch": 68.26837909654562, "grad_norm": 0.24333474040031433, "learning_rate": 1e-05, "loss": 0.9821, "step": 77075 }, { "epoch": 68.27280779450841, "grad_norm": 0.2510969042778015, "learning_rate": 1e-05, "loss": 0.9476, "step": 77080 }, { "epoch": 68.27723649247122, "grad_norm": 0.2663164436817169, "learning_rate": 1e-05, "loss": 0.9038, "step": 77085 }, { "epoch": 68.28166519043401, "grad_norm": 0.21536394953727722, "learning_rate": 1e-05, "loss": 0.9419, "step": 77090 }, { "epoch": 68.2860938883968, "grad_norm": 0.2670002281665802, "learning_rate": 1e-05, "loss": 0.9612, "step": 77095 }, { "epoch": 68.29052258635961, "grad_norm": 0.23812291026115417, "learning_rate": 1e-05, "loss": 0.95, "step": 77100 }, { "epoch": 68.29495128432241, "grad_norm": 0.2553909420967102, "learning_rate": 1e-05, "loss": 0.9599, "step": 77105 }, { "epoch": 68.2993799822852, "grad_norm": 0.26331189274787903, "learning_rate": 1e-05, "loss": 0.9424, "step": 77110 }, { "epoch": 68.30380868024801, "grad_norm": 0.21576452255249023, "learning_rate": 1e-05, "loss": 0.9474, "step": 77115 }, { "epoch": 68.3082373782108, "grad_norm": 0.20230774581432343, "learning_rate": 1e-05, "loss": 1.0011, "step": 77120 }, { "epoch": 68.3126660761736, "grad_norm": 0.23358643054962158, "learning_rate": 1e-05, "loss": 0.9823, "step": 77125 }, { "epoch": 68.31709477413641, "grad_norm": 0.2323332577943802, "learning_rate": 1e-05, "loss": 0.9272, "step": 77130 }, { "epoch": 68.3215234720992, "grad_norm": 0.22187022864818573, "learning_rate": 1e-05, "loss": 0.9474, "step": 77135 }, { "epoch": 68.325952170062, "grad_norm": 0.2275269478559494, "learning_rate": 1e-05, "loss": 1.0549, "step": 77140 }, { "epoch": 68.3303808680248, "grad_norm": 0.24728339910507202, "learning_rate": 1e-05, "loss": 1.0032, "step": 77145 }, { "epoch": 68.3348095659876, "grad_norm": 0.24441349506378174, "learning_rate": 1e-05, "loss": 0.9891, "step": 77150 }, { "epoch": 68.33923826395039, "grad_norm": 0.26552215218544006, "learning_rate": 1e-05, "loss": 0.9226, "step": 77155 }, { "epoch": 68.3436669619132, "grad_norm": 0.20027394592761993, "learning_rate": 1e-05, "loss": 0.9726, "step": 77160 }, { "epoch": 68.348095659876, "grad_norm": 0.2559523582458496, "learning_rate": 1e-05, "loss": 0.9524, "step": 77165 }, { "epoch": 68.35252435783879, "grad_norm": 0.2647932171821594, "learning_rate": 1e-05, "loss": 0.947, "step": 77170 }, { "epoch": 68.3569530558016, "grad_norm": 0.30525845289230347, "learning_rate": 1e-05, "loss": 0.9191, "step": 77175 }, { "epoch": 68.36138175376439, "grad_norm": 0.22653600573539734, "learning_rate": 1e-05, "loss": 1.0074, "step": 77180 }, { "epoch": 68.36581045172719, "grad_norm": 0.22548870742321014, "learning_rate": 1e-05, "loss": 1.0263, "step": 77185 }, { "epoch": 68.37023914969, "grad_norm": 0.21463324129581451, "learning_rate": 1e-05, "loss": 0.9527, "step": 77190 }, { "epoch": 68.37466784765279, "grad_norm": 0.21944944560527802, "learning_rate": 1e-05, "loss": 1.0062, "step": 77195 }, { "epoch": 68.3790965456156, "grad_norm": 0.23399657011032104, "learning_rate": 1e-05, "loss": 0.9983, "step": 77200 }, { "epoch": 68.38352524357839, "grad_norm": 0.2551707327365875, "learning_rate": 1e-05, "loss": 0.987, "step": 77205 }, { "epoch": 68.38795394154118, "grad_norm": 0.3327534794807434, "learning_rate": 1e-05, "loss": 0.925, "step": 77210 }, { "epoch": 68.39238263950399, "grad_norm": 0.26631003618240356, "learning_rate": 1e-05, "loss": 0.9691, "step": 77215 }, { "epoch": 68.39681133746679, "grad_norm": 0.23770877718925476, "learning_rate": 1e-05, "loss": 0.9827, "step": 77220 }, { "epoch": 68.40124003542958, "grad_norm": 0.21755321323871613, "learning_rate": 1e-05, "loss": 0.9975, "step": 77225 }, { "epoch": 68.40566873339239, "grad_norm": 0.2293049693107605, "learning_rate": 1e-05, "loss": 0.9756, "step": 77230 }, { "epoch": 68.41009743135518, "grad_norm": 0.23316511511802673, "learning_rate": 1e-05, "loss": 0.9907, "step": 77235 }, { "epoch": 68.41452612931798, "grad_norm": 0.25545260310173035, "learning_rate": 1e-05, "loss": 0.9988, "step": 77240 }, { "epoch": 68.41895482728079, "grad_norm": 0.25524574518203735, "learning_rate": 1e-05, "loss": 0.9493, "step": 77245 }, { "epoch": 68.42338352524358, "grad_norm": 0.22837354242801666, "learning_rate": 1e-05, "loss": 0.9321, "step": 77250 }, { "epoch": 68.42781222320637, "grad_norm": 0.23328398168087006, "learning_rate": 1e-05, "loss": 0.9577, "step": 77255 }, { "epoch": 68.43224092116918, "grad_norm": 0.26440852880477905, "learning_rate": 1e-05, "loss": 0.9214, "step": 77260 }, { "epoch": 68.43666961913198, "grad_norm": 0.23230767250061035, "learning_rate": 1e-05, "loss": 1.0129, "step": 77265 }, { "epoch": 68.44109831709477, "grad_norm": 0.21214210987091064, "learning_rate": 1e-05, "loss": 0.9721, "step": 77270 }, { "epoch": 68.44552701505758, "grad_norm": 0.2702535092830658, "learning_rate": 1e-05, "loss": 0.9664, "step": 77275 }, { "epoch": 68.44995571302037, "grad_norm": 0.1878858357667923, "learning_rate": 1e-05, "loss": 0.9552, "step": 77280 }, { "epoch": 68.45438441098317, "grad_norm": 0.23052692413330078, "learning_rate": 1e-05, "loss": 0.9509, "step": 77285 }, { "epoch": 68.45881310894598, "grad_norm": 0.22295252978801727, "learning_rate": 1e-05, "loss": 0.9475, "step": 77290 }, { "epoch": 68.46324180690877, "grad_norm": 0.27759110927581787, "learning_rate": 1e-05, "loss": 0.9877, "step": 77295 }, { "epoch": 68.46767050487156, "grad_norm": 0.2125776708126068, "learning_rate": 1e-05, "loss": 0.9464, "step": 77300 }, { "epoch": 68.47209920283437, "grad_norm": 0.21196389198303223, "learning_rate": 1e-05, "loss": 0.9342, "step": 77305 }, { "epoch": 68.47652790079717, "grad_norm": 0.20559704303741455, "learning_rate": 1e-05, "loss": 0.9919, "step": 77310 }, { "epoch": 68.48095659875996, "grad_norm": 0.21400666236877441, "learning_rate": 1e-05, "loss": 0.9387, "step": 77315 }, { "epoch": 68.48538529672277, "grad_norm": 0.20403701066970825, "learning_rate": 1e-05, "loss": 0.931, "step": 77320 }, { "epoch": 68.48981399468556, "grad_norm": 0.2335355132818222, "learning_rate": 1e-05, "loss": 1.0186, "step": 77325 }, { "epoch": 68.49424269264836, "grad_norm": 0.2715393304824829, "learning_rate": 1e-05, "loss": 0.9235, "step": 77330 }, { "epoch": 68.49867139061116, "grad_norm": 0.22402842342853546, "learning_rate": 1e-05, "loss": 0.9323, "step": 77335 }, { "epoch": 68.50310008857396, "grad_norm": 0.25089943408966064, "learning_rate": 1e-05, "loss": 0.9886, "step": 77340 }, { "epoch": 68.50752878653675, "grad_norm": 0.2760465145111084, "learning_rate": 1e-05, "loss": 1.0117, "step": 77345 }, { "epoch": 68.51195748449956, "grad_norm": 0.24571135640144348, "learning_rate": 1e-05, "loss": 0.9784, "step": 77350 }, { "epoch": 68.51638618246236, "grad_norm": 0.24833685159683228, "learning_rate": 1e-05, "loss": 0.9419, "step": 77355 }, { "epoch": 68.52081488042515, "grad_norm": 0.27041807770729065, "learning_rate": 1e-05, "loss": 0.9722, "step": 77360 }, { "epoch": 68.52524357838796, "grad_norm": 0.243954598903656, "learning_rate": 1e-05, "loss": 0.978, "step": 77365 }, { "epoch": 68.52967227635075, "grad_norm": 0.21405239403247833, "learning_rate": 1e-05, "loss": 0.9491, "step": 77370 }, { "epoch": 68.53410097431355, "grad_norm": 0.30342036485671997, "learning_rate": 1e-05, "loss": 0.9963, "step": 77375 }, { "epoch": 68.53852967227635, "grad_norm": 0.35024258494377136, "learning_rate": 1e-05, "loss": 0.9727, "step": 77380 }, { "epoch": 68.54295837023915, "grad_norm": 0.21902072429656982, "learning_rate": 1e-05, "loss": 0.9382, "step": 77385 }, { "epoch": 68.54738706820194, "grad_norm": 0.2056579887866974, "learning_rate": 1e-05, "loss": 0.9551, "step": 77390 }, { "epoch": 68.55181576616475, "grad_norm": 0.2437870055437088, "learning_rate": 1e-05, "loss": 0.9493, "step": 77395 }, { "epoch": 68.55624446412754, "grad_norm": 0.2283148467540741, "learning_rate": 1e-05, "loss": 0.9911, "step": 77400 }, { "epoch": 68.56067316209034, "grad_norm": 0.2600709795951843, "learning_rate": 1e-05, "loss": 0.9744, "step": 77405 }, { "epoch": 68.56510186005315, "grad_norm": 0.24737876653671265, "learning_rate": 1e-05, "loss": 0.9781, "step": 77410 }, { "epoch": 68.56953055801594, "grad_norm": 0.2758330702781677, "learning_rate": 1e-05, "loss": 0.97, "step": 77415 }, { "epoch": 68.57395925597874, "grad_norm": 0.210517019033432, "learning_rate": 1e-05, "loss": 0.9455, "step": 77420 }, { "epoch": 68.57838795394154, "grad_norm": 0.23354770243167877, "learning_rate": 1e-05, "loss": 0.9953, "step": 77425 }, { "epoch": 68.58281665190434, "grad_norm": 0.27804309129714966, "learning_rate": 1e-05, "loss": 0.9231, "step": 77430 }, { "epoch": 68.58724534986715, "grad_norm": 0.28320494294166565, "learning_rate": 1e-05, "loss": 0.9496, "step": 77435 }, { "epoch": 68.59167404782994, "grad_norm": 0.3446701467037201, "learning_rate": 1e-05, "loss": 0.9537, "step": 77440 }, { "epoch": 68.59610274579273, "grad_norm": 0.20666784048080444, "learning_rate": 1e-05, "loss": 1.0163, "step": 77445 }, { "epoch": 68.60053144375554, "grad_norm": 0.23360778391361237, "learning_rate": 1e-05, "loss": 0.9806, "step": 77450 }, { "epoch": 68.60496014171834, "grad_norm": 0.2181546837091446, "learning_rate": 1e-05, "loss": 0.9759, "step": 77455 }, { "epoch": 68.60938883968113, "grad_norm": 0.24975289404392242, "learning_rate": 1e-05, "loss": 1.0397, "step": 77460 }, { "epoch": 68.61381753764394, "grad_norm": 0.2663228511810303, "learning_rate": 1e-05, "loss": 0.9839, "step": 77465 }, { "epoch": 68.61824623560673, "grad_norm": 0.22524394094944, "learning_rate": 1e-05, "loss": 1.0086, "step": 77470 }, { "epoch": 68.62267493356953, "grad_norm": 0.22815193235874176, "learning_rate": 1e-05, "loss": 0.9687, "step": 77475 }, { "epoch": 68.62710363153234, "grad_norm": 0.22721263766288757, "learning_rate": 1e-05, "loss": 1.0396, "step": 77480 }, { "epoch": 68.63153232949513, "grad_norm": 0.23396463692188263, "learning_rate": 1e-05, "loss": 0.9724, "step": 77485 }, { "epoch": 68.63596102745792, "grad_norm": 0.2337217479944229, "learning_rate": 1e-05, "loss": 0.968, "step": 77490 }, { "epoch": 68.64038972542073, "grad_norm": 0.20511044561862946, "learning_rate": 1e-05, "loss": 0.9805, "step": 77495 }, { "epoch": 68.64481842338353, "grad_norm": 0.21474257111549377, "learning_rate": 1e-05, "loss": 0.9221, "step": 77500 }, { "epoch": 68.64924712134632, "grad_norm": 0.23007886111736298, "learning_rate": 1e-05, "loss": 0.9865, "step": 77505 }, { "epoch": 68.65367581930913, "grad_norm": 0.24416375160217285, "learning_rate": 1e-05, "loss": 0.9666, "step": 77510 }, { "epoch": 68.65810451727192, "grad_norm": 0.2489532232284546, "learning_rate": 1e-05, "loss": 0.9808, "step": 77515 }, { "epoch": 68.66253321523472, "grad_norm": 0.23852939903736115, "learning_rate": 1e-05, "loss": 0.9713, "step": 77520 }, { "epoch": 68.66696191319753, "grad_norm": 0.25823014974594116, "learning_rate": 1e-05, "loss": 0.9484, "step": 77525 }, { "epoch": 68.67139061116032, "grad_norm": 0.21028785407543182, "learning_rate": 1e-05, "loss": 0.9807, "step": 77530 }, { "epoch": 68.67581930912311, "grad_norm": 0.2341497391462326, "learning_rate": 1e-05, "loss": 0.99, "step": 77535 }, { "epoch": 68.68024800708592, "grad_norm": 0.19045452773571014, "learning_rate": 1e-05, "loss": 1.0138, "step": 77540 }, { "epoch": 68.68467670504872, "grad_norm": 0.25875353813171387, "learning_rate": 1e-05, "loss": 0.9369, "step": 77545 }, { "epoch": 68.68910540301151, "grad_norm": 0.19904853403568268, "learning_rate": 1e-05, "loss": 0.9406, "step": 77550 }, { "epoch": 68.69353410097432, "grad_norm": 0.22108907997608185, "learning_rate": 1e-05, "loss": 0.9825, "step": 77555 }, { "epoch": 68.69796279893711, "grad_norm": 0.2110719531774521, "learning_rate": 1e-05, "loss": 0.9161, "step": 77560 }, { "epoch": 68.7023914968999, "grad_norm": 0.2077806442975998, "learning_rate": 1e-05, "loss": 0.9374, "step": 77565 }, { "epoch": 68.70682019486271, "grad_norm": 0.23411232233047485, "learning_rate": 1e-05, "loss": 0.928, "step": 77570 }, { "epoch": 68.71124889282551, "grad_norm": 0.23621974885463715, "learning_rate": 1e-05, "loss": 0.9792, "step": 77575 }, { "epoch": 68.7156775907883, "grad_norm": 0.26220467686653137, "learning_rate": 1e-05, "loss": 0.9282, "step": 77580 }, { "epoch": 68.72010628875111, "grad_norm": 0.19367679953575134, "learning_rate": 1e-05, "loss": 0.9874, "step": 77585 }, { "epoch": 68.7245349867139, "grad_norm": 0.2192482054233551, "learning_rate": 1e-05, "loss": 0.9102, "step": 77590 }, { "epoch": 68.7289636846767, "grad_norm": 0.21327932178974152, "learning_rate": 1e-05, "loss": 0.9438, "step": 77595 }, { "epoch": 68.73339238263951, "grad_norm": 0.2203483283519745, "learning_rate": 1e-05, "loss": 0.9832, "step": 77600 }, { "epoch": 68.7378210806023, "grad_norm": 0.23164600133895874, "learning_rate": 1e-05, "loss": 0.966, "step": 77605 }, { "epoch": 68.7422497785651, "grad_norm": 0.2410828173160553, "learning_rate": 1e-05, "loss": 0.9095, "step": 77610 }, { "epoch": 68.7466784765279, "grad_norm": 0.22069929540157318, "learning_rate": 1e-05, "loss": 0.9723, "step": 77615 }, { "epoch": 68.7511071744907, "grad_norm": 0.1951211541891098, "learning_rate": 1e-05, "loss": 0.9506, "step": 77620 }, { "epoch": 68.75553587245349, "grad_norm": 0.22460252046585083, "learning_rate": 1e-05, "loss": 0.9918, "step": 77625 }, { "epoch": 68.7599645704163, "grad_norm": 0.30915582180023193, "learning_rate": 1e-05, "loss": 0.9822, "step": 77630 }, { "epoch": 68.7643932683791, "grad_norm": 0.2821124494075775, "learning_rate": 1e-05, "loss": 1.0081, "step": 77635 }, { "epoch": 68.76882196634189, "grad_norm": 0.31025904417037964, "learning_rate": 1e-05, "loss": 0.9589, "step": 77640 }, { "epoch": 68.7732506643047, "grad_norm": 0.25534045696258545, "learning_rate": 1e-05, "loss": 0.981, "step": 77645 }, { "epoch": 68.77767936226749, "grad_norm": 0.301859974861145, "learning_rate": 1e-05, "loss": 0.9913, "step": 77650 }, { "epoch": 68.78210806023029, "grad_norm": 0.23446762561798096, "learning_rate": 1e-05, "loss": 0.9839, "step": 77655 }, { "epoch": 68.7865367581931, "grad_norm": 0.2862578332424164, "learning_rate": 1e-05, "loss": 0.941, "step": 77660 }, { "epoch": 68.79096545615589, "grad_norm": 0.22713255882263184, "learning_rate": 1e-05, "loss": 0.9488, "step": 77665 }, { "epoch": 68.79539415411868, "grad_norm": 0.24764148890972137, "learning_rate": 1e-05, "loss": 1.0126, "step": 77670 }, { "epoch": 68.79982285208149, "grad_norm": 0.2324870377779007, "learning_rate": 1e-05, "loss": 0.9632, "step": 77675 }, { "epoch": 68.80425155004428, "grad_norm": 0.24814823269844055, "learning_rate": 1e-05, "loss": 0.9527, "step": 77680 }, { "epoch": 68.8086802480071, "grad_norm": 0.22148555517196655, "learning_rate": 1e-05, "loss": 0.9734, "step": 77685 }, { "epoch": 68.81310894596989, "grad_norm": 0.23959319293498993, "learning_rate": 1e-05, "loss": 0.893, "step": 77690 }, { "epoch": 68.81753764393268, "grad_norm": 0.2729448676109314, "learning_rate": 1e-05, "loss": 1.0011, "step": 77695 }, { "epoch": 68.82196634189549, "grad_norm": 0.24853262305259705, "learning_rate": 1e-05, "loss": 0.9629, "step": 77700 }, { "epoch": 68.82639503985828, "grad_norm": 0.2562715709209442, "learning_rate": 1e-05, "loss": 1.0157, "step": 77705 }, { "epoch": 68.83082373782108, "grad_norm": 0.25085675716400146, "learning_rate": 1e-05, "loss": 0.9843, "step": 77710 }, { "epoch": 68.83525243578389, "grad_norm": 0.25476396083831787, "learning_rate": 1e-05, "loss": 0.9559, "step": 77715 }, { "epoch": 68.83968113374668, "grad_norm": 0.2540120780467987, "learning_rate": 1e-05, "loss": 0.9571, "step": 77720 }, { "epoch": 68.84410983170947, "grad_norm": 0.217283695936203, "learning_rate": 1e-05, "loss": 0.951, "step": 77725 }, { "epoch": 68.84853852967228, "grad_norm": 0.2352837473154068, "learning_rate": 1e-05, "loss": 0.9612, "step": 77730 }, { "epoch": 68.85296722763508, "grad_norm": 0.22519177198410034, "learning_rate": 1e-05, "loss": 0.945, "step": 77735 }, { "epoch": 68.85739592559787, "grad_norm": 0.219848170876503, "learning_rate": 1e-05, "loss": 0.997, "step": 77740 }, { "epoch": 68.86182462356068, "grad_norm": 0.24652816355228424, "learning_rate": 1e-05, "loss": 0.9779, "step": 77745 }, { "epoch": 68.86625332152347, "grad_norm": 0.2178501933813095, "learning_rate": 1e-05, "loss": 1.0261, "step": 77750 }, { "epoch": 68.87068201948627, "grad_norm": 0.22229357063770294, "learning_rate": 1e-05, "loss": 0.958, "step": 77755 }, { "epoch": 68.87511071744908, "grad_norm": 0.2508009076118469, "learning_rate": 1e-05, "loss": 1.0261, "step": 77760 }, { "epoch": 68.87953941541187, "grad_norm": 0.2688422203063965, "learning_rate": 1e-05, "loss": 1.011, "step": 77765 }, { "epoch": 68.88396811337466, "grad_norm": 0.27963051199913025, "learning_rate": 1e-05, "loss": 0.9823, "step": 77770 }, { "epoch": 68.88839681133747, "grad_norm": 0.2947990298271179, "learning_rate": 1e-05, "loss": 0.9468, "step": 77775 }, { "epoch": 68.89282550930027, "grad_norm": 0.24646307528018951, "learning_rate": 1e-05, "loss": 0.9289, "step": 77780 }, { "epoch": 68.89725420726306, "grad_norm": 0.2942536473274231, "learning_rate": 1e-05, "loss": 0.9448, "step": 77785 }, { "epoch": 68.90168290522587, "grad_norm": 0.2099767029285431, "learning_rate": 1e-05, "loss": 0.9847, "step": 77790 }, { "epoch": 68.90611160318866, "grad_norm": 0.29026347398757935, "learning_rate": 1e-05, "loss": 0.9709, "step": 77795 }, { "epoch": 68.91054030115146, "grad_norm": 0.21217495203018188, "learning_rate": 1e-05, "loss": 0.9461, "step": 77800 }, { "epoch": 68.91496899911427, "grad_norm": 0.21227198839187622, "learning_rate": 1e-05, "loss": 0.9501, "step": 77805 }, { "epoch": 68.91939769707706, "grad_norm": 0.2930909991264343, "learning_rate": 1e-05, "loss": 0.9768, "step": 77810 }, { "epoch": 68.92382639503985, "grad_norm": 0.23876406252384186, "learning_rate": 1e-05, "loss": 0.9081, "step": 77815 }, { "epoch": 68.92825509300266, "grad_norm": 0.21298159658908844, "learning_rate": 1e-05, "loss": 0.9607, "step": 77820 }, { "epoch": 68.93268379096546, "grad_norm": 0.22780898213386536, "learning_rate": 1e-05, "loss": 0.9901, "step": 77825 }, { "epoch": 68.93711248892825, "grad_norm": 0.22752320766448975, "learning_rate": 1e-05, "loss": 1.0245, "step": 77830 }, { "epoch": 68.94154118689106, "grad_norm": 0.2558114230632782, "learning_rate": 1e-05, "loss": 0.9788, "step": 77835 }, { "epoch": 68.94596988485385, "grad_norm": 0.20823952555656433, "learning_rate": 1e-05, "loss": 0.9347, "step": 77840 }, { "epoch": 68.95039858281665, "grad_norm": 0.2360299527645111, "learning_rate": 1e-05, "loss": 0.9838, "step": 77845 }, { "epoch": 68.95482728077945, "grad_norm": 0.21293459832668304, "learning_rate": 1e-05, "loss": 0.9427, "step": 77850 }, { "epoch": 68.95925597874225, "grad_norm": 0.2501783072948456, "learning_rate": 1e-05, "loss": 0.9786, "step": 77855 }, { "epoch": 68.96368467670504, "grad_norm": 0.24438054859638214, "learning_rate": 1e-05, "loss": 0.9571, "step": 77860 }, { "epoch": 68.96811337466785, "grad_norm": 0.21473801136016846, "learning_rate": 1e-05, "loss": 1.0151, "step": 77865 }, { "epoch": 68.97254207263065, "grad_norm": 0.25880977511405945, "learning_rate": 1e-05, "loss": 0.9925, "step": 77870 }, { "epoch": 68.97697077059344, "grad_norm": 0.2433009147644043, "learning_rate": 1e-05, "loss": 1.0099, "step": 77875 }, { "epoch": 68.98139946855625, "grad_norm": 0.21708384156227112, "learning_rate": 1e-05, "loss": 0.9261, "step": 77880 }, { "epoch": 68.98582816651904, "grad_norm": 0.2144363969564438, "learning_rate": 1e-05, "loss": 0.9665, "step": 77885 }, { "epoch": 68.99025686448184, "grad_norm": 0.2765505015850067, "learning_rate": 1e-05, "loss": 0.9411, "step": 77890 }, { "epoch": 68.99468556244464, "grad_norm": 0.213337242603302, "learning_rate": 1e-05, "loss": 0.9562, "step": 77895 }, { "epoch": 68.99911426040744, "grad_norm": 0.26184558868408203, "learning_rate": 1e-05, "loss": 0.9651, "step": 77900 }, { "epoch": 69.00354295837023, "grad_norm": 0.24421700835227966, "learning_rate": 1e-05, "loss": 0.9784, "step": 77905 }, { "epoch": 69.00797165633304, "grad_norm": 0.22132588922977448, "learning_rate": 1e-05, "loss": 0.9987, "step": 77910 }, { "epoch": 69.01240035429583, "grad_norm": 0.2508220374584198, "learning_rate": 1e-05, "loss": 0.9797, "step": 77915 }, { "epoch": 69.01682905225863, "grad_norm": 0.23341313004493713, "learning_rate": 1e-05, "loss": 0.9436, "step": 77920 }, { "epoch": 69.02125775022144, "grad_norm": 0.2203708291053772, "learning_rate": 1e-05, "loss": 0.9467, "step": 77925 }, { "epoch": 69.02568644818423, "grad_norm": 0.23375320434570312, "learning_rate": 1e-05, "loss": 0.9038, "step": 77930 }, { "epoch": 69.03011514614704, "grad_norm": 0.2515154182910919, "learning_rate": 1e-05, "loss": 0.9394, "step": 77935 }, { "epoch": 69.03454384410983, "grad_norm": 0.27470308542251587, "learning_rate": 1e-05, "loss": 1.0035, "step": 77940 }, { "epoch": 69.03897254207263, "grad_norm": 0.240151047706604, "learning_rate": 1e-05, "loss": 0.9414, "step": 77945 }, { "epoch": 69.04340124003544, "grad_norm": 0.2680210769176483, "learning_rate": 1e-05, "loss": 0.9509, "step": 77950 }, { "epoch": 69.04782993799823, "grad_norm": 0.21505847573280334, "learning_rate": 1e-05, "loss": 0.9635, "step": 77955 }, { "epoch": 69.05225863596102, "grad_norm": 0.21608306467533112, "learning_rate": 1e-05, "loss": 0.9882, "step": 77960 }, { "epoch": 69.05668733392383, "grad_norm": 0.2254691869020462, "learning_rate": 1e-05, "loss": 0.9355, "step": 77965 }, { "epoch": 69.06111603188663, "grad_norm": 0.2361876368522644, "learning_rate": 1e-05, "loss": 0.9991, "step": 77970 }, { "epoch": 69.06554472984942, "grad_norm": 0.22258225083351135, "learning_rate": 1e-05, "loss": 0.9256, "step": 77975 }, { "epoch": 69.06997342781223, "grad_norm": 0.31070664525032043, "learning_rate": 1e-05, "loss": 0.9673, "step": 77980 }, { "epoch": 69.07440212577502, "grad_norm": 0.28711432218551636, "learning_rate": 1e-05, "loss": 0.9668, "step": 77985 }, { "epoch": 69.07883082373782, "grad_norm": 0.23573873937129974, "learning_rate": 1e-05, "loss": 1.0237, "step": 77990 }, { "epoch": 69.08325952170063, "grad_norm": 0.25527769327163696, "learning_rate": 1e-05, "loss": 0.968, "step": 77995 }, { "epoch": 69.08768821966342, "grad_norm": 0.27001112699508667, "learning_rate": 1e-05, "loss": 0.9853, "step": 78000 }, { "epoch": 69.09211691762621, "grad_norm": 0.28292331099510193, "learning_rate": 1e-05, "loss": 0.9778, "step": 78005 }, { "epoch": 69.09654561558902, "grad_norm": 0.2472730278968811, "learning_rate": 1e-05, "loss": 0.9461, "step": 78010 }, { "epoch": 69.10097431355182, "grad_norm": 0.2121076136827469, "learning_rate": 1e-05, "loss": 0.9377, "step": 78015 }, { "epoch": 69.10540301151461, "grad_norm": 0.21238595247268677, "learning_rate": 1e-05, "loss": 0.9525, "step": 78020 }, { "epoch": 69.10983170947742, "grad_norm": 0.20333240926265717, "learning_rate": 1e-05, "loss": 0.9294, "step": 78025 }, { "epoch": 69.11426040744021, "grad_norm": 0.23517295718193054, "learning_rate": 1e-05, "loss": 0.9764, "step": 78030 }, { "epoch": 69.118689105403, "grad_norm": 0.21922169625759125, "learning_rate": 1e-05, "loss": 0.9867, "step": 78035 }, { "epoch": 69.12311780336582, "grad_norm": 0.2950936257839203, "learning_rate": 1e-05, "loss": 0.9328, "step": 78040 }, { "epoch": 69.12754650132861, "grad_norm": 0.210884690284729, "learning_rate": 1e-05, "loss": 0.9751, "step": 78045 }, { "epoch": 69.1319751992914, "grad_norm": 0.2227676659822464, "learning_rate": 1e-05, "loss": 0.9438, "step": 78050 }, { "epoch": 69.13640389725421, "grad_norm": 0.25897300243377686, "learning_rate": 1e-05, "loss": 0.9714, "step": 78055 }, { "epoch": 69.140832595217, "grad_norm": 0.23228009045124054, "learning_rate": 1e-05, "loss": 0.9896, "step": 78060 }, { "epoch": 69.1452612931798, "grad_norm": 0.23589274287223816, "learning_rate": 1e-05, "loss": 0.9837, "step": 78065 }, { "epoch": 69.14968999114261, "grad_norm": 0.2592189610004425, "learning_rate": 1e-05, "loss": 0.9668, "step": 78070 }, { "epoch": 69.1541186891054, "grad_norm": 0.21341092884540558, "learning_rate": 1e-05, "loss": 0.967, "step": 78075 }, { "epoch": 69.1585473870682, "grad_norm": 0.26577284932136536, "learning_rate": 1e-05, "loss": 1.036, "step": 78080 }, { "epoch": 69.162976085031, "grad_norm": 0.24455757439136505, "learning_rate": 1e-05, "loss": 0.9165, "step": 78085 }, { "epoch": 69.1674047829938, "grad_norm": 0.28228557109832764, "learning_rate": 1e-05, "loss": 0.9702, "step": 78090 }, { "epoch": 69.1718334809566, "grad_norm": 0.253810316324234, "learning_rate": 1e-05, "loss": 0.9157, "step": 78095 }, { "epoch": 69.1762621789194, "grad_norm": 0.26682087779045105, "learning_rate": 1e-05, "loss": 0.9937, "step": 78100 }, { "epoch": 69.1806908768822, "grad_norm": 0.23522455990314484, "learning_rate": 1e-05, "loss": 0.9954, "step": 78105 }, { "epoch": 69.18511957484499, "grad_norm": 0.22824148833751678, "learning_rate": 1e-05, "loss": 0.9961, "step": 78110 }, { "epoch": 69.1895482728078, "grad_norm": 0.25177592039108276, "learning_rate": 1e-05, "loss": 1.0131, "step": 78115 }, { "epoch": 69.19397697077059, "grad_norm": 0.2501276135444641, "learning_rate": 1e-05, "loss": 0.9488, "step": 78120 }, { "epoch": 69.19840566873339, "grad_norm": 0.24373245239257812, "learning_rate": 1e-05, "loss": 0.9946, "step": 78125 }, { "epoch": 69.2028343666962, "grad_norm": 0.2491762489080429, "learning_rate": 1e-05, "loss": 0.9459, "step": 78130 }, { "epoch": 69.20726306465899, "grad_norm": 0.2440991997718811, "learning_rate": 1e-05, "loss": 0.9428, "step": 78135 }, { "epoch": 69.21169176262178, "grad_norm": 0.21512573957443237, "learning_rate": 1e-05, "loss": 0.9736, "step": 78140 }, { "epoch": 69.21612046058459, "grad_norm": 0.22907035052776337, "learning_rate": 1e-05, "loss": 0.9601, "step": 78145 }, { "epoch": 69.22054915854739, "grad_norm": 0.22105741500854492, "learning_rate": 1e-05, "loss": 0.991, "step": 78150 }, { "epoch": 69.22497785651018, "grad_norm": 0.22191956639289856, "learning_rate": 1e-05, "loss": 0.9175, "step": 78155 }, { "epoch": 69.22940655447299, "grad_norm": 0.23449578881263733, "learning_rate": 1e-05, "loss": 1.0132, "step": 78160 }, { "epoch": 69.23383525243578, "grad_norm": 0.27362146973609924, "learning_rate": 1e-05, "loss": 0.9063, "step": 78165 }, { "epoch": 69.23826395039858, "grad_norm": 0.23117782175540924, "learning_rate": 1e-05, "loss": 0.9754, "step": 78170 }, { "epoch": 69.24269264836138, "grad_norm": 0.2439649999141693, "learning_rate": 1e-05, "loss": 0.9586, "step": 78175 }, { "epoch": 69.24712134632418, "grad_norm": 0.23415379226207733, "learning_rate": 1e-05, "loss": 0.946, "step": 78180 }, { "epoch": 69.25155004428699, "grad_norm": 0.22621458768844604, "learning_rate": 1e-05, "loss": 0.962, "step": 78185 }, { "epoch": 69.25597874224978, "grad_norm": 0.2816198170185089, "learning_rate": 1e-05, "loss": 0.9433, "step": 78190 }, { "epoch": 69.26040744021257, "grad_norm": 0.24278493225574493, "learning_rate": 1e-05, "loss": 0.9493, "step": 78195 }, { "epoch": 69.26483613817538, "grad_norm": 0.27565550804138184, "learning_rate": 1e-05, "loss": 0.9574, "step": 78200 }, { "epoch": 69.26926483613818, "grad_norm": 0.21678772568702698, "learning_rate": 1e-05, "loss": 0.9707, "step": 78205 }, { "epoch": 69.27369353410097, "grad_norm": 0.26439160108566284, "learning_rate": 1e-05, "loss": 0.956, "step": 78210 }, { "epoch": 69.27812223206378, "grad_norm": 0.21772649884223938, "learning_rate": 1e-05, "loss": 0.9988, "step": 78215 }, { "epoch": 69.28255093002657, "grad_norm": 0.2593878209590912, "learning_rate": 1e-05, "loss": 0.9677, "step": 78220 }, { "epoch": 69.28697962798937, "grad_norm": 0.24496276676654816, "learning_rate": 1e-05, "loss": 0.9133, "step": 78225 }, { "epoch": 69.29140832595218, "grad_norm": 0.2970627248287201, "learning_rate": 1e-05, "loss": 0.9543, "step": 78230 }, { "epoch": 69.29583702391497, "grad_norm": 0.24245810508728027, "learning_rate": 1e-05, "loss": 1.0205, "step": 78235 }, { "epoch": 69.30026572187776, "grad_norm": 0.23077483475208282, "learning_rate": 1e-05, "loss": 0.997, "step": 78240 }, { "epoch": 69.30469441984057, "grad_norm": 0.27574291825294495, "learning_rate": 1e-05, "loss": 0.9344, "step": 78245 }, { "epoch": 69.30912311780337, "grad_norm": 0.2636722922325134, "learning_rate": 1e-05, "loss": 0.9762, "step": 78250 }, { "epoch": 69.31355181576616, "grad_norm": 0.21882973611354828, "learning_rate": 1e-05, "loss": 0.9648, "step": 78255 }, { "epoch": 69.31798051372897, "grad_norm": 0.27143245935440063, "learning_rate": 1e-05, "loss": 0.9733, "step": 78260 }, { "epoch": 69.32240921169176, "grad_norm": 0.2611597776412964, "learning_rate": 1e-05, "loss": 0.9589, "step": 78265 }, { "epoch": 69.32683790965456, "grad_norm": 0.264215886592865, "learning_rate": 1e-05, "loss": 0.9612, "step": 78270 }, { "epoch": 69.33126660761737, "grad_norm": 0.2768572270870209, "learning_rate": 1e-05, "loss": 0.9672, "step": 78275 }, { "epoch": 69.33569530558016, "grad_norm": 0.3088644742965698, "learning_rate": 1e-05, "loss": 0.9075, "step": 78280 }, { "epoch": 69.34012400354295, "grad_norm": 0.2964226305484772, "learning_rate": 1e-05, "loss": 0.9307, "step": 78285 }, { "epoch": 69.34455270150576, "grad_norm": 0.30500662326812744, "learning_rate": 1e-05, "loss": 0.9516, "step": 78290 }, { "epoch": 69.34898139946856, "grad_norm": 0.21796412765979767, "learning_rate": 1e-05, "loss": 0.9693, "step": 78295 }, { "epoch": 69.35341009743135, "grad_norm": 0.2305801808834076, "learning_rate": 1e-05, "loss": 0.9402, "step": 78300 }, { "epoch": 69.35783879539416, "grad_norm": 0.22625824809074402, "learning_rate": 1e-05, "loss": 0.8954, "step": 78305 }, { "epoch": 69.36226749335695, "grad_norm": 0.24406488239765167, "learning_rate": 1e-05, "loss": 1.0046, "step": 78310 }, { "epoch": 69.36669619131975, "grad_norm": 0.2312549501657486, "learning_rate": 1e-05, "loss": 0.9041, "step": 78315 }, { "epoch": 69.37112488928256, "grad_norm": 0.2332194447517395, "learning_rate": 1e-05, "loss": 1.012, "step": 78320 }, { "epoch": 69.37555358724535, "grad_norm": 0.22481530904769897, "learning_rate": 1e-05, "loss": 0.9907, "step": 78325 }, { "epoch": 69.37998228520814, "grad_norm": 0.2588733732700348, "learning_rate": 1e-05, "loss": 0.9868, "step": 78330 }, { "epoch": 69.38441098317095, "grad_norm": 0.23142071068286896, "learning_rate": 1e-05, "loss": 0.951, "step": 78335 }, { "epoch": 69.38883968113375, "grad_norm": 0.22105400264263153, "learning_rate": 1e-05, "loss": 0.963, "step": 78340 }, { "epoch": 69.39326837909654, "grad_norm": 0.22179068624973297, "learning_rate": 1e-05, "loss": 0.9304, "step": 78345 }, { "epoch": 69.39769707705935, "grad_norm": 0.24478228390216827, "learning_rate": 1e-05, "loss": 0.9859, "step": 78350 }, { "epoch": 69.40212577502214, "grad_norm": 0.22888080775737762, "learning_rate": 1e-05, "loss": 0.9384, "step": 78355 }, { "epoch": 69.40655447298494, "grad_norm": 0.2257438600063324, "learning_rate": 1e-05, "loss": 0.9244, "step": 78360 }, { "epoch": 69.41098317094774, "grad_norm": 0.21681204438209534, "learning_rate": 1e-05, "loss": 0.9298, "step": 78365 }, { "epoch": 69.41541186891054, "grad_norm": 0.21038585901260376, "learning_rate": 1e-05, "loss": 0.9589, "step": 78370 }, { "epoch": 69.41984056687333, "grad_norm": 0.21871738135814667, "learning_rate": 1e-05, "loss": 0.9602, "step": 78375 }, { "epoch": 69.42426926483614, "grad_norm": 0.18269741535186768, "learning_rate": 1e-05, "loss": 0.9225, "step": 78380 }, { "epoch": 69.42869796279894, "grad_norm": 0.26974815130233765, "learning_rate": 1e-05, "loss": 0.9314, "step": 78385 }, { "epoch": 69.43312666076173, "grad_norm": 0.24466097354888916, "learning_rate": 1e-05, "loss": 0.967, "step": 78390 }, { "epoch": 69.43755535872454, "grad_norm": 0.257714182138443, "learning_rate": 1e-05, "loss": 0.9347, "step": 78395 }, { "epoch": 69.44198405668733, "grad_norm": 0.22286882996559143, "learning_rate": 1e-05, "loss": 0.9587, "step": 78400 }, { "epoch": 69.44641275465013, "grad_norm": 0.24475909769535065, "learning_rate": 1e-05, "loss": 0.9799, "step": 78405 }, { "epoch": 69.45084145261293, "grad_norm": 0.2519271969795227, "learning_rate": 1e-05, "loss": 0.9451, "step": 78410 }, { "epoch": 69.45527015057573, "grad_norm": 0.25500184297561646, "learning_rate": 1e-05, "loss": 0.9382, "step": 78415 }, { "epoch": 69.45969884853854, "grad_norm": 0.2197030782699585, "learning_rate": 1e-05, "loss": 1.0047, "step": 78420 }, { "epoch": 69.46412754650133, "grad_norm": 0.2599322199821472, "learning_rate": 1e-05, "loss": 0.9986, "step": 78425 }, { "epoch": 69.46855624446412, "grad_norm": 0.2638357877731323, "learning_rate": 1e-05, "loss": 0.9607, "step": 78430 }, { "epoch": 69.47298494242693, "grad_norm": 0.2599092125892639, "learning_rate": 1e-05, "loss": 0.9777, "step": 78435 }, { "epoch": 69.47741364038973, "grad_norm": 0.231001615524292, "learning_rate": 1e-05, "loss": 0.9859, "step": 78440 }, { "epoch": 69.48184233835252, "grad_norm": 0.24882809817790985, "learning_rate": 1e-05, "loss": 0.9946, "step": 78445 }, { "epoch": 69.48627103631533, "grad_norm": 0.2364180088043213, "learning_rate": 1e-05, "loss": 0.9584, "step": 78450 }, { "epoch": 69.49069973427812, "grad_norm": 0.26200833916664124, "learning_rate": 1e-05, "loss": 0.9975, "step": 78455 }, { "epoch": 69.49512843224092, "grad_norm": 0.25467485189437866, "learning_rate": 1e-05, "loss": 0.944, "step": 78460 }, { "epoch": 69.49955713020373, "grad_norm": 0.2419613152742386, "learning_rate": 1e-05, "loss": 1.0285, "step": 78465 }, { "epoch": 69.50398582816652, "grad_norm": 0.2418401837348938, "learning_rate": 1e-05, "loss": 0.9393, "step": 78470 }, { "epoch": 69.50841452612931, "grad_norm": 0.27183210849761963, "learning_rate": 1e-05, "loss": 1.012, "step": 78475 }, { "epoch": 69.51284322409212, "grad_norm": 0.23429618775844574, "learning_rate": 1e-05, "loss": 0.9341, "step": 78480 }, { "epoch": 69.51727192205492, "grad_norm": 0.2655896246433258, "learning_rate": 1e-05, "loss": 0.9713, "step": 78485 }, { "epoch": 69.52170062001771, "grad_norm": 0.2690069079399109, "learning_rate": 1e-05, "loss": 1.0103, "step": 78490 }, { "epoch": 69.52612931798052, "grad_norm": 0.24193212389945984, "learning_rate": 1e-05, "loss": 0.9968, "step": 78495 }, { "epoch": 69.53055801594331, "grad_norm": 0.23649504780769348, "learning_rate": 1e-05, "loss": 0.9085, "step": 78500 }, { "epoch": 69.53498671390611, "grad_norm": 0.22794711589813232, "learning_rate": 1e-05, "loss": 0.9914, "step": 78505 }, { "epoch": 69.53941541186892, "grad_norm": 0.31951290369033813, "learning_rate": 1e-05, "loss": 0.9554, "step": 78510 }, { "epoch": 69.54384410983171, "grad_norm": 0.24057066440582275, "learning_rate": 1e-05, "loss": 1.0008, "step": 78515 }, { "epoch": 69.5482728077945, "grad_norm": 0.24476291239261627, "learning_rate": 1e-05, "loss": 1.01, "step": 78520 }, { "epoch": 69.55270150575731, "grad_norm": 0.3005260229110718, "learning_rate": 1e-05, "loss": 0.9369, "step": 78525 }, { "epoch": 69.5571302037201, "grad_norm": 0.2852076292037964, "learning_rate": 1e-05, "loss": 1.0075, "step": 78530 }, { "epoch": 69.5615589016829, "grad_norm": 0.2528403401374817, "learning_rate": 1e-05, "loss": 0.9282, "step": 78535 }, { "epoch": 69.56598759964571, "grad_norm": 0.29602670669555664, "learning_rate": 1e-05, "loss": 0.9988, "step": 78540 }, { "epoch": 69.5704162976085, "grad_norm": 0.2505547106266022, "learning_rate": 1e-05, "loss": 0.9688, "step": 78545 }, { "epoch": 69.5748449955713, "grad_norm": 0.22935326397418976, "learning_rate": 1e-05, "loss": 0.9532, "step": 78550 }, { "epoch": 69.5792736935341, "grad_norm": 0.2139900177717209, "learning_rate": 1e-05, "loss": 0.9689, "step": 78555 }, { "epoch": 69.5837023914969, "grad_norm": 0.23865588009357452, "learning_rate": 1e-05, "loss": 0.9373, "step": 78560 }, { "epoch": 69.5881310894597, "grad_norm": 0.22912459075450897, "learning_rate": 1e-05, "loss": 0.9168, "step": 78565 }, { "epoch": 69.5925597874225, "grad_norm": 0.259901762008667, "learning_rate": 1e-05, "loss": 0.9474, "step": 78570 }, { "epoch": 69.5969884853853, "grad_norm": 0.24522702395915985, "learning_rate": 1e-05, "loss": 0.9487, "step": 78575 }, { "epoch": 69.60141718334809, "grad_norm": 0.25122779607772827, "learning_rate": 1e-05, "loss": 0.9332, "step": 78580 }, { "epoch": 69.6058458813109, "grad_norm": 0.25533708930015564, "learning_rate": 1e-05, "loss": 0.9602, "step": 78585 }, { "epoch": 69.61027457927369, "grad_norm": 0.2665473520755768, "learning_rate": 1e-05, "loss": 0.9551, "step": 78590 }, { "epoch": 69.61470327723649, "grad_norm": 0.2831224799156189, "learning_rate": 1e-05, "loss": 0.9738, "step": 78595 }, { "epoch": 69.6191319751993, "grad_norm": 0.23435364663600922, "learning_rate": 1e-05, "loss": 0.9839, "step": 78600 }, { "epoch": 69.62356067316209, "grad_norm": 0.23897412419319153, "learning_rate": 1e-05, "loss": 0.9102, "step": 78605 }, { "epoch": 69.62798937112488, "grad_norm": 0.2745687663555145, "learning_rate": 1e-05, "loss": 0.9352, "step": 78610 }, { "epoch": 69.63241806908769, "grad_norm": 0.21054910123348236, "learning_rate": 1e-05, "loss": 0.9782, "step": 78615 }, { "epoch": 69.63684676705049, "grad_norm": 0.23244932293891907, "learning_rate": 1e-05, "loss": 0.9976, "step": 78620 }, { "epoch": 69.64127546501328, "grad_norm": 0.2235032021999359, "learning_rate": 1e-05, "loss": 0.9405, "step": 78625 }, { "epoch": 69.64570416297609, "grad_norm": 0.25063931941986084, "learning_rate": 1e-05, "loss": 1.0058, "step": 78630 }, { "epoch": 69.65013286093888, "grad_norm": 0.24528315663337708, "learning_rate": 1e-05, "loss": 0.9426, "step": 78635 }, { "epoch": 69.65456155890168, "grad_norm": 0.21745511889457703, "learning_rate": 1e-05, "loss": 0.963, "step": 78640 }, { "epoch": 69.65899025686448, "grad_norm": 0.24475501477718353, "learning_rate": 1e-05, "loss": 0.9557, "step": 78645 }, { "epoch": 69.66341895482728, "grad_norm": 0.22695426642894745, "learning_rate": 1e-05, "loss": 0.9538, "step": 78650 }, { "epoch": 69.66784765279007, "grad_norm": 0.25105684995651245, "learning_rate": 1e-05, "loss": 0.9878, "step": 78655 }, { "epoch": 69.67227635075288, "grad_norm": 0.23489674925804138, "learning_rate": 1e-05, "loss": 0.9636, "step": 78660 }, { "epoch": 69.67670504871568, "grad_norm": 0.212635338306427, "learning_rate": 1e-05, "loss": 0.9212, "step": 78665 }, { "epoch": 69.68113374667848, "grad_norm": 0.20544585585594177, "learning_rate": 1e-05, "loss": 0.9988, "step": 78670 }, { "epoch": 69.68556244464128, "grad_norm": 0.26581746339797974, "learning_rate": 1e-05, "loss": 0.9379, "step": 78675 }, { "epoch": 69.68999114260407, "grad_norm": 0.29279255867004395, "learning_rate": 1e-05, "loss": 0.969, "step": 78680 }, { "epoch": 69.69441984056688, "grad_norm": 0.23660996556282043, "learning_rate": 1e-05, "loss": 0.9597, "step": 78685 }, { "epoch": 69.69884853852967, "grad_norm": 0.3415919542312622, "learning_rate": 1e-05, "loss": 0.973, "step": 78690 }, { "epoch": 69.70327723649247, "grad_norm": 0.2696986794471741, "learning_rate": 1e-05, "loss": 0.939, "step": 78695 }, { "epoch": 69.70770593445528, "grad_norm": 0.2356952726840973, "learning_rate": 1e-05, "loss": 0.961, "step": 78700 }, { "epoch": 69.71213463241807, "grad_norm": 0.22103829681873322, "learning_rate": 1e-05, "loss": 0.988, "step": 78705 }, { "epoch": 69.71656333038086, "grad_norm": 0.2711811363697052, "learning_rate": 1e-05, "loss": 0.9964, "step": 78710 }, { "epoch": 69.72099202834367, "grad_norm": 0.21430125832557678, "learning_rate": 1e-05, "loss": 0.9899, "step": 78715 }, { "epoch": 69.72542072630647, "grad_norm": 0.29422467947006226, "learning_rate": 1e-05, "loss": 0.9513, "step": 78720 }, { "epoch": 69.72984942426926, "grad_norm": 0.22700706124305725, "learning_rate": 1e-05, "loss": 0.9723, "step": 78725 }, { "epoch": 69.73427812223207, "grad_norm": 0.21754463016986847, "learning_rate": 1e-05, "loss": 0.9271, "step": 78730 }, { "epoch": 69.73870682019486, "grad_norm": 0.22344212234020233, "learning_rate": 1e-05, "loss": 0.9773, "step": 78735 }, { "epoch": 69.74313551815766, "grad_norm": 0.26332640647888184, "learning_rate": 1e-05, "loss": 0.9632, "step": 78740 }, { "epoch": 69.74756421612047, "grad_norm": 0.22788086533546448, "learning_rate": 1e-05, "loss": 0.9486, "step": 78745 }, { "epoch": 69.75199291408326, "grad_norm": 0.26933398842811584, "learning_rate": 1e-05, "loss": 0.9555, "step": 78750 }, { "epoch": 69.75642161204605, "grad_norm": 0.2200969010591507, "learning_rate": 1e-05, "loss": 0.9412, "step": 78755 }, { "epoch": 69.76085031000886, "grad_norm": 0.21304844319820404, "learning_rate": 1e-05, "loss": 0.9663, "step": 78760 }, { "epoch": 69.76527900797166, "grad_norm": 0.2831621766090393, "learning_rate": 1e-05, "loss": 0.8625, "step": 78765 }, { "epoch": 69.76970770593445, "grad_norm": 0.25681835412979126, "learning_rate": 1e-05, "loss": 1.0108, "step": 78770 }, { "epoch": 69.77413640389726, "grad_norm": 0.2893185615539551, "learning_rate": 1e-05, "loss": 0.9477, "step": 78775 }, { "epoch": 69.77856510186005, "grad_norm": 0.2517160475254059, "learning_rate": 1e-05, "loss": 1.0208, "step": 78780 }, { "epoch": 69.78299379982285, "grad_norm": 0.2583816349506378, "learning_rate": 1e-05, "loss": 0.9637, "step": 78785 }, { "epoch": 69.78742249778566, "grad_norm": 0.24481059610843658, "learning_rate": 1e-05, "loss": 0.9596, "step": 78790 }, { "epoch": 69.79185119574845, "grad_norm": 0.2583755850791931, "learning_rate": 1e-05, "loss": 0.9867, "step": 78795 }, { "epoch": 69.79627989371124, "grad_norm": 0.2394489049911499, "learning_rate": 1e-05, "loss": 0.9845, "step": 78800 }, { "epoch": 69.80070859167405, "grad_norm": 0.24175888299942017, "learning_rate": 1e-05, "loss": 0.9258, "step": 78805 }, { "epoch": 69.80513728963685, "grad_norm": 0.22404834628105164, "learning_rate": 1e-05, "loss": 0.9774, "step": 78810 }, { "epoch": 69.80956598759964, "grad_norm": 0.2633533477783203, "learning_rate": 1e-05, "loss": 0.9543, "step": 78815 }, { "epoch": 69.81399468556245, "grad_norm": 0.22801053524017334, "learning_rate": 1e-05, "loss": 0.9821, "step": 78820 }, { "epoch": 69.81842338352524, "grad_norm": 0.2548644244670868, "learning_rate": 1e-05, "loss": 0.9331, "step": 78825 }, { "epoch": 69.82285208148804, "grad_norm": 0.21931657195091248, "learning_rate": 1e-05, "loss": 0.964, "step": 78830 }, { "epoch": 69.82728077945085, "grad_norm": 0.22544324398040771, "learning_rate": 1e-05, "loss": 1.0259, "step": 78835 }, { "epoch": 69.83170947741364, "grad_norm": 0.206543430685997, "learning_rate": 1e-05, "loss": 0.9903, "step": 78840 }, { "epoch": 69.83613817537643, "grad_norm": 0.26624545454978943, "learning_rate": 1e-05, "loss": 0.9997, "step": 78845 }, { "epoch": 69.84056687333924, "grad_norm": 0.23478882014751434, "learning_rate": 1e-05, "loss": 0.9489, "step": 78850 }, { "epoch": 69.84499557130204, "grad_norm": 0.26856914162635803, "learning_rate": 1e-05, "loss": 0.9754, "step": 78855 }, { "epoch": 69.84942426926483, "grad_norm": 0.25222253799438477, "learning_rate": 1e-05, "loss": 0.9393, "step": 78860 }, { "epoch": 69.85385296722764, "grad_norm": 0.23517948389053345, "learning_rate": 1e-05, "loss": 0.9104, "step": 78865 }, { "epoch": 69.85828166519043, "grad_norm": 0.2656784653663635, "learning_rate": 1e-05, "loss": 0.9951, "step": 78870 }, { "epoch": 69.86271036315323, "grad_norm": 0.21019315719604492, "learning_rate": 1e-05, "loss": 0.974, "step": 78875 }, { "epoch": 69.86713906111603, "grad_norm": 0.2421507090330124, "learning_rate": 1e-05, "loss": 0.9741, "step": 78880 }, { "epoch": 69.87156775907883, "grad_norm": 0.24652041494846344, "learning_rate": 1e-05, "loss": 0.9897, "step": 78885 }, { "epoch": 69.87599645704162, "grad_norm": 0.20786546170711517, "learning_rate": 1e-05, "loss": 0.9141, "step": 78890 }, { "epoch": 69.88042515500443, "grad_norm": 0.22649672627449036, "learning_rate": 1e-05, "loss": 0.9412, "step": 78895 }, { "epoch": 69.88485385296723, "grad_norm": 0.2741950452327728, "learning_rate": 1e-05, "loss": 0.9804, "step": 78900 }, { "epoch": 69.88928255093003, "grad_norm": 0.1955946832895279, "learning_rate": 1e-05, "loss": 0.9987, "step": 78905 }, { "epoch": 69.89371124889283, "grad_norm": 0.23754297196865082, "learning_rate": 1e-05, "loss": 0.9958, "step": 78910 }, { "epoch": 69.89813994685562, "grad_norm": 0.21939845383167267, "learning_rate": 1e-05, "loss": 0.9677, "step": 78915 }, { "epoch": 69.90256864481843, "grad_norm": 0.19615289568901062, "learning_rate": 1e-05, "loss": 0.9475, "step": 78920 }, { "epoch": 69.90699734278122, "grad_norm": 0.2401057779788971, "learning_rate": 1e-05, "loss": 0.9325, "step": 78925 }, { "epoch": 69.91142604074402, "grad_norm": 0.2540321946144104, "learning_rate": 1e-05, "loss": 0.9597, "step": 78930 }, { "epoch": 69.91585473870683, "grad_norm": 0.24981608986854553, "learning_rate": 1e-05, "loss": 1.0127, "step": 78935 }, { "epoch": 69.92028343666962, "grad_norm": 0.22702178359031677, "learning_rate": 1e-05, "loss": 0.9913, "step": 78940 }, { "epoch": 69.92471213463241, "grad_norm": 0.21843881905078888, "learning_rate": 1e-05, "loss": 0.9633, "step": 78945 }, { "epoch": 69.92914083259522, "grad_norm": 0.2285066395998001, "learning_rate": 1e-05, "loss": 0.9764, "step": 78950 }, { "epoch": 69.93356953055802, "grad_norm": 0.23991787433624268, "learning_rate": 1e-05, "loss": 1.0236, "step": 78955 }, { "epoch": 69.93799822852081, "grad_norm": 0.24507230520248413, "learning_rate": 1e-05, "loss": 0.9278, "step": 78960 }, { "epoch": 69.94242692648362, "grad_norm": 0.2080422341823578, "learning_rate": 1e-05, "loss": 0.9871, "step": 78965 }, { "epoch": 69.94685562444641, "grad_norm": 0.2088281810283661, "learning_rate": 1e-05, "loss": 0.9575, "step": 78970 }, { "epoch": 69.95128432240921, "grad_norm": 0.22768926620483398, "learning_rate": 1e-05, "loss": 0.9544, "step": 78975 }, { "epoch": 69.95571302037202, "grad_norm": 0.24241498112678528, "learning_rate": 1e-05, "loss": 0.9666, "step": 78980 }, { "epoch": 69.96014171833481, "grad_norm": 0.2541194260120392, "learning_rate": 1e-05, "loss": 0.9503, "step": 78985 }, { "epoch": 69.9645704162976, "grad_norm": 0.2567010223865509, "learning_rate": 1e-05, "loss": 0.9911, "step": 78990 }, { "epoch": 69.96899911426041, "grad_norm": 0.29952529072761536, "learning_rate": 1e-05, "loss": 0.9926, "step": 78995 }, { "epoch": 69.9734278122232, "grad_norm": 0.31167447566986084, "learning_rate": 1e-05, "loss": 0.9215, "step": 79000 }, { "epoch": 69.977856510186, "grad_norm": 0.24813435971736908, "learning_rate": 1e-05, "loss": 0.9329, "step": 79005 }, { "epoch": 69.98228520814881, "grad_norm": 0.2542593479156494, "learning_rate": 1e-05, "loss": 0.9567, "step": 79010 }, { "epoch": 69.9867139061116, "grad_norm": 0.30488407611846924, "learning_rate": 1e-05, "loss": 0.9425, "step": 79015 }, { "epoch": 69.9911426040744, "grad_norm": 0.235349640250206, "learning_rate": 1e-05, "loss": 0.9441, "step": 79020 }, { "epoch": 69.9955713020372, "grad_norm": 0.23907087743282318, "learning_rate": 1e-05, "loss": 0.9693, "step": 79025 }, { "epoch": 70.0, "grad_norm": 0.19082893431186676, "learning_rate": 1e-05, "loss": 0.9489, "step": 79030 }, { "epoch": 70.0044286979628, "grad_norm": 0.2625238299369812, "learning_rate": 1e-05, "loss": 0.9987, "step": 79035 }, { "epoch": 70.0088573959256, "grad_norm": 0.24825170636177063, "learning_rate": 1e-05, "loss": 1.0028, "step": 79040 }, { "epoch": 70.0132860938884, "grad_norm": 0.2068416178226471, "learning_rate": 1e-05, "loss": 0.9178, "step": 79045 }, { "epoch": 70.01771479185119, "grad_norm": 0.23527489602565765, "learning_rate": 1e-05, "loss": 0.9984, "step": 79050 }, { "epoch": 70.022143489814, "grad_norm": 0.22223863005638123, "learning_rate": 1e-05, "loss": 0.982, "step": 79055 }, { "epoch": 70.0265721877768, "grad_norm": 0.23251789808273315, "learning_rate": 1e-05, "loss": 0.9357, "step": 79060 }, { "epoch": 70.03100088573959, "grad_norm": 0.1982160061597824, "learning_rate": 1e-05, "loss": 0.9865, "step": 79065 }, { "epoch": 70.0354295837024, "grad_norm": 0.22085952758789062, "learning_rate": 1e-05, "loss": 0.9306, "step": 79070 }, { "epoch": 70.03985828166519, "grad_norm": 0.23387576639652252, "learning_rate": 1e-05, "loss": 0.9761, "step": 79075 }, { "epoch": 70.04428697962798, "grad_norm": 0.22022143006324768, "learning_rate": 1e-05, "loss": 0.922, "step": 79080 }, { "epoch": 70.04871567759079, "grad_norm": 0.2559187710285187, "learning_rate": 1e-05, "loss": 0.9254, "step": 79085 }, { "epoch": 70.05314437555359, "grad_norm": 0.2740534245967865, "learning_rate": 1e-05, "loss": 0.9733, "step": 79090 }, { "epoch": 70.05757307351638, "grad_norm": 0.22527728974819183, "learning_rate": 1e-05, "loss": 0.9575, "step": 79095 }, { "epoch": 70.06200177147919, "grad_norm": 0.22429199516773224, "learning_rate": 1e-05, "loss": 0.9412, "step": 79100 }, { "epoch": 70.06643046944198, "grad_norm": 0.24807880818843842, "learning_rate": 1e-05, "loss": 0.9732, "step": 79105 }, { "epoch": 70.07085916740478, "grad_norm": 0.28592634201049805, "learning_rate": 1e-05, "loss": 0.9966, "step": 79110 }, { "epoch": 70.07528786536759, "grad_norm": 0.224885493516922, "learning_rate": 1e-05, "loss": 0.9561, "step": 79115 }, { "epoch": 70.07971656333038, "grad_norm": 0.2100324183702469, "learning_rate": 1e-05, "loss": 0.9563, "step": 79120 }, { "epoch": 70.08414526129317, "grad_norm": 0.2536398470401764, "learning_rate": 1e-05, "loss": 0.9479, "step": 79125 }, { "epoch": 70.08857395925598, "grad_norm": 0.20576679706573486, "learning_rate": 1e-05, "loss": 0.9785, "step": 79130 }, { "epoch": 70.09300265721878, "grad_norm": 0.23106379806995392, "learning_rate": 1e-05, "loss": 1.0039, "step": 79135 }, { "epoch": 70.09743135518157, "grad_norm": 0.2338072806596756, "learning_rate": 1e-05, "loss": 0.9821, "step": 79140 }, { "epoch": 70.10186005314438, "grad_norm": 0.27735385298728943, "learning_rate": 1e-05, "loss": 0.9828, "step": 79145 }, { "epoch": 70.10628875110717, "grad_norm": 0.21151185035705566, "learning_rate": 1e-05, "loss": 0.972, "step": 79150 }, { "epoch": 70.11071744906998, "grad_norm": 0.2400214970111847, "learning_rate": 1e-05, "loss": 1.0037, "step": 79155 }, { "epoch": 70.11514614703277, "grad_norm": 0.2658084034919739, "learning_rate": 1e-05, "loss": 0.9446, "step": 79160 }, { "epoch": 70.11957484499557, "grad_norm": 0.22883708775043488, "learning_rate": 1e-05, "loss": 1.0035, "step": 79165 }, { "epoch": 70.12400354295838, "grad_norm": 0.2803514301776886, "learning_rate": 1e-05, "loss": 0.912, "step": 79170 }, { "epoch": 70.12843224092117, "grad_norm": 0.2579149007797241, "learning_rate": 1e-05, "loss": 1.0282, "step": 79175 }, { "epoch": 70.13286093888397, "grad_norm": 0.24845117330551147, "learning_rate": 1e-05, "loss": 0.976, "step": 79180 }, { "epoch": 70.13728963684677, "grad_norm": 0.2834876775741577, "learning_rate": 1e-05, "loss": 0.9688, "step": 79185 }, { "epoch": 70.14171833480957, "grad_norm": 0.2607656717300415, "learning_rate": 1e-05, "loss": 0.9649, "step": 79190 }, { "epoch": 70.14614703277236, "grad_norm": 0.2543274462223053, "learning_rate": 1e-05, "loss": 0.9521, "step": 79195 }, { "epoch": 70.15057573073517, "grad_norm": 0.27856314182281494, "learning_rate": 1e-05, "loss": 0.978, "step": 79200 }, { "epoch": 70.15500442869796, "grad_norm": 0.2586901783943176, "learning_rate": 1e-05, "loss": 1.0232, "step": 79205 }, { "epoch": 70.15943312666076, "grad_norm": 0.24000760912895203, "learning_rate": 1e-05, "loss": 1.0072, "step": 79210 }, { "epoch": 70.16386182462357, "grad_norm": 0.26503807306289673, "learning_rate": 1e-05, "loss": 0.9321, "step": 79215 }, { "epoch": 70.16829052258636, "grad_norm": 0.23649385571479797, "learning_rate": 1e-05, "loss": 0.985, "step": 79220 }, { "epoch": 70.17271922054915, "grad_norm": 0.25321412086486816, "learning_rate": 1e-05, "loss": 0.9864, "step": 79225 }, { "epoch": 70.17714791851196, "grad_norm": 0.2565304636955261, "learning_rate": 1e-05, "loss": 0.9139, "step": 79230 }, { "epoch": 70.18157661647476, "grad_norm": 0.23995286226272583, "learning_rate": 1e-05, "loss": 0.9327, "step": 79235 }, { "epoch": 70.18600531443755, "grad_norm": 0.24533501267433167, "learning_rate": 1e-05, "loss": 0.9637, "step": 79240 }, { "epoch": 70.19043401240036, "grad_norm": 0.23964214324951172, "learning_rate": 1e-05, "loss": 0.9919, "step": 79245 }, { "epoch": 70.19486271036315, "grad_norm": 0.2198006808757782, "learning_rate": 1e-05, "loss": 0.9289, "step": 79250 }, { "epoch": 70.19929140832595, "grad_norm": 0.26828646659851074, "learning_rate": 1e-05, "loss": 1.014, "step": 79255 }, { "epoch": 70.20372010628876, "grad_norm": 0.24599570035934448, "learning_rate": 1e-05, "loss": 0.9702, "step": 79260 }, { "epoch": 70.20814880425155, "grad_norm": 0.27050408720970154, "learning_rate": 1e-05, "loss": 0.9382, "step": 79265 }, { "epoch": 70.21257750221434, "grad_norm": 0.23714813590049744, "learning_rate": 1e-05, "loss": 0.9509, "step": 79270 }, { "epoch": 70.21700620017715, "grad_norm": 0.21290239691734314, "learning_rate": 1e-05, "loss": 0.969, "step": 79275 }, { "epoch": 70.22143489813995, "grad_norm": 0.2118254005908966, "learning_rate": 1e-05, "loss": 0.9847, "step": 79280 }, { "epoch": 70.22586359610274, "grad_norm": 0.26099902391433716, "learning_rate": 1e-05, "loss": 0.9316, "step": 79285 }, { "epoch": 70.23029229406555, "grad_norm": 0.23516890406608582, "learning_rate": 1e-05, "loss": 0.9807, "step": 79290 }, { "epoch": 70.23472099202834, "grad_norm": 0.27695444226264954, "learning_rate": 1e-05, "loss": 0.9767, "step": 79295 }, { "epoch": 70.23914968999114, "grad_norm": 0.22662830352783203, "learning_rate": 1e-05, "loss": 0.9848, "step": 79300 }, { "epoch": 70.24357838795395, "grad_norm": 0.2804628014564514, "learning_rate": 1e-05, "loss": 0.9252, "step": 79305 }, { "epoch": 70.24800708591674, "grad_norm": 0.27573657035827637, "learning_rate": 1e-05, "loss": 1.0104, "step": 79310 }, { "epoch": 70.25243578387953, "grad_norm": 0.2663476765155792, "learning_rate": 1e-05, "loss": 1.0176, "step": 79315 }, { "epoch": 70.25686448184234, "grad_norm": 0.22174453735351562, "learning_rate": 1e-05, "loss": 0.9285, "step": 79320 }, { "epoch": 70.26129317980514, "grad_norm": 0.2670125961303711, "learning_rate": 1e-05, "loss": 0.9692, "step": 79325 }, { "epoch": 70.26572187776793, "grad_norm": 0.2670302093029022, "learning_rate": 1e-05, "loss": 0.981, "step": 79330 }, { "epoch": 70.27015057573074, "grad_norm": 0.27516040205955505, "learning_rate": 1e-05, "loss": 0.9466, "step": 79335 }, { "epoch": 70.27457927369353, "grad_norm": 0.24188938736915588, "learning_rate": 1e-05, "loss": 0.9612, "step": 79340 }, { "epoch": 70.27900797165633, "grad_norm": 0.2529669404029846, "learning_rate": 1e-05, "loss": 0.9737, "step": 79345 }, { "epoch": 70.28343666961914, "grad_norm": 0.2787066400051117, "learning_rate": 1e-05, "loss": 0.9635, "step": 79350 }, { "epoch": 70.28786536758193, "grad_norm": 0.2558532655239105, "learning_rate": 1e-05, "loss": 0.9796, "step": 79355 }, { "epoch": 70.29229406554472, "grad_norm": 0.24675633013248444, "learning_rate": 1e-05, "loss": 0.9587, "step": 79360 }, { "epoch": 70.29672276350753, "grad_norm": 0.23144817352294922, "learning_rate": 1e-05, "loss": 1.0042, "step": 79365 }, { "epoch": 70.30115146147033, "grad_norm": 0.25335338711738586, "learning_rate": 1e-05, "loss": 0.9786, "step": 79370 }, { "epoch": 70.30558015943312, "grad_norm": 0.24836066365242004, "learning_rate": 1e-05, "loss": 0.9655, "step": 79375 }, { "epoch": 70.31000885739593, "grad_norm": 0.24997423589229584, "learning_rate": 1e-05, "loss": 0.9656, "step": 79380 }, { "epoch": 70.31443755535872, "grad_norm": 0.24430014193058014, "learning_rate": 1e-05, "loss": 0.9731, "step": 79385 }, { "epoch": 70.31886625332152, "grad_norm": 0.2486829161643982, "learning_rate": 1e-05, "loss": 0.8792, "step": 79390 }, { "epoch": 70.32329495128432, "grad_norm": 0.2654065489768982, "learning_rate": 1e-05, "loss": 0.9627, "step": 79395 }, { "epoch": 70.32772364924712, "grad_norm": 0.19608627259731293, "learning_rate": 1e-05, "loss": 0.9505, "step": 79400 }, { "epoch": 70.33215234720993, "grad_norm": 0.20926392078399658, "learning_rate": 1e-05, "loss": 1.0012, "step": 79405 }, { "epoch": 70.33658104517272, "grad_norm": 0.24228957295417786, "learning_rate": 1e-05, "loss": 0.9579, "step": 79410 }, { "epoch": 70.34100974313552, "grad_norm": 0.23176786303520203, "learning_rate": 1e-05, "loss": 0.9264, "step": 79415 }, { "epoch": 70.34543844109832, "grad_norm": 0.26702576875686646, "learning_rate": 1e-05, "loss": 1.0175, "step": 79420 }, { "epoch": 70.34986713906112, "grad_norm": 0.2655392289161682, "learning_rate": 1e-05, "loss": 0.9358, "step": 79425 }, { "epoch": 70.35429583702391, "grad_norm": 0.22481006383895874, "learning_rate": 1e-05, "loss": 0.923, "step": 79430 }, { "epoch": 70.35872453498672, "grad_norm": 0.23333734273910522, "learning_rate": 1e-05, "loss": 0.965, "step": 79435 }, { "epoch": 70.36315323294951, "grad_norm": 0.23280106484889984, "learning_rate": 1e-05, "loss": 0.955, "step": 79440 }, { "epoch": 70.36758193091231, "grad_norm": 0.223131000995636, "learning_rate": 1e-05, "loss": 0.9337, "step": 79445 }, { "epoch": 70.37201062887512, "grad_norm": 0.25419679284095764, "learning_rate": 1e-05, "loss": 0.9451, "step": 79450 }, { "epoch": 70.37643932683791, "grad_norm": 0.2271285355091095, "learning_rate": 1e-05, "loss": 0.9908, "step": 79455 }, { "epoch": 70.3808680248007, "grad_norm": 0.27452829480171204, "learning_rate": 1e-05, "loss": 1.0252, "step": 79460 }, { "epoch": 70.38529672276351, "grad_norm": 0.22552655637264252, "learning_rate": 1e-05, "loss": 1.021, "step": 79465 }, { "epoch": 70.38972542072631, "grad_norm": 0.28763824701309204, "learning_rate": 1e-05, "loss": 0.9382, "step": 79470 }, { "epoch": 70.3941541186891, "grad_norm": 0.2525554597377777, "learning_rate": 1e-05, "loss": 0.9455, "step": 79475 }, { "epoch": 70.39858281665191, "grad_norm": 0.2524948716163635, "learning_rate": 1e-05, "loss": 0.9693, "step": 79480 }, { "epoch": 70.4030115146147, "grad_norm": 0.21762143075466156, "learning_rate": 1e-05, "loss": 0.9394, "step": 79485 }, { "epoch": 70.4074402125775, "grad_norm": 0.24756409227848053, "learning_rate": 1e-05, "loss": 0.9533, "step": 79490 }, { "epoch": 70.4118689105403, "grad_norm": 0.22603274881839752, "learning_rate": 1e-05, "loss": 0.9665, "step": 79495 }, { "epoch": 70.4162976085031, "grad_norm": 0.2580925226211548, "learning_rate": 1e-05, "loss": 1.0077, "step": 79500 }, { "epoch": 70.4207263064659, "grad_norm": 0.26012122631073, "learning_rate": 1e-05, "loss": 0.9299, "step": 79505 }, { "epoch": 70.4251550044287, "grad_norm": 0.21958008408546448, "learning_rate": 1e-05, "loss": 0.896, "step": 79510 }, { "epoch": 70.4295837023915, "grad_norm": 0.23766876757144928, "learning_rate": 1e-05, "loss": 1.013, "step": 79515 }, { "epoch": 70.43401240035429, "grad_norm": 0.24751250445842743, "learning_rate": 1e-05, "loss": 0.976, "step": 79520 }, { "epoch": 70.4384410983171, "grad_norm": 0.2513173520565033, "learning_rate": 1e-05, "loss": 0.9647, "step": 79525 }, { "epoch": 70.4428697962799, "grad_norm": 0.20737197995185852, "learning_rate": 1e-05, "loss": 0.9898, "step": 79530 }, { "epoch": 70.44729849424269, "grad_norm": 0.2462579905986786, "learning_rate": 1e-05, "loss": 1.0375, "step": 79535 }, { "epoch": 70.4517271922055, "grad_norm": 0.2730797231197357, "learning_rate": 1e-05, "loss": 0.9333, "step": 79540 }, { "epoch": 70.45615589016829, "grad_norm": 0.20168955624103546, "learning_rate": 1e-05, "loss": 0.9603, "step": 79545 }, { "epoch": 70.46058458813108, "grad_norm": 0.22944588959217072, "learning_rate": 1e-05, "loss": 0.962, "step": 79550 }, { "epoch": 70.46501328609389, "grad_norm": 0.24466437101364136, "learning_rate": 1e-05, "loss": 0.9791, "step": 79555 }, { "epoch": 70.46944198405669, "grad_norm": 0.22957712411880493, "learning_rate": 1e-05, "loss": 0.9382, "step": 79560 }, { "epoch": 70.47387068201948, "grad_norm": 0.2648143470287323, "learning_rate": 1e-05, "loss": 1.0275, "step": 79565 }, { "epoch": 70.47829937998229, "grad_norm": 0.2452268898487091, "learning_rate": 1e-05, "loss": 0.9769, "step": 79570 }, { "epoch": 70.48272807794508, "grad_norm": 0.230984166264534, "learning_rate": 1e-05, "loss": 0.9842, "step": 79575 }, { "epoch": 70.48715677590788, "grad_norm": 0.2508372962474823, "learning_rate": 1e-05, "loss": 0.9814, "step": 79580 }, { "epoch": 70.49158547387069, "grad_norm": 0.2418755739927292, "learning_rate": 1e-05, "loss": 0.9566, "step": 79585 }, { "epoch": 70.49601417183348, "grad_norm": 0.20538198947906494, "learning_rate": 1e-05, "loss": 1.01, "step": 79590 }, { "epoch": 70.50044286979627, "grad_norm": 0.22137463092803955, "learning_rate": 1e-05, "loss": 0.944, "step": 79595 }, { "epoch": 70.50487156775908, "grad_norm": 0.22290237247943878, "learning_rate": 1e-05, "loss": 0.9368, "step": 79600 }, { "epoch": 70.50930026572188, "grad_norm": 0.23100435733795166, "learning_rate": 1e-05, "loss": 1.0192, "step": 79605 }, { "epoch": 70.51372896368467, "grad_norm": 0.26087355613708496, "learning_rate": 1e-05, "loss": 0.9215, "step": 79610 }, { "epoch": 70.51815766164748, "grad_norm": 0.22224442660808563, "learning_rate": 1e-05, "loss": 0.9433, "step": 79615 }, { "epoch": 70.52258635961027, "grad_norm": 0.2380107194185257, "learning_rate": 1e-05, "loss": 0.9559, "step": 79620 }, { "epoch": 70.52701505757307, "grad_norm": 0.24505631625652313, "learning_rate": 1e-05, "loss": 0.9578, "step": 79625 }, { "epoch": 70.53144375553588, "grad_norm": 0.22856011986732483, "learning_rate": 1e-05, "loss": 0.9813, "step": 79630 }, { "epoch": 70.53587245349867, "grad_norm": 0.24574953317642212, "learning_rate": 1e-05, "loss": 0.9705, "step": 79635 }, { "epoch": 70.54030115146146, "grad_norm": 0.22716130316257477, "learning_rate": 1e-05, "loss": 0.9332, "step": 79640 }, { "epoch": 70.54472984942427, "grad_norm": 0.20567893981933594, "learning_rate": 1e-05, "loss": 0.9772, "step": 79645 }, { "epoch": 70.54915854738707, "grad_norm": 0.23963038623332977, "learning_rate": 1e-05, "loss": 0.976, "step": 79650 }, { "epoch": 70.55358724534987, "grad_norm": 0.23295466601848602, "learning_rate": 1e-05, "loss": 0.9425, "step": 79655 }, { "epoch": 70.55801594331267, "grad_norm": 0.26455336809158325, "learning_rate": 1e-05, "loss": 0.9873, "step": 79660 }, { "epoch": 70.56244464127546, "grad_norm": 0.2628478705883026, "learning_rate": 1e-05, "loss": 0.9389, "step": 79665 }, { "epoch": 70.56687333923827, "grad_norm": 0.22359447181224823, "learning_rate": 1e-05, "loss": 0.9721, "step": 79670 }, { "epoch": 70.57130203720106, "grad_norm": 0.239747554063797, "learning_rate": 1e-05, "loss": 0.9899, "step": 79675 }, { "epoch": 70.57573073516386, "grad_norm": 0.19292421638965607, "learning_rate": 1e-05, "loss": 1.0173, "step": 79680 }, { "epoch": 70.58015943312667, "grad_norm": 0.24212707579135895, "learning_rate": 1e-05, "loss": 0.9685, "step": 79685 }, { "epoch": 70.58458813108946, "grad_norm": 0.23006929457187653, "learning_rate": 1e-05, "loss": 0.991, "step": 79690 }, { "epoch": 70.58901682905226, "grad_norm": 0.32561323046684265, "learning_rate": 1e-05, "loss": 0.9477, "step": 79695 }, { "epoch": 70.59344552701506, "grad_norm": 0.22944635152816772, "learning_rate": 1e-05, "loss": 1.021, "step": 79700 }, { "epoch": 70.59787422497786, "grad_norm": 0.2449757158756256, "learning_rate": 1e-05, "loss": 1.0047, "step": 79705 }, { "epoch": 70.60230292294065, "grad_norm": 0.21088825166225433, "learning_rate": 1e-05, "loss": 0.9834, "step": 79710 }, { "epoch": 70.60673162090346, "grad_norm": 0.2369798719882965, "learning_rate": 1e-05, "loss": 0.9662, "step": 79715 }, { "epoch": 70.61116031886625, "grad_norm": 0.24787916243076324, "learning_rate": 1e-05, "loss": 0.9234, "step": 79720 }, { "epoch": 70.61558901682905, "grad_norm": 0.26056164503097534, "learning_rate": 1e-05, "loss": 0.9568, "step": 79725 }, { "epoch": 70.62001771479186, "grad_norm": 0.20133395493030548, "learning_rate": 1e-05, "loss": 0.9767, "step": 79730 }, { "epoch": 70.62444641275465, "grad_norm": 0.24059037864208221, "learning_rate": 1e-05, "loss": 0.9633, "step": 79735 }, { "epoch": 70.62887511071744, "grad_norm": 0.2369968444108963, "learning_rate": 1e-05, "loss": 0.9681, "step": 79740 }, { "epoch": 70.63330380868025, "grad_norm": 0.21638371050357819, "learning_rate": 1e-05, "loss": 0.9445, "step": 79745 }, { "epoch": 70.63773250664305, "grad_norm": 0.19638800621032715, "learning_rate": 1e-05, "loss": 0.9922, "step": 79750 }, { "epoch": 70.64216120460584, "grad_norm": 0.2876845896244049, "learning_rate": 1e-05, "loss": 0.9637, "step": 79755 }, { "epoch": 70.64658990256865, "grad_norm": 0.25584158301353455, "learning_rate": 1e-05, "loss": 0.9856, "step": 79760 }, { "epoch": 70.65101860053144, "grad_norm": 0.24557648599147797, "learning_rate": 1e-05, "loss": 1.0226, "step": 79765 }, { "epoch": 70.65544729849424, "grad_norm": 0.2286657691001892, "learning_rate": 1e-05, "loss": 0.9982, "step": 79770 }, { "epoch": 70.65987599645705, "grad_norm": 0.23045624792575836, "learning_rate": 1e-05, "loss": 0.9728, "step": 79775 }, { "epoch": 70.66430469441984, "grad_norm": 0.2590855360031128, "learning_rate": 1e-05, "loss": 0.9764, "step": 79780 }, { "epoch": 70.66873339238263, "grad_norm": 0.22924913465976715, "learning_rate": 1e-05, "loss": 0.9816, "step": 79785 }, { "epoch": 70.67316209034544, "grad_norm": 0.26343294978141785, "learning_rate": 1e-05, "loss": 1.021, "step": 79790 }, { "epoch": 70.67759078830824, "grad_norm": 0.23316098749637604, "learning_rate": 1e-05, "loss": 0.9309, "step": 79795 }, { "epoch": 70.68201948627103, "grad_norm": 0.2377476692199707, "learning_rate": 1e-05, "loss": 0.9381, "step": 79800 }, { "epoch": 70.68644818423384, "grad_norm": 0.2573939859867096, "learning_rate": 1e-05, "loss": 0.956, "step": 79805 }, { "epoch": 70.69087688219663, "grad_norm": 0.243405282497406, "learning_rate": 1e-05, "loss": 0.9669, "step": 79810 }, { "epoch": 70.69530558015943, "grad_norm": 0.2553041875362396, "learning_rate": 1e-05, "loss": 0.8978, "step": 79815 }, { "epoch": 70.69973427812224, "grad_norm": 0.24558883905410767, "learning_rate": 1e-05, "loss": 0.9881, "step": 79820 }, { "epoch": 70.70416297608503, "grad_norm": 0.21875348687171936, "learning_rate": 1e-05, "loss": 0.9415, "step": 79825 }, { "epoch": 70.70859167404782, "grad_norm": 0.23658457398414612, "learning_rate": 1e-05, "loss": 0.9589, "step": 79830 }, { "epoch": 70.71302037201063, "grad_norm": 0.22629718482494354, "learning_rate": 1e-05, "loss": 0.9975, "step": 79835 }, { "epoch": 70.71744906997343, "grad_norm": 0.25328344106674194, "learning_rate": 1e-05, "loss": 0.9616, "step": 79840 }, { "epoch": 70.72187776793622, "grad_norm": 0.22597786784172058, "learning_rate": 1e-05, "loss": 0.9637, "step": 79845 }, { "epoch": 70.72630646589903, "grad_norm": 0.23131953179836273, "learning_rate": 1e-05, "loss": 0.9646, "step": 79850 }, { "epoch": 70.73073516386182, "grad_norm": 0.2347148358821869, "learning_rate": 1e-05, "loss": 0.9949, "step": 79855 }, { "epoch": 70.73516386182462, "grad_norm": 0.23361410200595856, "learning_rate": 1e-05, "loss": 0.9001, "step": 79860 }, { "epoch": 70.73959255978743, "grad_norm": 0.2147773951292038, "learning_rate": 1e-05, "loss": 0.9139, "step": 79865 }, { "epoch": 70.74402125775022, "grad_norm": 0.22081850469112396, "learning_rate": 1e-05, "loss": 0.9958, "step": 79870 }, { "epoch": 70.74844995571301, "grad_norm": 0.21614603698253632, "learning_rate": 1e-05, "loss": 0.9653, "step": 79875 }, { "epoch": 70.75287865367582, "grad_norm": 0.22959750890731812, "learning_rate": 1e-05, "loss": 0.9533, "step": 79880 }, { "epoch": 70.75730735163862, "grad_norm": 0.23026128113269806, "learning_rate": 1e-05, "loss": 0.935, "step": 79885 }, { "epoch": 70.76173604960141, "grad_norm": 0.24133040010929108, "learning_rate": 1e-05, "loss": 0.9561, "step": 79890 }, { "epoch": 70.76616474756422, "grad_norm": 0.20410820841789246, "learning_rate": 1e-05, "loss": 0.9986, "step": 79895 }, { "epoch": 70.77059344552701, "grad_norm": 0.25879955291748047, "learning_rate": 1e-05, "loss": 0.981, "step": 79900 }, { "epoch": 70.77502214348982, "grad_norm": 0.23236370086669922, "learning_rate": 1e-05, "loss": 0.9558, "step": 79905 }, { "epoch": 70.77945084145261, "grad_norm": 0.269452840089798, "learning_rate": 1e-05, "loss": 0.9737, "step": 79910 }, { "epoch": 70.78387953941541, "grad_norm": 0.22299674153327942, "learning_rate": 1e-05, "loss": 0.9214, "step": 79915 }, { "epoch": 70.78830823737822, "grad_norm": 0.24492210149765015, "learning_rate": 1e-05, "loss": 0.9928, "step": 79920 }, { "epoch": 70.79273693534101, "grad_norm": 0.2115638256072998, "learning_rate": 1e-05, "loss": 0.9822, "step": 79925 }, { "epoch": 70.7971656333038, "grad_norm": 0.25674957036972046, "learning_rate": 1e-05, "loss": 0.9739, "step": 79930 }, { "epoch": 70.80159433126661, "grad_norm": 0.23233728110790253, "learning_rate": 1e-05, "loss": 0.9394, "step": 79935 }, { "epoch": 70.80602302922941, "grad_norm": 0.22099140286445618, "learning_rate": 1e-05, "loss": 0.9689, "step": 79940 }, { "epoch": 70.8104517271922, "grad_norm": 0.23403653502464294, "learning_rate": 1e-05, "loss": 0.9466, "step": 79945 }, { "epoch": 70.81488042515501, "grad_norm": 0.22350962460041046, "learning_rate": 1e-05, "loss": 0.9678, "step": 79950 }, { "epoch": 70.8193091231178, "grad_norm": 0.24749670922756195, "learning_rate": 1e-05, "loss": 0.9609, "step": 79955 }, { "epoch": 70.8237378210806, "grad_norm": 0.2343074530363083, "learning_rate": 1e-05, "loss": 0.9741, "step": 79960 }, { "epoch": 70.8281665190434, "grad_norm": 0.2226838916540146, "learning_rate": 1e-05, "loss": 0.9755, "step": 79965 }, { "epoch": 70.8325952170062, "grad_norm": 0.34486261010169983, "learning_rate": 1e-05, "loss": 0.9782, "step": 79970 }, { "epoch": 70.837023914969, "grad_norm": 0.25514116883277893, "learning_rate": 1e-05, "loss": 0.9294, "step": 79975 }, { "epoch": 70.8414526129318, "grad_norm": 0.2079382687807083, "learning_rate": 1e-05, "loss": 0.943, "step": 79980 }, { "epoch": 70.8458813108946, "grad_norm": 0.24633552134037018, "learning_rate": 1e-05, "loss": 1.0244, "step": 79985 }, { "epoch": 70.85031000885739, "grad_norm": 0.21729297935962677, "learning_rate": 1e-05, "loss": 0.9447, "step": 79990 }, { "epoch": 70.8547387068202, "grad_norm": 0.21760140359401703, "learning_rate": 1e-05, "loss": 1.0192, "step": 79995 }, { "epoch": 70.859167404783, "grad_norm": 0.2608988285064697, "learning_rate": 1e-05, "loss": 1.0259, "step": 80000 }, { "epoch": 70.86359610274579, "grad_norm": 0.2253909558057785, "learning_rate": 1e-05, "loss": 0.9326, "step": 80005 }, { "epoch": 70.8680248007086, "grad_norm": 0.2454213798046112, "learning_rate": 1e-05, "loss": 0.9389, "step": 80010 }, { "epoch": 70.87245349867139, "grad_norm": 0.235086590051651, "learning_rate": 1e-05, "loss": 0.9683, "step": 80015 }, { "epoch": 70.87688219663418, "grad_norm": 0.23786352574825287, "learning_rate": 1e-05, "loss": 1.0164, "step": 80020 }, { "epoch": 70.881310894597, "grad_norm": 0.21125487983226776, "learning_rate": 1e-05, "loss": 0.8755, "step": 80025 }, { "epoch": 70.88573959255979, "grad_norm": 0.23355448246002197, "learning_rate": 1e-05, "loss": 0.9948, "step": 80030 }, { "epoch": 70.89016829052258, "grad_norm": 0.24758510291576385, "learning_rate": 1e-05, "loss": 0.9894, "step": 80035 }, { "epoch": 70.89459698848539, "grad_norm": 0.2557387351989746, "learning_rate": 1e-05, "loss": 0.952, "step": 80040 }, { "epoch": 70.89902568644818, "grad_norm": 0.24200406670570374, "learning_rate": 1e-05, "loss": 0.9837, "step": 80045 }, { "epoch": 70.90345438441098, "grad_norm": 0.2286711484193802, "learning_rate": 1e-05, "loss": 0.9843, "step": 80050 }, { "epoch": 70.90788308237379, "grad_norm": 0.2260790765285492, "learning_rate": 1e-05, "loss": 0.9189, "step": 80055 }, { "epoch": 70.91231178033658, "grad_norm": 0.23960740864276886, "learning_rate": 1e-05, "loss": 0.9645, "step": 80060 }, { "epoch": 70.91674047829937, "grad_norm": 0.2657438814640045, "learning_rate": 1e-05, "loss": 0.9813, "step": 80065 }, { "epoch": 70.92116917626218, "grad_norm": 0.2587682604789734, "learning_rate": 1e-05, "loss": 0.9889, "step": 80070 }, { "epoch": 70.92559787422498, "grad_norm": 0.22517229616641998, "learning_rate": 1e-05, "loss": 0.9592, "step": 80075 }, { "epoch": 70.93002657218777, "grad_norm": 0.29738885164260864, "learning_rate": 1e-05, "loss": 0.9492, "step": 80080 }, { "epoch": 70.93445527015058, "grad_norm": 0.2340104728937149, "learning_rate": 1e-05, "loss": 0.994, "step": 80085 }, { "epoch": 70.93888396811337, "grad_norm": 0.2501141428947449, "learning_rate": 1e-05, "loss": 0.9503, "step": 80090 }, { "epoch": 70.94331266607617, "grad_norm": 0.22814619541168213, "learning_rate": 1e-05, "loss": 0.9623, "step": 80095 }, { "epoch": 70.94774136403898, "grad_norm": 0.27238234877586365, "learning_rate": 1e-05, "loss": 0.9746, "step": 80100 }, { "epoch": 70.95217006200177, "grad_norm": 0.2445191890001297, "learning_rate": 1e-05, "loss": 0.9284, "step": 80105 }, { "epoch": 70.95659875996456, "grad_norm": 0.23224687576293945, "learning_rate": 1e-05, "loss": 0.998, "step": 80110 }, { "epoch": 70.96102745792737, "grad_norm": 0.23445631563663483, "learning_rate": 1e-05, "loss": 0.9438, "step": 80115 }, { "epoch": 70.96545615589017, "grad_norm": 0.26628297567367554, "learning_rate": 1e-05, "loss": 1.028, "step": 80120 }, { "epoch": 70.96988485385296, "grad_norm": 0.3078608512878418, "learning_rate": 1e-05, "loss": 0.9725, "step": 80125 }, { "epoch": 70.97431355181577, "grad_norm": 0.2374720424413681, "learning_rate": 1e-05, "loss": 0.964, "step": 80130 }, { "epoch": 70.97874224977856, "grad_norm": 0.31722986698150635, "learning_rate": 1e-05, "loss": 0.9739, "step": 80135 }, { "epoch": 70.98317094774137, "grad_norm": 0.2344239056110382, "learning_rate": 1e-05, "loss": 0.9608, "step": 80140 }, { "epoch": 70.98759964570417, "grad_norm": 0.2642884850502014, "learning_rate": 1e-05, "loss": 0.9302, "step": 80145 }, { "epoch": 70.99202834366696, "grad_norm": 0.2550107538700104, "learning_rate": 1e-05, "loss": 1.0004, "step": 80150 }, { "epoch": 70.99645704162977, "grad_norm": 0.2752811014652252, "learning_rate": 1e-05, "loss": 0.9788, "step": 80155 }, { "epoch": 71.00088573959256, "grad_norm": 0.2818542420864105, "learning_rate": 1e-05, "loss": 0.9414, "step": 80160 }, { "epoch": 71.00531443755536, "grad_norm": 0.2628529369831085, "learning_rate": 1e-05, "loss": 1.0139, "step": 80165 }, { "epoch": 71.00974313551816, "grad_norm": 0.2648138999938965, "learning_rate": 1e-05, "loss": 0.9789, "step": 80170 }, { "epoch": 71.01417183348096, "grad_norm": 0.21259281039237976, "learning_rate": 1e-05, "loss": 0.9276, "step": 80175 }, { "epoch": 71.01860053144375, "grad_norm": 0.239787757396698, "learning_rate": 1e-05, "loss": 0.9844, "step": 80180 }, { "epoch": 71.02302922940656, "grad_norm": 0.24108575284481049, "learning_rate": 1e-05, "loss": 0.9912, "step": 80185 }, { "epoch": 71.02745792736935, "grad_norm": 0.2325608879327774, "learning_rate": 1e-05, "loss": 0.934, "step": 80190 }, { "epoch": 71.03188662533215, "grad_norm": 0.26507678627967834, "learning_rate": 1e-05, "loss": 0.9217, "step": 80195 }, { "epoch": 71.03631532329496, "grad_norm": 0.23155640065670013, "learning_rate": 1e-05, "loss": 0.9403, "step": 80200 }, { "epoch": 71.04074402125775, "grad_norm": 0.221502885222435, "learning_rate": 1e-05, "loss": 0.9931, "step": 80205 }, { "epoch": 71.04517271922055, "grad_norm": 0.2519630193710327, "learning_rate": 1e-05, "loss": 0.9901, "step": 80210 }, { "epoch": 71.04960141718335, "grad_norm": 0.22802503407001495, "learning_rate": 1e-05, "loss": 0.968, "step": 80215 }, { "epoch": 71.05403011514615, "grad_norm": 0.21995733678340912, "learning_rate": 1e-05, "loss": 0.982, "step": 80220 }, { "epoch": 71.05845881310894, "grad_norm": 0.23330344259738922, "learning_rate": 1e-05, "loss": 0.965, "step": 80225 }, { "epoch": 71.06288751107175, "grad_norm": 0.20638588070869446, "learning_rate": 1e-05, "loss": 0.925, "step": 80230 }, { "epoch": 71.06731620903454, "grad_norm": 0.2572697401046753, "learning_rate": 1e-05, "loss": 0.9262, "step": 80235 }, { "epoch": 71.07174490699734, "grad_norm": 0.2319895625114441, "learning_rate": 1e-05, "loss": 1.0021, "step": 80240 }, { "epoch": 71.07617360496015, "grad_norm": 0.27966466546058655, "learning_rate": 1e-05, "loss": 0.969, "step": 80245 }, { "epoch": 71.08060230292294, "grad_norm": 0.2795937955379486, "learning_rate": 1e-05, "loss": 0.989, "step": 80250 }, { "epoch": 71.08503100088573, "grad_norm": 0.27428826689720154, "learning_rate": 1e-05, "loss": 0.9562, "step": 80255 }, { "epoch": 71.08945969884854, "grad_norm": 0.2572523355484009, "learning_rate": 1e-05, "loss": 0.9503, "step": 80260 }, { "epoch": 71.09388839681134, "grad_norm": 0.2241087406873703, "learning_rate": 1e-05, "loss": 1.0161, "step": 80265 }, { "epoch": 71.09831709477413, "grad_norm": 0.2676275670528412, "learning_rate": 1e-05, "loss": 1.0268, "step": 80270 }, { "epoch": 71.10274579273694, "grad_norm": 0.275318443775177, "learning_rate": 1e-05, "loss": 0.9355, "step": 80275 }, { "epoch": 71.10717449069973, "grad_norm": 0.2725452184677124, "learning_rate": 1e-05, "loss": 0.9459, "step": 80280 }, { "epoch": 71.11160318866253, "grad_norm": 0.23706059157848358, "learning_rate": 1e-05, "loss": 0.991, "step": 80285 }, { "epoch": 71.11603188662534, "grad_norm": 0.261922150850296, "learning_rate": 1e-05, "loss": 0.9838, "step": 80290 }, { "epoch": 71.12046058458813, "grad_norm": 0.26080769300460815, "learning_rate": 1e-05, "loss": 0.9666, "step": 80295 }, { "epoch": 71.12488928255092, "grad_norm": 0.25540903210639954, "learning_rate": 1e-05, "loss": 0.9935, "step": 80300 }, { "epoch": 71.12931798051373, "grad_norm": 0.25440841913223267, "learning_rate": 1e-05, "loss": 0.9749, "step": 80305 }, { "epoch": 71.13374667847653, "grad_norm": 0.22302967309951782, "learning_rate": 1e-05, "loss": 0.9536, "step": 80310 }, { "epoch": 71.13817537643932, "grad_norm": 0.26216715574264526, "learning_rate": 1e-05, "loss": 0.9416, "step": 80315 }, { "epoch": 71.14260407440213, "grad_norm": 0.2351176142692566, "learning_rate": 1e-05, "loss": 0.9603, "step": 80320 }, { "epoch": 71.14703277236492, "grad_norm": 0.27764636278152466, "learning_rate": 1e-05, "loss": 1.0004, "step": 80325 }, { "epoch": 71.15146147032772, "grad_norm": 0.26274585723876953, "learning_rate": 1e-05, "loss": 0.9594, "step": 80330 }, { "epoch": 71.15589016829053, "grad_norm": 0.2509876489639282, "learning_rate": 1e-05, "loss": 0.9375, "step": 80335 }, { "epoch": 71.16031886625332, "grad_norm": 0.23072203993797302, "learning_rate": 1e-05, "loss": 0.9534, "step": 80340 }, { "epoch": 71.16474756421611, "grad_norm": 0.21615734696388245, "learning_rate": 1e-05, "loss": 0.9316, "step": 80345 }, { "epoch": 71.16917626217892, "grad_norm": 0.25847092270851135, "learning_rate": 1e-05, "loss": 0.9884, "step": 80350 }, { "epoch": 71.17360496014172, "grad_norm": 0.24947138130664825, "learning_rate": 1e-05, "loss": 0.9399, "step": 80355 }, { "epoch": 71.17803365810451, "grad_norm": 0.2748432755470276, "learning_rate": 1e-05, "loss": 0.9694, "step": 80360 }, { "epoch": 71.18246235606732, "grad_norm": 0.24430978298187256, "learning_rate": 1e-05, "loss": 1.0211, "step": 80365 }, { "epoch": 71.18689105403011, "grad_norm": 0.22269472479820251, "learning_rate": 1e-05, "loss": 0.9657, "step": 80370 }, { "epoch": 71.1913197519929, "grad_norm": 0.24195894598960876, "learning_rate": 1e-05, "loss": 0.9545, "step": 80375 }, { "epoch": 71.19574844995572, "grad_norm": 0.23681263625621796, "learning_rate": 1e-05, "loss": 0.9693, "step": 80380 }, { "epoch": 71.20017714791851, "grad_norm": 0.2955668270587921, "learning_rate": 1e-05, "loss": 0.939, "step": 80385 }, { "epoch": 71.20460584588132, "grad_norm": 0.25044575333595276, "learning_rate": 1e-05, "loss": 0.934, "step": 80390 }, { "epoch": 71.20903454384411, "grad_norm": 0.24081918597221375, "learning_rate": 1e-05, "loss": 0.9553, "step": 80395 }, { "epoch": 71.2134632418069, "grad_norm": 0.20423902571201324, "learning_rate": 1e-05, "loss": 0.9804, "step": 80400 }, { "epoch": 71.21789193976971, "grad_norm": 0.23293322324752808, "learning_rate": 1e-05, "loss": 0.9743, "step": 80405 }, { "epoch": 71.22232063773251, "grad_norm": 0.23395177721977234, "learning_rate": 1e-05, "loss": 0.9734, "step": 80410 }, { "epoch": 71.2267493356953, "grad_norm": 0.26237043738365173, "learning_rate": 1e-05, "loss": 0.9798, "step": 80415 }, { "epoch": 71.23117803365811, "grad_norm": 0.24904310703277588, "learning_rate": 1e-05, "loss": 0.9376, "step": 80420 }, { "epoch": 71.2356067316209, "grad_norm": 0.21965089440345764, "learning_rate": 1e-05, "loss": 0.9562, "step": 80425 }, { "epoch": 71.2400354295837, "grad_norm": 0.22282753884792328, "learning_rate": 1e-05, "loss": 0.9642, "step": 80430 }, { "epoch": 71.24446412754651, "grad_norm": 0.26240798830986023, "learning_rate": 1e-05, "loss": 0.9387, "step": 80435 }, { "epoch": 71.2488928255093, "grad_norm": 0.2755425274372101, "learning_rate": 1e-05, "loss": 0.9915, "step": 80440 }, { "epoch": 71.2533215234721, "grad_norm": 0.22789554297924042, "learning_rate": 1e-05, "loss": 0.9579, "step": 80445 }, { "epoch": 71.2577502214349, "grad_norm": 0.24028095602989197, "learning_rate": 1e-05, "loss": 0.9619, "step": 80450 }, { "epoch": 71.2621789193977, "grad_norm": 0.24441474676132202, "learning_rate": 1e-05, "loss": 1.0017, "step": 80455 }, { "epoch": 71.26660761736049, "grad_norm": 0.20573052763938904, "learning_rate": 1e-05, "loss": 0.9849, "step": 80460 }, { "epoch": 71.2710363153233, "grad_norm": 0.24030780792236328, "learning_rate": 1e-05, "loss": 0.9586, "step": 80465 }, { "epoch": 71.2754650132861, "grad_norm": 0.23251719772815704, "learning_rate": 1e-05, "loss": 0.9401, "step": 80470 }, { "epoch": 71.27989371124889, "grad_norm": 0.23238717019557953, "learning_rate": 1e-05, "loss": 0.9758, "step": 80475 }, { "epoch": 71.2843224092117, "grad_norm": 0.21438805758953094, "learning_rate": 1e-05, "loss": 0.9548, "step": 80480 }, { "epoch": 71.28875110717449, "grad_norm": 0.21263061463832855, "learning_rate": 1e-05, "loss": 0.9578, "step": 80485 }, { "epoch": 71.29317980513729, "grad_norm": 0.24810579419136047, "learning_rate": 1e-05, "loss": 0.9572, "step": 80490 }, { "epoch": 71.2976085031001, "grad_norm": 0.2711457908153534, "learning_rate": 1e-05, "loss": 0.9583, "step": 80495 }, { "epoch": 71.30203720106289, "grad_norm": 0.24710458517074585, "learning_rate": 1e-05, "loss": 1.0022, "step": 80500 }, { "epoch": 71.30646589902568, "grad_norm": 0.23652152717113495, "learning_rate": 1e-05, "loss": 0.9641, "step": 80505 }, { "epoch": 71.31089459698849, "grad_norm": 0.23710598051548004, "learning_rate": 1e-05, "loss": 1.0135, "step": 80510 }, { "epoch": 71.31532329495128, "grad_norm": 0.22188624739646912, "learning_rate": 1e-05, "loss": 0.9892, "step": 80515 }, { "epoch": 71.31975199291408, "grad_norm": 0.26686355471611023, "learning_rate": 1e-05, "loss": 0.9485, "step": 80520 }, { "epoch": 71.32418069087689, "grad_norm": 0.251725435256958, "learning_rate": 1e-05, "loss": 0.9281, "step": 80525 }, { "epoch": 71.32860938883968, "grad_norm": 0.2258744090795517, "learning_rate": 1e-05, "loss": 0.9937, "step": 80530 }, { "epoch": 71.33303808680247, "grad_norm": 0.2634340524673462, "learning_rate": 1e-05, "loss": 0.93, "step": 80535 }, { "epoch": 71.33746678476528, "grad_norm": 0.26706716418266296, "learning_rate": 1e-05, "loss": 1.0132, "step": 80540 }, { "epoch": 71.34189548272808, "grad_norm": 0.2580114006996155, "learning_rate": 1e-05, "loss": 1.006, "step": 80545 }, { "epoch": 71.34632418069087, "grad_norm": 0.23479372262954712, "learning_rate": 1e-05, "loss": 0.963, "step": 80550 }, { "epoch": 71.35075287865368, "grad_norm": 0.2756200432777405, "learning_rate": 1e-05, "loss": 0.9754, "step": 80555 }, { "epoch": 71.35518157661647, "grad_norm": 0.27503982186317444, "learning_rate": 1e-05, "loss": 0.9449, "step": 80560 }, { "epoch": 71.35961027457927, "grad_norm": 0.23908555507659912, "learning_rate": 1e-05, "loss": 1.0129, "step": 80565 }, { "epoch": 71.36403897254208, "grad_norm": 0.2406509816646576, "learning_rate": 1e-05, "loss": 0.9694, "step": 80570 }, { "epoch": 71.36846767050487, "grad_norm": 0.32873573899269104, "learning_rate": 1e-05, "loss": 0.9597, "step": 80575 }, { "epoch": 71.37289636846766, "grad_norm": 0.20651979744434357, "learning_rate": 1e-05, "loss": 0.9848, "step": 80580 }, { "epoch": 71.37732506643047, "grad_norm": 0.23700939118862152, "learning_rate": 1e-05, "loss": 0.946, "step": 80585 }, { "epoch": 71.38175376439327, "grad_norm": 0.258241206407547, "learning_rate": 1e-05, "loss": 0.9811, "step": 80590 }, { "epoch": 71.38618246235606, "grad_norm": 0.24222588539123535, "learning_rate": 1e-05, "loss": 0.981, "step": 80595 }, { "epoch": 71.39061116031887, "grad_norm": 0.2494470477104187, "learning_rate": 1e-05, "loss": 0.8667, "step": 80600 }, { "epoch": 71.39503985828166, "grad_norm": 0.23111043870449066, "learning_rate": 1e-05, "loss": 0.9052, "step": 80605 }, { "epoch": 71.39946855624446, "grad_norm": 0.27360033988952637, "learning_rate": 1e-05, "loss": 0.9606, "step": 80610 }, { "epoch": 71.40389725420727, "grad_norm": 0.23422251641750336, "learning_rate": 1e-05, "loss": 0.946, "step": 80615 }, { "epoch": 71.40832595217006, "grad_norm": 0.24004213511943817, "learning_rate": 1e-05, "loss": 0.9933, "step": 80620 }, { "epoch": 71.41275465013285, "grad_norm": 0.34923917055130005, "learning_rate": 1e-05, "loss": 0.9926, "step": 80625 }, { "epoch": 71.41718334809566, "grad_norm": 0.23606614768505096, "learning_rate": 1e-05, "loss": 0.9766, "step": 80630 }, { "epoch": 71.42161204605846, "grad_norm": 0.22275462746620178, "learning_rate": 1e-05, "loss": 0.9227, "step": 80635 }, { "epoch": 71.42604074402126, "grad_norm": 0.23076629638671875, "learning_rate": 1e-05, "loss": 0.9707, "step": 80640 }, { "epoch": 71.43046944198406, "grad_norm": 0.2563914954662323, "learning_rate": 1e-05, "loss": 0.9356, "step": 80645 }, { "epoch": 71.43489813994685, "grad_norm": 0.2539900839328766, "learning_rate": 1e-05, "loss": 0.9844, "step": 80650 }, { "epoch": 71.43932683790966, "grad_norm": 0.20931419730186462, "learning_rate": 1e-05, "loss": 0.9641, "step": 80655 }, { "epoch": 71.44375553587246, "grad_norm": 0.23672591149806976, "learning_rate": 1e-05, "loss": 0.9601, "step": 80660 }, { "epoch": 71.44818423383525, "grad_norm": 0.2351336032152176, "learning_rate": 1e-05, "loss": 0.9407, "step": 80665 }, { "epoch": 71.45261293179806, "grad_norm": 0.22419917583465576, "learning_rate": 1e-05, "loss": 0.9988, "step": 80670 }, { "epoch": 71.45704162976085, "grad_norm": 0.2815806269645691, "learning_rate": 1e-05, "loss": 0.9432, "step": 80675 }, { "epoch": 71.46147032772365, "grad_norm": 0.25801798701286316, "learning_rate": 1e-05, "loss": 0.9956, "step": 80680 }, { "epoch": 71.46589902568645, "grad_norm": 0.24311938881874084, "learning_rate": 1e-05, "loss": 0.9757, "step": 80685 }, { "epoch": 71.47032772364925, "grad_norm": 0.2610177993774414, "learning_rate": 1e-05, "loss": 0.9998, "step": 80690 }, { "epoch": 71.47475642161204, "grad_norm": 0.23146659135818481, "learning_rate": 1e-05, "loss": 1.0054, "step": 80695 }, { "epoch": 71.47918511957485, "grad_norm": 0.2188626527786255, "learning_rate": 1e-05, "loss": 0.9289, "step": 80700 }, { "epoch": 71.48361381753764, "grad_norm": 0.23460035026073456, "learning_rate": 1e-05, "loss": 0.9153, "step": 80705 }, { "epoch": 71.48804251550044, "grad_norm": 0.23641106486320496, "learning_rate": 1e-05, "loss": 0.9932, "step": 80710 }, { "epoch": 71.49247121346325, "grad_norm": 0.24951107800006866, "learning_rate": 1e-05, "loss": 0.9778, "step": 80715 }, { "epoch": 71.49689991142604, "grad_norm": 0.2044644057750702, "learning_rate": 1e-05, "loss": 0.9201, "step": 80720 }, { "epoch": 71.50132860938884, "grad_norm": 0.2317964881658554, "learning_rate": 1e-05, "loss": 0.9904, "step": 80725 }, { "epoch": 71.50575730735164, "grad_norm": 0.21704602241516113, "learning_rate": 1e-05, "loss": 0.933, "step": 80730 }, { "epoch": 71.51018600531444, "grad_norm": 0.22970236837863922, "learning_rate": 1e-05, "loss": 1.0101, "step": 80735 }, { "epoch": 71.51461470327723, "grad_norm": 0.21163766086101532, "learning_rate": 1e-05, "loss": 0.9968, "step": 80740 }, { "epoch": 71.51904340124004, "grad_norm": 0.21668152511119843, "learning_rate": 1e-05, "loss": 0.9892, "step": 80745 }, { "epoch": 71.52347209920283, "grad_norm": 0.2421610802412033, "learning_rate": 1e-05, "loss": 1.0036, "step": 80750 }, { "epoch": 71.52790079716563, "grad_norm": 0.20976890623569489, "learning_rate": 1e-05, "loss": 0.9851, "step": 80755 }, { "epoch": 71.53232949512844, "grad_norm": 0.2770538032054901, "learning_rate": 1e-05, "loss": 0.9664, "step": 80760 }, { "epoch": 71.53675819309123, "grad_norm": 0.22088707983493805, "learning_rate": 1e-05, "loss": 0.9805, "step": 80765 }, { "epoch": 71.54118689105402, "grad_norm": 0.22720152139663696, "learning_rate": 1e-05, "loss": 0.938, "step": 80770 }, { "epoch": 71.54561558901683, "grad_norm": 0.21053992211818695, "learning_rate": 1e-05, "loss": 0.9165, "step": 80775 }, { "epoch": 71.55004428697963, "grad_norm": 0.2627178430557251, "learning_rate": 1e-05, "loss": 1.0406, "step": 80780 }, { "epoch": 71.55447298494242, "grad_norm": 0.20366695523262024, "learning_rate": 1e-05, "loss": 0.9728, "step": 80785 }, { "epoch": 71.55890168290523, "grad_norm": 0.26274093985557556, "learning_rate": 1e-05, "loss": 0.9531, "step": 80790 }, { "epoch": 71.56333038086802, "grad_norm": 0.24148856103420258, "learning_rate": 1e-05, "loss": 1.0237, "step": 80795 }, { "epoch": 71.56775907883082, "grad_norm": 0.21823592483997345, "learning_rate": 1e-05, "loss": 0.9545, "step": 80800 }, { "epoch": 71.57218777679363, "grad_norm": 0.2478497177362442, "learning_rate": 1e-05, "loss": 0.9288, "step": 80805 }, { "epoch": 71.57661647475642, "grad_norm": 0.2173428237438202, "learning_rate": 1e-05, "loss": 0.9952, "step": 80810 }, { "epoch": 71.58104517271921, "grad_norm": 0.22447757422924042, "learning_rate": 1e-05, "loss": 0.9864, "step": 80815 }, { "epoch": 71.58547387068202, "grad_norm": 0.23631472885608673, "learning_rate": 1e-05, "loss": 0.9368, "step": 80820 }, { "epoch": 71.58990256864482, "grad_norm": 0.25925299525260925, "learning_rate": 1e-05, "loss": 0.9795, "step": 80825 }, { "epoch": 71.59433126660761, "grad_norm": 0.25300338864326477, "learning_rate": 1e-05, "loss": 0.9649, "step": 80830 }, { "epoch": 71.59875996457042, "grad_norm": 0.30748093128204346, "learning_rate": 1e-05, "loss": 0.9242, "step": 80835 }, { "epoch": 71.60318866253321, "grad_norm": 0.27988728880882263, "learning_rate": 1e-05, "loss": 0.947, "step": 80840 }, { "epoch": 71.60761736049601, "grad_norm": 0.2745327949523926, "learning_rate": 1e-05, "loss": 0.9574, "step": 80845 }, { "epoch": 71.61204605845882, "grad_norm": 0.22556182742118835, "learning_rate": 1e-05, "loss": 0.9285, "step": 80850 }, { "epoch": 71.61647475642161, "grad_norm": 0.2211562991142273, "learning_rate": 1e-05, "loss": 0.9595, "step": 80855 }, { "epoch": 71.6209034543844, "grad_norm": 0.28095027804374695, "learning_rate": 1e-05, "loss": 0.9748, "step": 80860 }, { "epoch": 71.62533215234721, "grad_norm": 0.21826890110969543, "learning_rate": 1e-05, "loss": 0.9763, "step": 80865 }, { "epoch": 71.62976085031, "grad_norm": 0.20751270651817322, "learning_rate": 1e-05, "loss": 1.0081, "step": 80870 }, { "epoch": 71.63418954827281, "grad_norm": 0.22506152093410492, "learning_rate": 1e-05, "loss": 0.9444, "step": 80875 }, { "epoch": 71.63861824623561, "grad_norm": 0.2570043206214905, "learning_rate": 1e-05, "loss": 0.9839, "step": 80880 }, { "epoch": 71.6430469441984, "grad_norm": 0.26006948947906494, "learning_rate": 1e-05, "loss": 0.9827, "step": 80885 }, { "epoch": 71.64747564216121, "grad_norm": 0.2555810809135437, "learning_rate": 1e-05, "loss": 0.9955, "step": 80890 }, { "epoch": 71.651904340124, "grad_norm": 0.21721920371055603, "learning_rate": 1e-05, "loss": 0.9699, "step": 80895 }, { "epoch": 71.6563330380868, "grad_norm": 0.2691342234611511, "learning_rate": 1e-05, "loss": 0.9628, "step": 80900 }, { "epoch": 71.66076173604961, "grad_norm": 0.28396010398864746, "learning_rate": 1e-05, "loss": 0.9363, "step": 80905 }, { "epoch": 71.6651904340124, "grad_norm": 0.27304667234420776, "learning_rate": 1e-05, "loss": 0.9666, "step": 80910 }, { "epoch": 71.6696191319752, "grad_norm": 0.2724715769290924, "learning_rate": 1e-05, "loss": 0.9192, "step": 80915 }, { "epoch": 71.674047829938, "grad_norm": 0.27306482195854187, "learning_rate": 1e-05, "loss": 0.9797, "step": 80920 }, { "epoch": 71.6784765279008, "grad_norm": 0.2507292926311493, "learning_rate": 1e-05, "loss": 0.97, "step": 80925 }, { "epoch": 71.68290522586359, "grad_norm": 0.23401039838790894, "learning_rate": 1e-05, "loss": 0.9386, "step": 80930 }, { "epoch": 71.6873339238264, "grad_norm": 0.26273906230926514, "learning_rate": 1e-05, "loss": 0.9802, "step": 80935 }, { "epoch": 71.6917626217892, "grad_norm": 0.3119945824146271, "learning_rate": 1e-05, "loss": 1.022, "step": 80940 }, { "epoch": 71.69619131975199, "grad_norm": 0.23534446954727173, "learning_rate": 1e-05, "loss": 0.9938, "step": 80945 }, { "epoch": 71.7006200177148, "grad_norm": 0.25379493832588196, "learning_rate": 1e-05, "loss": 0.9694, "step": 80950 }, { "epoch": 71.70504871567759, "grad_norm": 0.2314881533384323, "learning_rate": 1e-05, "loss": 0.9729, "step": 80955 }, { "epoch": 71.70947741364039, "grad_norm": 0.2233014851808548, "learning_rate": 1e-05, "loss": 0.9761, "step": 80960 }, { "epoch": 71.7139061116032, "grad_norm": 0.21803206205368042, "learning_rate": 1e-05, "loss": 0.8981, "step": 80965 }, { "epoch": 71.71833480956599, "grad_norm": 0.23618222773075104, "learning_rate": 1e-05, "loss": 0.9409, "step": 80970 }, { "epoch": 71.72276350752878, "grad_norm": 0.26837074756622314, "learning_rate": 1e-05, "loss": 0.9811, "step": 80975 }, { "epoch": 71.72719220549159, "grad_norm": 0.2029809057712555, "learning_rate": 1e-05, "loss": 0.9583, "step": 80980 }, { "epoch": 71.73162090345438, "grad_norm": 0.21906058490276337, "learning_rate": 1e-05, "loss": 1.0104, "step": 80985 }, { "epoch": 71.73604960141718, "grad_norm": 0.24671000242233276, "learning_rate": 1e-05, "loss": 0.9648, "step": 80990 }, { "epoch": 71.74047829937999, "grad_norm": 0.24907661974430084, "learning_rate": 1e-05, "loss": 0.965, "step": 80995 }, { "epoch": 71.74490699734278, "grad_norm": 0.21554157137870789, "learning_rate": 1e-05, "loss": 0.9985, "step": 81000 }, { "epoch": 71.74933569530558, "grad_norm": 0.22353962063789368, "learning_rate": 1e-05, "loss": 1.0062, "step": 81005 }, { "epoch": 71.75376439326838, "grad_norm": 0.22581346333026886, "learning_rate": 1e-05, "loss": 0.961, "step": 81010 }, { "epoch": 71.75819309123118, "grad_norm": 0.2615215480327606, "learning_rate": 1e-05, "loss": 0.9848, "step": 81015 }, { "epoch": 71.76262178919397, "grad_norm": 0.24465018510818481, "learning_rate": 1e-05, "loss": 0.9885, "step": 81020 }, { "epoch": 71.76705048715678, "grad_norm": 0.2122029960155487, "learning_rate": 1e-05, "loss": 0.9634, "step": 81025 }, { "epoch": 71.77147918511957, "grad_norm": 0.23267334699630737, "learning_rate": 1e-05, "loss": 0.9727, "step": 81030 }, { "epoch": 71.77590788308237, "grad_norm": 0.19814740121364594, "learning_rate": 1e-05, "loss": 0.9314, "step": 81035 }, { "epoch": 71.78033658104518, "grad_norm": 0.27602100372314453, "learning_rate": 1e-05, "loss": 0.9187, "step": 81040 }, { "epoch": 71.78476527900797, "grad_norm": 0.2953493893146515, "learning_rate": 1e-05, "loss": 0.9405, "step": 81045 }, { "epoch": 71.78919397697076, "grad_norm": 0.24956394731998444, "learning_rate": 1e-05, "loss": 0.991, "step": 81050 }, { "epoch": 71.79362267493357, "grad_norm": 0.2468077540397644, "learning_rate": 1e-05, "loss": 0.9512, "step": 81055 }, { "epoch": 71.79805137289637, "grad_norm": 0.2389238029718399, "learning_rate": 1e-05, "loss": 0.9974, "step": 81060 }, { "epoch": 71.80248007085916, "grad_norm": 0.2530921697616577, "learning_rate": 1e-05, "loss": 0.9808, "step": 81065 }, { "epoch": 71.80690876882197, "grad_norm": 0.22603349387645721, "learning_rate": 1e-05, "loss": 1.0133, "step": 81070 }, { "epoch": 71.81133746678476, "grad_norm": 0.230739027261734, "learning_rate": 1e-05, "loss": 0.9841, "step": 81075 }, { "epoch": 71.81576616474756, "grad_norm": 0.2293829619884491, "learning_rate": 1e-05, "loss": 0.9951, "step": 81080 }, { "epoch": 71.82019486271037, "grad_norm": 0.2462838739156723, "learning_rate": 1e-05, "loss": 0.9985, "step": 81085 }, { "epoch": 71.82462356067316, "grad_norm": 0.2447291910648346, "learning_rate": 1e-05, "loss": 0.954, "step": 81090 }, { "epoch": 71.82905225863595, "grad_norm": 0.22890999913215637, "learning_rate": 1e-05, "loss": 0.9736, "step": 81095 }, { "epoch": 71.83348095659876, "grad_norm": 0.24218162894248962, "learning_rate": 1e-05, "loss": 0.924, "step": 81100 }, { "epoch": 71.83790965456156, "grad_norm": 0.23259367048740387, "learning_rate": 1e-05, "loss": 0.9103, "step": 81105 }, { "epoch": 71.84233835252435, "grad_norm": 0.21914659440517426, "learning_rate": 1e-05, "loss": 0.9623, "step": 81110 }, { "epoch": 71.84676705048716, "grad_norm": 0.2505854368209839, "learning_rate": 1e-05, "loss": 1.0011, "step": 81115 }, { "epoch": 71.85119574844995, "grad_norm": 0.2469121366739273, "learning_rate": 1e-05, "loss": 0.9385, "step": 81120 }, { "epoch": 71.85562444641276, "grad_norm": 0.2482776790857315, "learning_rate": 1e-05, "loss": 0.9748, "step": 81125 }, { "epoch": 71.86005314437556, "grad_norm": 0.2741515338420868, "learning_rate": 1e-05, "loss": 0.9599, "step": 81130 }, { "epoch": 71.86448184233835, "grad_norm": 0.22157226502895355, "learning_rate": 1e-05, "loss": 0.9528, "step": 81135 }, { "epoch": 71.86891054030116, "grad_norm": 0.22523397207260132, "learning_rate": 1e-05, "loss": 0.9483, "step": 81140 }, { "epoch": 71.87333923826395, "grad_norm": 0.2368723601102829, "learning_rate": 1e-05, "loss": 1.0142, "step": 81145 }, { "epoch": 71.87776793622675, "grad_norm": 0.25687870383262634, "learning_rate": 1e-05, "loss": 0.939, "step": 81150 }, { "epoch": 71.88219663418955, "grad_norm": 0.22510018944740295, "learning_rate": 1e-05, "loss": 0.9894, "step": 81155 }, { "epoch": 71.88662533215235, "grad_norm": 0.2791641354560852, "learning_rate": 1e-05, "loss": 0.9925, "step": 81160 }, { "epoch": 71.89105403011514, "grad_norm": 0.27973106503486633, "learning_rate": 1e-05, "loss": 0.9669, "step": 81165 }, { "epoch": 71.89548272807795, "grad_norm": 0.2402273565530777, "learning_rate": 1e-05, "loss": 0.9657, "step": 81170 }, { "epoch": 71.89991142604075, "grad_norm": 0.2389194816350937, "learning_rate": 1e-05, "loss": 1.008, "step": 81175 }, { "epoch": 71.90434012400354, "grad_norm": 0.21920032799243927, "learning_rate": 1e-05, "loss": 0.968, "step": 81180 }, { "epoch": 71.90876882196635, "grad_norm": 0.2521321773529053, "learning_rate": 1e-05, "loss": 0.9937, "step": 81185 }, { "epoch": 71.91319751992914, "grad_norm": 0.26153382658958435, "learning_rate": 1e-05, "loss": 0.965, "step": 81190 }, { "epoch": 71.91762621789194, "grad_norm": 0.22406961023807526, "learning_rate": 1e-05, "loss": 1.0196, "step": 81195 }, { "epoch": 71.92205491585474, "grad_norm": 0.22841869294643402, "learning_rate": 1e-05, "loss": 0.9996, "step": 81200 }, { "epoch": 71.92648361381754, "grad_norm": 0.2409314215183258, "learning_rate": 1e-05, "loss": 0.9553, "step": 81205 }, { "epoch": 71.93091231178033, "grad_norm": 0.22601507604122162, "learning_rate": 1e-05, "loss": 1.0239, "step": 81210 }, { "epoch": 71.93534100974314, "grad_norm": 0.24212512373924255, "learning_rate": 1e-05, "loss": 0.9366, "step": 81215 }, { "epoch": 71.93976970770593, "grad_norm": 0.25856292247772217, "learning_rate": 1e-05, "loss": 0.9343, "step": 81220 }, { "epoch": 71.94419840566873, "grad_norm": 0.24418391287326813, "learning_rate": 1e-05, "loss": 0.957, "step": 81225 }, { "epoch": 71.94862710363154, "grad_norm": 0.2174445539712906, "learning_rate": 1e-05, "loss": 0.9592, "step": 81230 }, { "epoch": 71.95305580159433, "grad_norm": 0.23913152515888214, "learning_rate": 1e-05, "loss": 1.008, "step": 81235 }, { "epoch": 71.95748449955713, "grad_norm": 0.20793089270591736, "learning_rate": 1e-05, "loss": 0.9702, "step": 81240 }, { "epoch": 71.96191319751993, "grad_norm": 0.21642142534255981, "learning_rate": 1e-05, "loss": 0.9452, "step": 81245 }, { "epoch": 71.96634189548273, "grad_norm": 0.25521984696388245, "learning_rate": 1e-05, "loss": 0.9555, "step": 81250 }, { "epoch": 71.97077059344552, "grad_norm": 0.24558059871196747, "learning_rate": 1e-05, "loss": 0.9822, "step": 81255 }, { "epoch": 71.97519929140833, "grad_norm": 0.2338753193616867, "learning_rate": 1e-05, "loss": 0.9417, "step": 81260 }, { "epoch": 71.97962798937112, "grad_norm": 0.24042771756649017, "learning_rate": 1e-05, "loss": 0.9647, "step": 81265 }, { "epoch": 71.98405668733392, "grad_norm": 0.24682267010211945, "learning_rate": 1e-05, "loss": 0.9124, "step": 81270 }, { "epoch": 71.98848538529673, "grad_norm": 0.23481950163841248, "learning_rate": 1e-05, "loss": 0.9051, "step": 81275 }, { "epoch": 71.99291408325952, "grad_norm": 0.2177044153213501, "learning_rate": 1e-05, "loss": 0.9406, "step": 81280 }, { "epoch": 71.99734278122232, "grad_norm": 0.2158825844526291, "learning_rate": 1e-05, "loss": 0.9869, "step": 81285 }, { "epoch": 72.00177147918512, "grad_norm": 0.23622381687164307, "learning_rate": 1e-05, "loss": 1.0119, "step": 81290 }, { "epoch": 72.00620017714792, "grad_norm": 0.21890859305858612, "learning_rate": 1e-05, "loss": 0.9202, "step": 81295 }, { "epoch": 72.01062887511071, "grad_norm": 0.242213174700737, "learning_rate": 1e-05, "loss": 0.9529, "step": 81300 }, { "epoch": 72.01505757307352, "grad_norm": 0.27944111824035645, "learning_rate": 1e-05, "loss": 0.9156, "step": 81305 }, { "epoch": 72.01948627103631, "grad_norm": 0.23651334643363953, "learning_rate": 1e-05, "loss": 0.9052, "step": 81310 }, { "epoch": 72.02391496899911, "grad_norm": 0.22058548033237457, "learning_rate": 1e-05, "loss": 0.958, "step": 81315 }, { "epoch": 72.02834366696192, "grad_norm": 0.25605371594429016, "learning_rate": 1e-05, "loss": 0.9252, "step": 81320 }, { "epoch": 72.03277236492471, "grad_norm": 0.22399884462356567, "learning_rate": 1e-05, "loss": 0.9335, "step": 81325 }, { "epoch": 72.0372010628875, "grad_norm": 0.2504664361476898, "learning_rate": 1e-05, "loss": 0.9585, "step": 81330 }, { "epoch": 72.04162976085031, "grad_norm": 0.23969127237796783, "learning_rate": 1e-05, "loss": 0.9748, "step": 81335 }, { "epoch": 72.0460584588131, "grad_norm": 0.24141551554203033, "learning_rate": 1e-05, "loss": 0.9293, "step": 81340 }, { "epoch": 72.0504871567759, "grad_norm": 0.24725572764873505, "learning_rate": 1e-05, "loss": 0.9772, "step": 81345 }, { "epoch": 72.05491585473871, "grad_norm": 0.20956510305404663, "learning_rate": 1e-05, "loss": 0.9638, "step": 81350 }, { "epoch": 72.0593445527015, "grad_norm": 0.22496171295642853, "learning_rate": 1e-05, "loss": 0.9941, "step": 81355 }, { "epoch": 72.0637732506643, "grad_norm": 0.236468106508255, "learning_rate": 1e-05, "loss": 0.9857, "step": 81360 }, { "epoch": 72.0682019486271, "grad_norm": 0.24994243681430817, "learning_rate": 1e-05, "loss": 0.9587, "step": 81365 }, { "epoch": 72.0726306465899, "grad_norm": 0.23941124975681305, "learning_rate": 1e-05, "loss": 0.977, "step": 81370 }, { "epoch": 72.07705934455271, "grad_norm": 0.25477737188339233, "learning_rate": 1e-05, "loss": 1.0119, "step": 81375 }, { "epoch": 72.0814880425155, "grad_norm": 0.23598423600196838, "learning_rate": 1e-05, "loss": 0.9768, "step": 81380 }, { "epoch": 72.0859167404783, "grad_norm": 0.21149305999279022, "learning_rate": 1e-05, "loss": 0.9507, "step": 81385 }, { "epoch": 72.0903454384411, "grad_norm": 0.2616944909095764, "learning_rate": 1e-05, "loss": 0.9256, "step": 81390 }, { "epoch": 72.0947741364039, "grad_norm": 0.27005478739738464, "learning_rate": 1e-05, "loss": 0.9962, "step": 81395 }, { "epoch": 72.0992028343667, "grad_norm": 0.27633461356163025, "learning_rate": 1e-05, "loss": 0.954, "step": 81400 }, { "epoch": 72.1036315323295, "grad_norm": 0.23954515159130096, "learning_rate": 1e-05, "loss": 0.9232, "step": 81405 }, { "epoch": 72.1080602302923, "grad_norm": 0.2105601578950882, "learning_rate": 1e-05, "loss": 0.929, "step": 81410 }, { "epoch": 72.11248892825509, "grad_norm": 0.2877689301967621, "learning_rate": 1e-05, "loss": 0.9561, "step": 81415 }, { "epoch": 72.1169176262179, "grad_norm": 0.2449050396680832, "learning_rate": 1e-05, "loss": 0.9102, "step": 81420 }, { "epoch": 72.12134632418069, "grad_norm": 0.24011529982089996, "learning_rate": 1e-05, "loss": 0.9395, "step": 81425 }, { "epoch": 72.12577502214349, "grad_norm": 0.26916563510894775, "learning_rate": 1e-05, "loss": 0.9429, "step": 81430 }, { "epoch": 72.1302037201063, "grad_norm": 0.1996055245399475, "learning_rate": 1e-05, "loss": 0.9872, "step": 81435 }, { "epoch": 72.13463241806909, "grad_norm": 0.24843572080135345, "learning_rate": 1e-05, "loss": 0.9804, "step": 81440 }, { "epoch": 72.13906111603188, "grad_norm": 0.21508702635765076, "learning_rate": 1e-05, "loss": 0.9687, "step": 81445 }, { "epoch": 72.14348981399469, "grad_norm": 0.23762810230255127, "learning_rate": 1e-05, "loss": 1.011, "step": 81450 }, { "epoch": 72.14791851195749, "grad_norm": 0.28272709250450134, "learning_rate": 1e-05, "loss": 1.0115, "step": 81455 }, { "epoch": 72.15234720992028, "grad_norm": 0.26446881890296936, "learning_rate": 1e-05, "loss": 0.972, "step": 81460 }, { "epoch": 72.15677590788309, "grad_norm": 0.24066747725009918, "learning_rate": 1e-05, "loss": 0.9166, "step": 81465 }, { "epoch": 72.16120460584588, "grad_norm": 0.2595427334308624, "learning_rate": 1e-05, "loss": 0.9393, "step": 81470 }, { "epoch": 72.16563330380868, "grad_norm": 0.20953522622585297, "learning_rate": 1e-05, "loss": 0.9438, "step": 81475 }, { "epoch": 72.17006200177148, "grad_norm": 0.255551815032959, "learning_rate": 1e-05, "loss": 0.9497, "step": 81480 }, { "epoch": 72.17449069973428, "grad_norm": 0.21852417290210724, "learning_rate": 1e-05, "loss": 0.9973, "step": 81485 }, { "epoch": 72.17891939769707, "grad_norm": 0.22914937138557434, "learning_rate": 1e-05, "loss": 0.9718, "step": 81490 }, { "epoch": 72.18334809565988, "grad_norm": 0.2236819714307785, "learning_rate": 1e-05, "loss": 0.9664, "step": 81495 }, { "epoch": 72.18777679362267, "grad_norm": 0.2010485827922821, "learning_rate": 1e-05, "loss": 0.9432, "step": 81500 }, { "epoch": 72.19220549158547, "grad_norm": 0.22047671675682068, "learning_rate": 1e-05, "loss": 0.9559, "step": 81505 }, { "epoch": 72.19663418954828, "grad_norm": 0.2272142916917801, "learning_rate": 1e-05, "loss": 0.9798, "step": 81510 }, { "epoch": 72.20106288751107, "grad_norm": 0.2484421581029892, "learning_rate": 1e-05, "loss": 0.9492, "step": 81515 }, { "epoch": 72.20549158547387, "grad_norm": 0.2548709809780121, "learning_rate": 1e-05, "loss": 0.9564, "step": 81520 }, { "epoch": 72.20992028343667, "grad_norm": 0.212657630443573, "learning_rate": 1e-05, "loss": 1.0099, "step": 81525 }, { "epoch": 72.21434898139947, "grad_norm": 0.22249644994735718, "learning_rate": 1e-05, "loss": 0.9963, "step": 81530 }, { "epoch": 72.21877767936226, "grad_norm": 0.2488417774438858, "learning_rate": 1e-05, "loss": 0.9633, "step": 81535 }, { "epoch": 72.22320637732507, "grad_norm": 0.25050610303878784, "learning_rate": 1e-05, "loss": 0.9596, "step": 81540 }, { "epoch": 72.22763507528786, "grad_norm": 0.22389383614063263, "learning_rate": 1e-05, "loss": 0.9527, "step": 81545 }, { "epoch": 72.23206377325066, "grad_norm": 0.2913344204425812, "learning_rate": 1e-05, "loss": 0.9207, "step": 81550 }, { "epoch": 72.23649247121347, "grad_norm": 0.26752591133117676, "learning_rate": 1e-05, "loss": 1.0218, "step": 81555 }, { "epoch": 72.24092116917626, "grad_norm": 0.29146572947502136, "learning_rate": 1e-05, "loss": 0.965, "step": 81560 }, { "epoch": 72.24534986713905, "grad_norm": 0.2561086416244507, "learning_rate": 1e-05, "loss": 1.0116, "step": 81565 }, { "epoch": 72.24977856510186, "grad_norm": 0.24353192746639252, "learning_rate": 1e-05, "loss": 0.9546, "step": 81570 }, { "epoch": 72.25420726306466, "grad_norm": 0.20363597571849823, "learning_rate": 1e-05, "loss": 0.96, "step": 81575 }, { "epoch": 72.25863596102745, "grad_norm": 0.28131386637687683, "learning_rate": 1e-05, "loss": 0.9347, "step": 81580 }, { "epoch": 72.26306465899026, "grad_norm": 0.23872306942939758, "learning_rate": 1e-05, "loss": 0.9959, "step": 81585 }, { "epoch": 72.26749335695305, "grad_norm": 0.24349729716777802, "learning_rate": 1e-05, "loss": 0.9592, "step": 81590 }, { "epoch": 72.27192205491585, "grad_norm": 0.22526611387729645, "learning_rate": 1e-05, "loss": 0.9496, "step": 81595 }, { "epoch": 72.27635075287866, "grad_norm": 0.2616110146045685, "learning_rate": 1e-05, "loss": 0.9447, "step": 81600 }, { "epoch": 72.28077945084145, "grad_norm": 0.26459765434265137, "learning_rate": 1e-05, "loss": 0.9282, "step": 81605 }, { "epoch": 72.28520814880426, "grad_norm": 0.2639113664627075, "learning_rate": 1e-05, "loss": 0.9528, "step": 81610 }, { "epoch": 72.28963684676705, "grad_norm": 0.23226481676101685, "learning_rate": 1e-05, "loss": 0.9281, "step": 81615 }, { "epoch": 72.29406554472985, "grad_norm": 0.27576157450675964, "learning_rate": 1e-05, "loss": 0.9832, "step": 81620 }, { "epoch": 72.29849424269266, "grad_norm": 0.2584199011325836, "learning_rate": 1e-05, "loss": 0.9409, "step": 81625 }, { "epoch": 72.30292294065545, "grad_norm": 0.27295801043510437, "learning_rate": 1e-05, "loss": 1.0044, "step": 81630 }, { "epoch": 72.30735163861824, "grad_norm": 0.23922015726566315, "learning_rate": 1e-05, "loss": 0.9383, "step": 81635 }, { "epoch": 72.31178033658105, "grad_norm": 0.2583041489124298, "learning_rate": 1e-05, "loss": 0.9484, "step": 81640 }, { "epoch": 72.31620903454385, "grad_norm": 0.27850082516670227, "learning_rate": 1e-05, "loss": 0.948, "step": 81645 }, { "epoch": 72.32063773250664, "grad_norm": 0.20707598328590393, "learning_rate": 1e-05, "loss": 0.973, "step": 81650 }, { "epoch": 72.32506643046945, "grad_norm": 0.23906508088111877, "learning_rate": 1e-05, "loss": 1.0103, "step": 81655 }, { "epoch": 72.32949512843224, "grad_norm": 0.23260806500911713, "learning_rate": 1e-05, "loss": 0.9225, "step": 81660 }, { "epoch": 72.33392382639504, "grad_norm": 0.21275664865970612, "learning_rate": 1e-05, "loss": 0.9826, "step": 81665 }, { "epoch": 72.33835252435784, "grad_norm": 0.2309500277042389, "learning_rate": 1e-05, "loss": 0.9882, "step": 81670 }, { "epoch": 72.34278122232064, "grad_norm": 0.2329455018043518, "learning_rate": 1e-05, "loss": 0.9827, "step": 81675 }, { "epoch": 72.34720992028343, "grad_norm": 0.22734816372394562, "learning_rate": 1e-05, "loss": 0.9739, "step": 81680 }, { "epoch": 72.35163861824624, "grad_norm": 0.22864748537540436, "learning_rate": 1e-05, "loss": 0.9725, "step": 81685 }, { "epoch": 72.35606731620904, "grad_norm": 0.2648402750492096, "learning_rate": 1e-05, "loss": 0.9696, "step": 81690 }, { "epoch": 72.36049601417183, "grad_norm": 0.25199875235557556, "learning_rate": 1e-05, "loss": 0.941, "step": 81695 }, { "epoch": 72.36492471213464, "grad_norm": 0.28546321392059326, "learning_rate": 1e-05, "loss": 0.9816, "step": 81700 }, { "epoch": 72.36935341009743, "grad_norm": 0.23969654738903046, "learning_rate": 1e-05, "loss": 0.9689, "step": 81705 }, { "epoch": 72.37378210806023, "grad_norm": 0.2365540713071823, "learning_rate": 1e-05, "loss": 1.0338, "step": 81710 }, { "epoch": 72.37821080602303, "grad_norm": 0.22369222342967987, "learning_rate": 1e-05, "loss": 0.9438, "step": 81715 }, { "epoch": 72.38263950398583, "grad_norm": 0.2490292489528656, "learning_rate": 1e-05, "loss": 0.9683, "step": 81720 }, { "epoch": 72.38706820194862, "grad_norm": 0.27210327982902527, "learning_rate": 1e-05, "loss": 0.9578, "step": 81725 }, { "epoch": 72.39149689991143, "grad_norm": 0.19943967461585999, "learning_rate": 1e-05, "loss": 0.9391, "step": 81730 }, { "epoch": 72.39592559787422, "grad_norm": 0.2582855522632599, "learning_rate": 1e-05, "loss": 0.8777, "step": 81735 }, { "epoch": 72.40035429583702, "grad_norm": 0.22129642963409424, "learning_rate": 1e-05, "loss": 0.9756, "step": 81740 }, { "epoch": 72.40478299379983, "grad_norm": 0.23241719603538513, "learning_rate": 1e-05, "loss": 0.9902, "step": 81745 }, { "epoch": 72.40921169176262, "grad_norm": 0.28924551606178284, "learning_rate": 1e-05, "loss": 0.9584, "step": 81750 }, { "epoch": 72.41364038972542, "grad_norm": 0.254770427942276, "learning_rate": 1e-05, "loss": 0.9606, "step": 81755 }, { "epoch": 72.41806908768822, "grad_norm": 0.2669125199317932, "learning_rate": 1e-05, "loss": 0.9749, "step": 81760 }, { "epoch": 72.42249778565102, "grad_norm": 0.29147234559059143, "learning_rate": 1e-05, "loss": 0.9364, "step": 81765 }, { "epoch": 72.42692648361381, "grad_norm": 0.21068868041038513, "learning_rate": 1e-05, "loss": 0.9788, "step": 81770 }, { "epoch": 72.43135518157662, "grad_norm": 0.23538866639137268, "learning_rate": 1e-05, "loss": 0.9424, "step": 81775 }, { "epoch": 72.43578387953941, "grad_norm": 0.2441510260105133, "learning_rate": 1e-05, "loss": 0.9638, "step": 81780 }, { "epoch": 72.44021257750221, "grad_norm": 0.22194457054138184, "learning_rate": 1e-05, "loss": 0.9395, "step": 81785 }, { "epoch": 72.44464127546502, "grad_norm": 0.2323472499847412, "learning_rate": 1e-05, "loss": 0.9094, "step": 81790 }, { "epoch": 72.44906997342781, "grad_norm": 0.2350277602672577, "learning_rate": 1e-05, "loss": 0.9315, "step": 81795 }, { "epoch": 72.4534986713906, "grad_norm": 0.22526490688323975, "learning_rate": 1e-05, "loss": 0.9195, "step": 81800 }, { "epoch": 72.45792736935341, "grad_norm": 0.20925948023796082, "learning_rate": 1e-05, "loss": 0.965, "step": 81805 }, { "epoch": 72.46235606731621, "grad_norm": 0.21776196360588074, "learning_rate": 1e-05, "loss": 1.016, "step": 81810 }, { "epoch": 72.466784765279, "grad_norm": 0.2173004001379013, "learning_rate": 1e-05, "loss": 1.0448, "step": 81815 }, { "epoch": 72.47121346324181, "grad_norm": 0.2210615575313568, "learning_rate": 1e-05, "loss": 0.9596, "step": 81820 }, { "epoch": 72.4756421612046, "grad_norm": 0.22636619210243225, "learning_rate": 1e-05, "loss": 0.9379, "step": 81825 }, { "epoch": 72.4800708591674, "grad_norm": 0.21062739193439484, "learning_rate": 1e-05, "loss": 0.9906, "step": 81830 }, { "epoch": 72.4844995571302, "grad_norm": 0.22763392329216003, "learning_rate": 1e-05, "loss": 0.9836, "step": 81835 }, { "epoch": 72.488928255093, "grad_norm": 0.29344770312309265, "learning_rate": 1e-05, "loss": 0.9134, "step": 81840 }, { "epoch": 72.4933569530558, "grad_norm": 0.24963222444057465, "learning_rate": 1e-05, "loss": 0.9976, "step": 81845 }, { "epoch": 72.4977856510186, "grad_norm": 0.22271636128425598, "learning_rate": 1e-05, "loss": 0.8833, "step": 81850 }, { "epoch": 72.5022143489814, "grad_norm": 0.19670172035694122, "learning_rate": 1e-05, "loss": 0.9921, "step": 81855 }, { "epoch": 72.5066430469442, "grad_norm": 0.2319105714559555, "learning_rate": 1e-05, "loss": 1.0384, "step": 81860 }, { "epoch": 72.511071744907, "grad_norm": 0.24714207649230957, "learning_rate": 1e-05, "loss": 0.9377, "step": 81865 }, { "epoch": 72.5155004428698, "grad_norm": 0.2168656885623932, "learning_rate": 1e-05, "loss": 1.0026, "step": 81870 }, { "epoch": 72.5199291408326, "grad_norm": 0.24972131848335266, "learning_rate": 1e-05, "loss": 0.9854, "step": 81875 }, { "epoch": 72.5243578387954, "grad_norm": 0.23894548416137695, "learning_rate": 1e-05, "loss": 0.9546, "step": 81880 }, { "epoch": 72.52878653675819, "grad_norm": 0.21484695374965668, "learning_rate": 1e-05, "loss": 0.9819, "step": 81885 }, { "epoch": 72.533215234721, "grad_norm": 0.22791941463947296, "learning_rate": 1e-05, "loss": 0.9421, "step": 81890 }, { "epoch": 72.53764393268379, "grad_norm": 0.21178391575813293, "learning_rate": 1e-05, "loss": 0.9617, "step": 81895 }, { "epoch": 72.54207263064659, "grad_norm": 0.2729080617427826, "learning_rate": 1e-05, "loss": 0.9776, "step": 81900 }, { "epoch": 72.5465013286094, "grad_norm": 0.24734045565128326, "learning_rate": 1e-05, "loss": 0.9539, "step": 81905 }, { "epoch": 72.55093002657219, "grad_norm": 0.2369898110628128, "learning_rate": 1e-05, "loss": 0.9483, "step": 81910 }, { "epoch": 72.55535872453498, "grad_norm": 0.29310154914855957, "learning_rate": 1e-05, "loss": 0.9568, "step": 81915 }, { "epoch": 72.55978742249779, "grad_norm": 0.2385549396276474, "learning_rate": 1e-05, "loss": 0.9509, "step": 81920 }, { "epoch": 72.56421612046059, "grad_norm": 0.25564342737197876, "learning_rate": 1e-05, "loss": 0.917, "step": 81925 }, { "epoch": 72.56864481842338, "grad_norm": 0.21443584561347961, "learning_rate": 1e-05, "loss": 0.9816, "step": 81930 }, { "epoch": 72.57307351638619, "grad_norm": 0.2723100781440735, "learning_rate": 1e-05, "loss": 0.9629, "step": 81935 }, { "epoch": 72.57750221434898, "grad_norm": 0.26437267661094666, "learning_rate": 1e-05, "loss": 0.9371, "step": 81940 }, { "epoch": 72.58193091231178, "grad_norm": 0.24043592810630798, "learning_rate": 1e-05, "loss": 1.0126, "step": 81945 }, { "epoch": 72.58635961027458, "grad_norm": 0.25841444730758667, "learning_rate": 1e-05, "loss": 1.0247, "step": 81950 }, { "epoch": 72.59078830823738, "grad_norm": 0.2862551510334015, "learning_rate": 1e-05, "loss": 0.9807, "step": 81955 }, { "epoch": 72.59521700620017, "grad_norm": 0.2475762963294983, "learning_rate": 1e-05, "loss": 0.9992, "step": 81960 }, { "epoch": 72.59964570416298, "grad_norm": 0.2246282547712326, "learning_rate": 1e-05, "loss": 0.979, "step": 81965 }, { "epoch": 72.60407440212578, "grad_norm": 0.28102266788482666, "learning_rate": 1e-05, "loss": 0.9325, "step": 81970 }, { "epoch": 72.60850310008857, "grad_norm": 0.22349387407302856, "learning_rate": 1e-05, "loss": 0.9945, "step": 81975 }, { "epoch": 72.61293179805138, "grad_norm": 0.3062332570552826, "learning_rate": 1e-05, "loss": 0.9362, "step": 81980 }, { "epoch": 72.61736049601417, "grad_norm": 0.2933221459388733, "learning_rate": 1e-05, "loss": 1.0086, "step": 81985 }, { "epoch": 72.62178919397697, "grad_norm": 0.23921425640583038, "learning_rate": 1e-05, "loss": 0.9942, "step": 81990 }, { "epoch": 72.62621789193977, "grad_norm": 0.23220431804656982, "learning_rate": 1e-05, "loss": 0.9604, "step": 81995 }, { "epoch": 72.63064658990257, "grad_norm": 0.24322591722011566, "learning_rate": 1e-05, "loss": 0.9947, "step": 82000 }, { "epoch": 72.63507528786536, "grad_norm": 0.22642558813095093, "learning_rate": 1e-05, "loss": 0.9437, "step": 82005 }, { "epoch": 72.63950398582817, "grad_norm": 0.22948335111141205, "learning_rate": 1e-05, "loss": 0.9854, "step": 82010 }, { "epoch": 72.64393268379096, "grad_norm": 0.29430514574050903, "learning_rate": 1e-05, "loss": 0.9434, "step": 82015 }, { "epoch": 72.64836138175376, "grad_norm": 0.26079872250556946, "learning_rate": 1e-05, "loss": 1.0043, "step": 82020 }, { "epoch": 72.65279007971657, "grad_norm": 0.24384747445583344, "learning_rate": 1e-05, "loss": 0.9918, "step": 82025 }, { "epoch": 72.65721877767936, "grad_norm": 0.2269025295972824, "learning_rate": 1e-05, "loss": 0.9842, "step": 82030 }, { "epoch": 72.66164747564216, "grad_norm": 0.1892281323671341, "learning_rate": 1e-05, "loss": 0.9906, "step": 82035 }, { "epoch": 72.66607617360496, "grad_norm": 0.23388154804706573, "learning_rate": 1e-05, "loss": 0.9178, "step": 82040 }, { "epoch": 72.67050487156776, "grad_norm": 0.25561922788619995, "learning_rate": 1e-05, "loss": 0.9813, "step": 82045 }, { "epoch": 72.67493356953055, "grad_norm": 0.23890981078147888, "learning_rate": 1e-05, "loss": 0.955, "step": 82050 }, { "epoch": 72.67936226749336, "grad_norm": 0.2638990581035614, "learning_rate": 1e-05, "loss": 1.0164, "step": 82055 }, { "epoch": 72.68379096545615, "grad_norm": 0.260124146938324, "learning_rate": 1e-05, "loss": 0.9882, "step": 82060 }, { "epoch": 72.68821966341895, "grad_norm": 0.2417614609003067, "learning_rate": 1e-05, "loss": 1.0233, "step": 82065 }, { "epoch": 72.69264836138176, "grad_norm": 0.2613557279109955, "learning_rate": 1e-05, "loss": 0.9482, "step": 82070 }, { "epoch": 72.69707705934455, "grad_norm": 0.27670252323150635, "learning_rate": 1e-05, "loss": 0.9637, "step": 82075 }, { "epoch": 72.70150575730734, "grad_norm": 0.22210197150707245, "learning_rate": 1e-05, "loss": 0.9963, "step": 82080 }, { "epoch": 72.70593445527015, "grad_norm": 0.2397746443748474, "learning_rate": 1e-05, "loss": 0.9006, "step": 82085 }, { "epoch": 72.71036315323295, "grad_norm": 0.25654351711273193, "learning_rate": 1e-05, "loss": 1.0082, "step": 82090 }, { "epoch": 72.71479185119574, "grad_norm": 0.24264849722385406, "learning_rate": 1e-05, "loss": 0.9813, "step": 82095 }, { "epoch": 72.71922054915855, "grad_norm": 0.207445427775383, "learning_rate": 1e-05, "loss": 0.9671, "step": 82100 }, { "epoch": 72.72364924712134, "grad_norm": 0.2603868544101715, "learning_rate": 1e-05, "loss": 0.9771, "step": 82105 }, { "epoch": 72.72807794508415, "grad_norm": 0.2347344607114792, "learning_rate": 1e-05, "loss": 0.9519, "step": 82110 }, { "epoch": 72.73250664304695, "grad_norm": 0.20686018466949463, "learning_rate": 1e-05, "loss": 0.9478, "step": 82115 }, { "epoch": 72.73693534100974, "grad_norm": 0.23141111433506012, "learning_rate": 1e-05, "loss": 0.9592, "step": 82120 }, { "epoch": 72.74136403897255, "grad_norm": 0.23409943282604218, "learning_rate": 1e-05, "loss": 0.9411, "step": 82125 }, { "epoch": 72.74579273693534, "grad_norm": 0.22090205550193787, "learning_rate": 1e-05, "loss": 0.9224, "step": 82130 }, { "epoch": 72.75022143489814, "grad_norm": 0.23999835550785065, "learning_rate": 1e-05, "loss": 0.9917, "step": 82135 }, { "epoch": 72.75465013286095, "grad_norm": 0.22561438381671906, "learning_rate": 1e-05, "loss": 0.9731, "step": 82140 }, { "epoch": 72.75907883082374, "grad_norm": 0.2848077714443207, "learning_rate": 1e-05, "loss": 0.9491, "step": 82145 }, { "epoch": 72.76350752878653, "grad_norm": 0.2460223138332367, "learning_rate": 1e-05, "loss": 0.9411, "step": 82150 }, { "epoch": 72.76793622674934, "grad_norm": 0.2385382503271103, "learning_rate": 1e-05, "loss": 0.9216, "step": 82155 }, { "epoch": 72.77236492471214, "grad_norm": 0.21186718344688416, "learning_rate": 1e-05, "loss": 0.9549, "step": 82160 }, { "epoch": 72.77679362267493, "grad_norm": 0.24680039286613464, "learning_rate": 1e-05, "loss": 0.9586, "step": 82165 }, { "epoch": 72.78122232063774, "grad_norm": 0.237496480345726, "learning_rate": 1e-05, "loss": 0.9672, "step": 82170 }, { "epoch": 72.78565101860053, "grad_norm": 0.21692587435245514, "learning_rate": 1e-05, "loss": 1.018, "step": 82175 }, { "epoch": 72.79007971656333, "grad_norm": 0.2576132118701935, "learning_rate": 1e-05, "loss": 0.9229, "step": 82180 }, { "epoch": 72.79450841452613, "grad_norm": 0.2511577606201172, "learning_rate": 1e-05, "loss": 1.0024, "step": 82185 }, { "epoch": 72.79893711248893, "grad_norm": 0.2365998476743698, "learning_rate": 1e-05, "loss": 1.0149, "step": 82190 }, { "epoch": 72.80336581045172, "grad_norm": 0.3116912841796875, "learning_rate": 1e-05, "loss": 0.9088, "step": 82195 }, { "epoch": 72.80779450841453, "grad_norm": 0.20943093299865723, "learning_rate": 1e-05, "loss": 0.959, "step": 82200 }, { "epoch": 72.81222320637733, "grad_norm": 0.24530185759067535, "learning_rate": 1e-05, "loss": 0.934, "step": 82205 }, { "epoch": 72.81665190434012, "grad_norm": 0.21564076840877533, "learning_rate": 1e-05, "loss": 0.9423, "step": 82210 }, { "epoch": 72.82108060230293, "grad_norm": 0.27512311935424805, "learning_rate": 1e-05, "loss": 0.9632, "step": 82215 }, { "epoch": 72.82550930026572, "grad_norm": 0.2699989676475525, "learning_rate": 1e-05, "loss": 0.9645, "step": 82220 }, { "epoch": 72.82993799822852, "grad_norm": 0.2616477310657501, "learning_rate": 1e-05, "loss": 0.9631, "step": 82225 }, { "epoch": 72.83436669619132, "grad_norm": 0.200178861618042, "learning_rate": 1e-05, "loss": 0.9582, "step": 82230 }, { "epoch": 72.83879539415412, "grad_norm": 0.26071739196777344, "learning_rate": 1e-05, "loss": 0.9541, "step": 82235 }, { "epoch": 72.84322409211691, "grad_norm": 0.2706190347671509, "learning_rate": 1e-05, "loss": 0.9585, "step": 82240 }, { "epoch": 72.84765279007972, "grad_norm": 0.23201747238636017, "learning_rate": 1e-05, "loss": 1.006, "step": 82245 }, { "epoch": 72.85208148804251, "grad_norm": 0.2525644898414612, "learning_rate": 1e-05, "loss": 1.0021, "step": 82250 }, { "epoch": 72.85651018600531, "grad_norm": 0.28115618228912354, "learning_rate": 1e-05, "loss": 0.9404, "step": 82255 }, { "epoch": 72.86093888396812, "grad_norm": 0.264055997133255, "learning_rate": 1e-05, "loss": 1.0422, "step": 82260 }, { "epoch": 72.86536758193091, "grad_norm": 0.2329600304365158, "learning_rate": 1e-05, "loss": 0.974, "step": 82265 }, { "epoch": 72.8697962798937, "grad_norm": 0.237477108836174, "learning_rate": 1e-05, "loss": 0.9726, "step": 82270 }, { "epoch": 72.87422497785651, "grad_norm": 0.22517579793930054, "learning_rate": 1e-05, "loss": 0.9474, "step": 82275 }, { "epoch": 72.87865367581931, "grad_norm": 0.2691676616668701, "learning_rate": 1e-05, "loss": 0.9547, "step": 82280 }, { "epoch": 72.8830823737821, "grad_norm": 0.2528765797615051, "learning_rate": 1e-05, "loss": 0.9715, "step": 82285 }, { "epoch": 72.88751107174491, "grad_norm": 0.2579416036605835, "learning_rate": 1e-05, "loss": 0.9412, "step": 82290 }, { "epoch": 72.8919397697077, "grad_norm": 0.25088420510292053, "learning_rate": 1e-05, "loss": 0.9678, "step": 82295 }, { "epoch": 72.8963684676705, "grad_norm": 0.24960538744926453, "learning_rate": 1e-05, "loss": 0.9993, "step": 82300 }, { "epoch": 72.9007971656333, "grad_norm": 0.24997778236865997, "learning_rate": 1e-05, "loss": 1.0026, "step": 82305 }, { "epoch": 72.9052258635961, "grad_norm": 0.2649942636489868, "learning_rate": 1e-05, "loss": 0.9435, "step": 82310 }, { "epoch": 72.9096545615589, "grad_norm": 0.22673450410366058, "learning_rate": 1e-05, "loss": 0.9303, "step": 82315 }, { "epoch": 72.9140832595217, "grad_norm": 0.29669609665870667, "learning_rate": 1e-05, "loss": 0.9215, "step": 82320 }, { "epoch": 72.9185119574845, "grad_norm": 0.22993719577789307, "learning_rate": 1e-05, "loss": 0.9694, "step": 82325 }, { "epoch": 72.92294065544729, "grad_norm": 0.2600945234298706, "learning_rate": 1e-05, "loss": 0.9725, "step": 82330 }, { "epoch": 72.9273693534101, "grad_norm": 0.26497912406921387, "learning_rate": 1e-05, "loss": 0.9747, "step": 82335 }, { "epoch": 72.9317980513729, "grad_norm": 0.23068685829639435, "learning_rate": 1e-05, "loss": 1.0239, "step": 82340 }, { "epoch": 72.9362267493357, "grad_norm": 0.24579927325248718, "learning_rate": 1e-05, "loss": 0.95, "step": 82345 }, { "epoch": 72.9406554472985, "grad_norm": 0.24136674404144287, "learning_rate": 1e-05, "loss": 0.976, "step": 82350 }, { "epoch": 72.94508414526129, "grad_norm": 0.24702665209770203, "learning_rate": 1e-05, "loss": 0.9598, "step": 82355 }, { "epoch": 72.9495128432241, "grad_norm": 0.2417319118976593, "learning_rate": 1e-05, "loss": 0.9921, "step": 82360 }, { "epoch": 72.9539415411869, "grad_norm": 0.2343817800283432, "learning_rate": 1e-05, "loss": 0.9693, "step": 82365 }, { "epoch": 72.95837023914969, "grad_norm": 0.2281305342912674, "learning_rate": 1e-05, "loss": 0.976, "step": 82370 }, { "epoch": 72.9627989371125, "grad_norm": 0.24599644541740417, "learning_rate": 1e-05, "loss": 0.9647, "step": 82375 }, { "epoch": 72.96722763507529, "grad_norm": 0.2539338767528534, "learning_rate": 1e-05, "loss": 1.0077, "step": 82380 }, { "epoch": 72.97165633303808, "grad_norm": 0.2511104643344879, "learning_rate": 1e-05, "loss": 0.9213, "step": 82385 }, { "epoch": 72.97608503100089, "grad_norm": 0.2469913363456726, "learning_rate": 1e-05, "loss": 0.9379, "step": 82390 }, { "epoch": 72.98051372896369, "grad_norm": 0.21827562153339386, "learning_rate": 1e-05, "loss": 1.0091, "step": 82395 }, { "epoch": 72.98494242692648, "grad_norm": 0.21459631621837616, "learning_rate": 1e-05, "loss": 0.9639, "step": 82400 }, { "epoch": 72.98937112488929, "grad_norm": 0.23577465116977692, "learning_rate": 1e-05, "loss": 0.9965, "step": 82405 }, { "epoch": 72.99379982285208, "grad_norm": 0.21619725227355957, "learning_rate": 1e-05, "loss": 0.9392, "step": 82410 }, { "epoch": 72.99822852081488, "grad_norm": 0.23966482281684875, "learning_rate": 1e-05, "loss": 1.0052, "step": 82415 }, { "epoch": 73.00265721877768, "grad_norm": 0.2208172082901001, "learning_rate": 1e-05, "loss": 0.9412, "step": 82420 }, { "epoch": 73.00708591674048, "grad_norm": 0.21438747644424438, "learning_rate": 1e-05, "loss": 0.9796, "step": 82425 }, { "epoch": 73.01151461470327, "grad_norm": 0.22542330622673035, "learning_rate": 1e-05, "loss": 0.9752, "step": 82430 }, { "epoch": 73.01594331266608, "grad_norm": 0.2583635747432709, "learning_rate": 1e-05, "loss": 0.8995, "step": 82435 }, { "epoch": 73.02037201062888, "grad_norm": 0.23522783815860748, "learning_rate": 1e-05, "loss": 0.9385, "step": 82440 }, { "epoch": 73.02480070859167, "grad_norm": 0.20607879757881165, "learning_rate": 1e-05, "loss": 0.9344, "step": 82445 }, { "epoch": 73.02922940655448, "grad_norm": 0.22864672541618347, "learning_rate": 1e-05, "loss": 0.9398, "step": 82450 }, { "epoch": 73.03365810451727, "grad_norm": 0.23980331420898438, "learning_rate": 1e-05, "loss": 0.9685, "step": 82455 }, { "epoch": 73.03808680248007, "grad_norm": 0.227765753865242, "learning_rate": 1e-05, "loss": 0.9323, "step": 82460 }, { "epoch": 73.04251550044287, "grad_norm": 0.25453898310661316, "learning_rate": 1e-05, "loss": 0.9673, "step": 82465 }, { "epoch": 73.04694419840567, "grad_norm": 0.22375932335853577, "learning_rate": 1e-05, "loss": 0.9746, "step": 82470 }, { "epoch": 73.05137289636846, "grad_norm": 0.2136206030845642, "learning_rate": 1e-05, "loss": 0.887, "step": 82475 }, { "epoch": 73.05580159433127, "grad_norm": 0.28091931343078613, "learning_rate": 1e-05, "loss": 0.9731, "step": 82480 }, { "epoch": 73.06023029229407, "grad_norm": 0.26697659492492676, "learning_rate": 1e-05, "loss": 0.9767, "step": 82485 }, { "epoch": 73.06465899025686, "grad_norm": 0.2722189724445343, "learning_rate": 1e-05, "loss": 1.0574, "step": 82490 }, { "epoch": 73.06908768821967, "grad_norm": 0.23614715039730072, "learning_rate": 1e-05, "loss": 1.0005, "step": 82495 }, { "epoch": 73.07351638618246, "grad_norm": 0.2785855829715729, "learning_rate": 1e-05, "loss": 0.9343, "step": 82500 }, { "epoch": 73.07794508414526, "grad_norm": 0.2589624226093292, "learning_rate": 1e-05, "loss": 1.0078, "step": 82505 }, { "epoch": 73.08237378210806, "grad_norm": 0.23542962968349457, "learning_rate": 1e-05, "loss": 0.9646, "step": 82510 }, { "epoch": 73.08680248007086, "grad_norm": 0.228175550699234, "learning_rate": 1e-05, "loss": 0.9517, "step": 82515 }, { "epoch": 73.09123117803365, "grad_norm": 0.2737840712070465, "learning_rate": 1e-05, "loss": 0.9587, "step": 82520 }, { "epoch": 73.09565987599646, "grad_norm": 0.26945221424102783, "learning_rate": 1e-05, "loss": 0.9648, "step": 82525 }, { "epoch": 73.10008857395925, "grad_norm": 0.24570070207118988, "learning_rate": 1e-05, "loss": 0.9402, "step": 82530 }, { "epoch": 73.10451727192205, "grad_norm": 0.24858075380325317, "learning_rate": 1e-05, "loss": 0.9507, "step": 82535 }, { "epoch": 73.10894596988486, "grad_norm": 0.21923251450061798, "learning_rate": 1e-05, "loss": 1.01, "step": 82540 }, { "epoch": 73.11337466784765, "grad_norm": 0.2247978150844574, "learning_rate": 1e-05, "loss": 1.0061, "step": 82545 }, { "epoch": 73.11780336581045, "grad_norm": 0.22685687243938446, "learning_rate": 1e-05, "loss": 0.9198, "step": 82550 }, { "epoch": 73.12223206377325, "grad_norm": 0.25057026743888855, "learning_rate": 1e-05, "loss": 0.9894, "step": 82555 }, { "epoch": 73.12666076173605, "grad_norm": 0.28565722703933716, "learning_rate": 1e-05, "loss": 0.972, "step": 82560 }, { "epoch": 73.13108945969884, "grad_norm": 0.22555232048034668, "learning_rate": 1e-05, "loss": 0.9562, "step": 82565 }, { "epoch": 73.13551815766165, "grad_norm": 0.24804599583148956, "learning_rate": 1e-05, "loss": 0.99, "step": 82570 }, { "epoch": 73.13994685562444, "grad_norm": 0.25632360577583313, "learning_rate": 1e-05, "loss": 0.966, "step": 82575 }, { "epoch": 73.14437555358724, "grad_norm": 0.22083309292793274, "learning_rate": 1e-05, "loss": 0.9473, "step": 82580 }, { "epoch": 73.14880425155005, "grad_norm": 0.2413714975118637, "learning_rate": 1e-05, "loss": 0.9837, "step": 82585 }, { "epoch": 73.15323294951284, "grad_norm": 0.22615182399749756, "learning_rate": 1e-05, "loss": 1.0245, "step": 82590 }, { "epoch": 73.15766164747565, "grad_norm": 0.275217205286026, "learning_rate": 1e-05, "loss": 0.9531, "step": 82595 }, { "epoch": 73.16209034543844, "grad_norm": 0.2280871719121933, "learning_rate": 1e-05, "loss": 1.0145, "step": 82600 }, { "epoch": 73.16651904340124, "grad_norm": 0.24297204613685608, "learning_rate": 1e-05, "loss": 0.9376, "step": 82605 }, { "epoch": 73.17094774136405, "grad_norm": 0.21975819766521454, "learning_rate": 1e-05, "loss": 1.0096, "step": 82610 }, { "epoch": 73.17537643932684, "grad_norm": 0.23662470281124115, "learning_rate": 1e-05, "loss": 0.9477, "step": 82615 }, { "epoch": 73.17980513728963, "grad_norm": 0.2460552304983139, "learning_rate": 1e-05, "loss": 0.9554, "step": 82620 }, { "epoch": 73.18423383525244, "grad_norm": 0.21850033104419708, "learning_rate": 1e-05, "loss": 0.9853, "step": 82625 }, { "epoch": 73.18866253321524, "grad_norm": 0.24351060390472412, "learning_rate": 1e-05, "loss": 0.966, "step": 82630 }, { "epoch": 73.19309123117803, "grad_norm": 0.21935692429542542, "learning_rate": 1e-05, "loss": 0.9604, "step": 82635 }, { "epoch": 73.19751992914084, "grad_norm": 0.28177767992019653, "learning_rate": 1e-05, "loss": 0.9693, "step": 82640 }, { "epoch": 73.20194862710363, "grad_norm": 0.21997953951358795, "learning_rate": 1e-05, "loss": 0.9933, "step": 82645 }, { "epoch": 73.20637732506643, "grad_norm": 0.23230719566345215, "learning_rate": 1e-05, "loss": 0.9998, "step": 82650 }, { "epoch": 73.21080602302924, "grad_norm": 0.2655057907104492, "learning_rate": 1e-05, "loss": 0.8886, "step": 82655 }, { "epoch": 73.21523472099203, "grad_norm": 0.22441129386425018, "learning_rate": 1e-05, "loss": 0.9446, "step": 82660 }, { "epoch": 73.21966341895482, "grad_norm": 0.23323912918567657, "learning_rate": 1e-05, "loss": 0.9286, "step": 82665 }, { "epoch": 73.22409211691763, "grad_norm": 0.23481370508670807, "learning_rate": 1e-05, "loss": 0.9584, "step": 82670 }, { "epoch": 73.22852081488043, "grad_norm": 0.2173687219619751, "learning_rate": 1e-05, "loss": 0.9489, "step": 82675 }, { "epoch": 73.23294951284322, "grad_norm": 0.25321415066719055, "learning_rate": 1e-05, "loss": 1.0289, "step": 82680 }, { "epoch": 73.23737821080603, "grad_norm": 0.21834953129291534, "learning_rate": 1e-05, "loss": 0.9243, "step": 82685 }, { "epoch": 73.24180690876882, "grad_norm": 0.297186940908432, "learning_rate": 1e-05, "loss": 0.916, "step": 82690 }, { "epoch": 73.24623560673162, "grad_norm": 0.2575085759162903, "learning_rate": 1e-05, "loss": 0.9922, "step": 82695 }, { "epoch": 73.25066430469442, "grad_norm": 0.2264639139175415, "learning_rate": 1e-05, "loss": 0.9993, "step": 82700 }, { "epoch": 73.25509300265722, "grad_norm": 0.22753046452999115, "learning_rate": 1e-05, "loss": 0.9862, "step": 82705 }, { "epoch": 73.25952170062001, "grad_norm": 0.22967657446861267, "learning_rate": 1e-05, "loss": 0.9782, "step": 82710 }, { "epoch": 73.26395039858282, "grad_norm": 0.2646719515323639, "learning_rate": 1e-05, "loss": 0.9819, "step": 82715 }, { "epoch": 73.26837909654562, "grad_norm": 0.2503224015235901, "learning_rate": 1e-05, "loss": 0.9609, "step": 82720 }, { "epoch": 73.27280779450841, "grad_norm": 0.2612695097923279, "learning_rate": 1e-05, "loss": 0.9727, "step": 82725 }, { "epoch": 73.27723649247122, "grad_norm": 0.2478746771812439, "learning_rate": 1e-05, "loss": 0.9544, "step": 82730 }, { "epoch": 73.28166519043401, "grad_norm": 0.28299659490585327, "learning_rate": 1e-05, "loss": 0.9871, "step": 82735 }, { "epoch": 73.2860938883968, "grad_norm": 0.26097890734672546, "learning_rate": 1e-05, "loss": 0.9486, "step": 82740 }, { "epoch": 73.29052258635961, "grad_norm": 0.24240058660507202, "learning_rate": 1e-05, "loss": 0.9965, "step": 82745 }, { "epoch": 73.29495128432241, "grad_norm": 0.2340339571237564, "learning_rate": 1e-05, "loss": 0.9613, "step": 82750 }, { "epoch": 73.2993799822852, "grad_norm": 0.2833745777606964, "learning_rate": 1e-05, "loss": 1.0017, "step": 82755 }, { "epoch": 73.30380868024801, "grad_norm": 0.22378607094287872, "learning_rate": 1e-05, "loss": 0.9449, "step": 82760 }, { "epoch": 73.3082373782108, "grad_norm": 0.2732107937335968, "learning_rate": 1e-05, "loss": 0.9356, "step": 82765 }, { "epoch": 73.3126660761736, "grad_norm": 0.2284357100725174, "learning_rate": 1e-05, "loss": 0.9571, "step": 82770 }, { "epoch": 73.31709477413641, "grad_norm": 0.24475449323654175, "learning_rate": 1e-05, "loss": 0.9833, "step": 82775 }, { "epoch": 73.3215234720992, "grad_norm": 0.22579967975616455, "learning_rate": 1e-05, "loss": 0.9672, "step": 82780 }, { "epoch": 73.325952170062, "grad_norm": 0.2380644530057907, "learning_rate": 1e-05, "loss": 1.0477, "step": 82785 }, { "epoch": 73.3303808680248, "grad_norm": 0.24849334359169006, "learning_rate": 1e-05, "loss": 0.902, "step": 82790 }, { "epoch": 73.3348095659876, "grad_norm": 0.2405947595834732, "learning_rate": 1e-05, "loss": 0.9469, "step": 82795 }, { "epoch": 73.33923826395039, "grad_norm": 0.2670816481113434, "learning_rate": 1e-05, "loss": 0.9285, "step": 82800 }, { "epoch": 73.3436669619132, "grad_norm": 0.23080633580684662, "learning_rate": 1e-05, "loss": 0.9569, "step": 82805 }, { "epoch": 73.348095659876, "grad_norm": 0.23334714770317078, "learning_rate": 1e-05, "loss": 0.9884, "step": 82810 }, { "epoch": 73.35252435783879, "grad_norm": 0.25967931747436523, "learning_rate": 1e-05, "loss": 0.9624, "step": 82815 }, { "epoch": 73.3569530558016, "grad_norm": 0.24247030913829803, "learning_rate": 1e-05, "loss": 0.9989, "step": 82820 }, { "epoch": 73.36138175376439, "grad_norm": 0.23570400476455688, "learning_rate": 1e-05, "loss": 0.9845, "step": 82825 }, { "epoch": 73.36581045172719, "grad_norm": 0.23097412288188934, "learning_rate": 1e-05, "loss": 0.9498, "step": 82830 }, { "epoch": 73.37023914969, "grad_norm": 0.22044362127780914, "learning_rate": 1e-05, "loss": 0.9598, "step": 82835 }, { "epoch": 73.37466784765279, "grad_norm": 0.24457845091819763, "learning_rate": 1e-05, "loss": 0.9521, "step": 82840 }, { "epoch": 73.3790965456156, "grad_norm": 0.25608763098716736, "learning_rate": 1e-05, "loss": 0.9521, "step": 82845 }, { "epoch": 73.38352524357839, "grad_norm": 0.268093466758728, "learning_rate": 1e-05, "loss": 0.9548, "step": 82850 }, { "epoch": 73.38795394154118, "grad_norm": 0.2697315216064453, "learning_rate": 1e-05, "loss": 0.9079, "step": 82855 }, { "epoch": 73.39238263950399, "grad_norm": 0.2225591391324997, "learning_rate": 1e-05, "loss": 1.0, "step": 82860 }, { "epoch": 73.39681133746679, "grad_norm": 0.2316623479127884, "learning_rate": 1e-05, "loss": 0.9884, "step": 82865 }, { "epoch": 73.40124003542958, "grad_norm": 0.25394290685653687, "learning_rate": 1e-05, "loss": 0.9903, "step": 82870 }, { "epoch": 73.40566873339239, "grad_norm": 0.25854936242103577, "learning_rate": 1e-05, "loss": 0.9496, "step": 82875 }, { "epoch": 73.41009743135518, "grad_norm": 0.2392587661743164, "learning_rate": 1e-05, "loss": 0.9359, "step": 82880 }, { "epoch": 73.41452612931798, "grad_norm": 0.23477937281131744, "learning_rate": 1e-05, "loss": 0.9426, "step": 82885 }, { "epoch": 73.41895482728079, "grad_norm": 0.23929959535598755, "learning_rate": 1e-05, "loss": 0.964, "step": 82890 }, { "epoch": 73.42338352524358, "grad_norm": 0.2164512723684311, "learning_rate": 1e-05, "loss": 0.9454, "step": 82895 }, { "epoch": 73.42781222320637, "grad_norm": 0.21970677375793457, "learning_rate": 1e-05, "loss": 0.9975, "step": 82900 }, { "epoch": 73.43224092116918, "grad_norm": 0.24705880880355835, "learning_rate": 1e-05, "loss": 0.9726, "step": 82905 }, { "epoch": 73.43666961913198, "grad_norm": 0.23214955627918243, "learning_rate": 1e-05, "loss": 0.9289, "step": 82910 }, { "epoch": 73.44109831709477, "grad_norm": 0.27697354555130005, "learning_rate": 1e-05, "loss": 0.9234, "step": 82915 }, { "epoch": 73.44552701505758, "grad_norm": 0.2728331685066223, "learning_rate": 1e-05, "loss": 0.9698, "step": 82920 }, { "epoch": 73.44995571302037, "grad_norm": 0.24829663336277008, "learning_rate": 1e-05, "loss": 0.9341, "step": 82925 }, { "epoch": 73.45438441098317, "grad_norm": 0.3199656307697296, "learning_rate": 1e-05, "loss": 0.9293, "step": 82930 }, { "epoch": 73.45881310894598, "grad_norm": 0.2506788671016693, "learning_rate": 1e-05, "loss": 0.9659, "step": 82935 }, { "epoch": 73.46324180690877, "grad_norm": 0.2483828067779541, "learning_rate": 1e-05, "loss": 1.0273, "step": 82940 }, { "epoch": 73.46767050487156, "grad_norm": 0.22795487940311432, "learning_rate": 1e-05, "loss": 0.955, "step": 82945 }, { "epoch": 73.47209920283437, "grad_norm": 0.23457051813602448, "learning_rate": 1e-05, "loss": 0.9394, "step": 82950 }, { "epoch": 73.47652790079717, "grad_norm": 0.26390478014945984, "learning_rate": 1e-05, "loss": 0.9633, "step": 82955 }, { "epoch": 73.48095659875996, "grad_norm": 0.22247827053070068, "learning_rate": 1e-05, "loss": 0.9898, "step": 82960 }, { "epoch": 73.48538529672277, "grad_norm": 0.22308751940727234, "learning_rate": 1e-05, "loss": 0.9229, "step": 82965 }, { "epoch": 73.48981399468556, "grad_norm": 0.27113136649131775, "learning_rate": 1e-05, "loss": 0.941, "step": 82970 }, { "epoch": 73.49424269264836, "grad_norm": 0.2309257984161377, "learning_rate": 1e-05, "loss": 0.9202, "step": 82975 }, { "epoch": 73.49867139061116, "grad_norm": 0.20855991542339325, "learning_rate": 1e-05, "loss": 0.959, "step": 82980 }, { "epoch": 73.50310008857396, "grad_norm": 0.2625739872455597, "learning_rate": 1e-05, "loss": 0.9483, "step": 82985 }, { "epoch": 73.50752878653675, "grad_norm": 0.20825833082199097, "learning_rate": 1e-05, "loss": 0.9534, "step": 82990 }, { "epoch": 73.51195748449956, "grad_norm": 0.2604326009750366, "learning_rate": 1e-05, "loss": 0.9397, "step": 82995 }, { "epoch": 73.51638618246236, "grad_norm": 0.25394269824028015, "learning_rate": 1e-05, "loss": 0.9189, "step": 83000 }, { "epoch": 73.52081488042515, "grad_norm": 0.22562089562416077, "learning_rate": 1e-05, "loss": 1.0096, "step": 83005 }, { "epoch": 73.52524357838796, "grad_norm": 0.22459916770458221, "learning_rate": 1e-05, "loss": 0.9523, "step": 83010 }, { "epoch": 73.52967227635075, "grad_norm": 0.23401667177677155, "learning_rate": 1e-05, "loss": 0.9632, "step": 83015 }, { "epoch": 73.53410097431355, "grad_norm": 0.25175946950912476, "learning_rate": 1e-05, "loss": 0.9687, "step": 83020 }, { "epoch": 73.53852967227635, "grad_norm": 0.20914360880851746, "learning_rate": 1e-05, "loss": 1.0177, "step": 83025 }, { "epoch": 73.54295837023915, "grad_norm": 0.38618069887161255, "learning_rate": 1e-05, "loss": 0.9007, "step": 83030 }, { "epoch": 73.54738706820194, "grad_norm": 0.24266676604747772, "learning_rate": 1e-05, "loss": 0.9661, "step": 83035 }, { "epoch": 73.55181576616475, "grad_norm": 0.24347826838493347, "learning_rate": 1e-05, "loss": 0.9268, "step": 83040 }, { "epoch": 73.55624446412754, "grad_norm": 0.21379315853118896, "learning_rate": 1e-05, "loss": 0.9446, "step": 83045 }, { "epoch": 73.56067316209034, "grad_norm": 0.23964732885360718, "learning_rate": 1e-05, "loss": 0.969, "step": 83050 }, { "epoch": 73.56510186005315, "grad_norm": 0.19100530445575714, "learning_rate": 1e-05, "loss": 0.9866, "step": 83055 }, { "epoch": 73.56953055801594, "grad_norm": 0.237108513712883, "learning_rate": 1e-05, "loss": 0.9546, "step": 83060 }, { "epoch": 73.57395925597874, "grad_norm": 0.25132131576538086, "learning_rate": 1e-05, "loss": 0.9671, "step": 83065 }, { "epoch": 73.57838795394154, "grad_norm": 0.25894325971603394, "learning_rate": 1e-05, "loss": 0.9684, "step": 83070 }, { "epoch": 73.58281665190434, "grad_norm": 0.2741222381591797, "learning_rate": 1e-05, "loss": 0.9935, "step": 83075 }, { "epoch": 73.58724534986715, "grad_norm": 0.2366926670074463, "learning_rate": 1e-05, "loss": 0.9703, "step": 83080 }, { "epoch": 73.59167404782994, "grad_norm": 0.2619396150112152, "learning_rate": 1e-05, "loss": 0.9814, "step": 83085 }, { "epoch": 73.59610274579273, "grad_norm": 0.22304563224315643, "learning_rate": 1e-05, "loss": 0.9405, "step": 83090 }, { "epoch": 73.60053144375554, "grad_norm": 0.20240060985088348, "learning_rate": 1e-05, "loss": 1.0143, "step": 83095 }, { "epoch": 73.60496014171834, "grad_norm": 0.2572607100009918, "learning_rate": 1e-05, "loss": 0.9623, "step": 83100 }, { "epoch": 73.60938883968113, "grad_norm": 0.2057189792394638, "learning_rate": 1e-05, "loss": 0.9475, "step": 83105 }, { "epoch": 73.61381753764394, "grad_norm": 0.20827379822731018, "learning_rate": 1e-05, "loss": 0.9387, "step": 83110 }, { "epoch": 73.61824623560673, "grad_norm": 0.204784095287323, "learning_rate": 1e-05, "loss": 0.9683, "step": 83115 }, { "epoch": 73.62267493356953, "grad_norm": 0.25376030802726746, "learning_rate": 1e-05, "loss": 0.9685, "step": 83120 }, { "epoch": 73.62710363153234, "grad_norm": 0.26508691906929016, "learning_rate": 1e-05, "loss": 1.0029, "step": 83125 }, { "epoch": 73.63153232949513, "grad_norm": 0.2635312080383301, "learning_rate": 1e-05, "loss": 0.9165, "step": 83130 }, { "epoch": 73.63596102745792, "grad_norm": 0.22795486450195312, "learning_rate": 1e-05, "loss": 0.9631, "step": 83135 }, { "epoch": 73.64038972542073, "grad_norm": 0.2265150398015976, "learning_rate": 1e-05, "loss": 1.0049, "step": 83140 }, { "epoch": 73.64481842338353, "grad_norm": 0.21949546039104462, "learning_rate": 1e-05, "loss": 0.9824, "step": 83145 }, { "epoch": 73.64924712134632, "grad_norm": 0.23996548354625702, "learning_rate": 1e-05, "loss": 0.9265, "step": 83150 }, { "epoch": 73.65367581930913, "grad_norm": 0.24817617237567902, "learning_rate": 1e-05, "loss": 0.9252, "step": 83155 }, { "epoch": 73.65810451727192, "grad_norm": 0.24546799063682556, "learning_rate": 1e-05, "loss": 0.9799, "step": 83160 }, { "epoch": 73.66253321523472, "grad_norm": 0.2327755242586136, "learning_rate": 1e-05, "loss": 0.9369, "step": 83165 }, { "epoch": 73.66696191319753, "grad_norm": 0.2609008550643921, "learning_rate": 1e-05, "loss": 0.9931, "step": 83170 }, { "epoch": 73.67139061116032, "grad_norm": 0.2279651015996933, "learning_rate": 1e-05, "loss": 0.9484, "step": 83175 }, { "epoch": 73.67581930912311, "grad_norm": 0.2589198648929596, "learning_rate": 1e-05, "loss": 0.9527, "step": 83180 }, { "epoch": 73.68024800708592, "grad_norm": 0.2010425478219986, "learning_rate": 1e-05, "loss": 0.9614, "step": 83185 }, { "epoch": 73.68467670504872, "grad_norm": 0.20207096636295319, "learning_rate": 1e-05, "loss": 0.9112, "step": 83190 }, { "epoch": 73.68910540301151, "grad_norm": 0.24023544788360596, "learning_rate": 1e-05, "loss": 0.9917, "step": 83195 }, { "epoch": 73.69353410097432, "grad_norm": 0.21951812505722046, "learning_rate": 1e-05, "loss": 0.9959, "step": 83200 }, { "epoch": 73.69796279893711, "grad_norm": 0.24895143508911133, "learning_rate": 1e-05, "loss": 0.9505, "step": 83205 }, { "epoch": 73.7023914968999, "grad_norm": 0.22392037510871887, "learning_rate": 1e-05, "loss": 0.9341, "step": 83210 }, { "epoch": 73.70682019486271, "grad_norm": 0.2230488806962967, "learning_rate": 1e-05, "loss": 0.9677, "step": 83215 }, { "epoch": 73.71124889282551, "grad_norm": 0.25851505994796753, "learning_rate": 1e-05, "loss": 0.9559, "step": 83220 }, { "epoch": 73.7156775907883, "grad_norm": 0.2613077461719513, "learning_rate": 1e-05, "loss": 1.0098, "step": 83225 }, { "epoch": 73.72010628875111, "grad_norm": 0.25208404660224915, "learning_rate": 1e-05, "loss": 0.9909, "step": 83230 }, { "epoch": 73.7245349867139, "grad_norm": 0.2495100200176239, "learning_rate": 1e-05, "loss": 0.9526, "step": 83235 }, { "epoch": 73.7289636846767, "grad_norm": 0.2525523602962494, "learning_rate": 1e-05, "loss": 0.9418, "step": 83240 }, { "epoch": 73.73339238263951, "grad_norm": 0.21497578918933868, "learning_rate": 1e-05, "loss": 1.0068, "step": 83245 }, { "epoch": 73.7378210806023, "grad_norm": 0.2412101775407791, "learning_rate": 1e-05, "loss": 0.9826, "step": 83250 }, { "epoch": 73.7422497785651, "grad_norm": 0.27067261934280396, "learning_rate": 1e-05, "loss": 0.9308, "step": 83255 }, { "epoch": 73.7466784765279, "grad_norm": 0.27735018730163574, "learning_rate": 1e-05, "loss": 0.9406, "step": 83260 }, { "epoch": 73.7511071744907, "grad_norm": 0.22332754731178284, "learning_rate": 1e-05, "loss": 0.9617, "step": 83265 }, { "epoch": 73.75553587245349, "grad_norm": 0.21955981850624084, "learning_rate": 1e-05, "loss": 0.9707, "step": 83270 }, { "epoch": 73.7599645704163, "grad_norm": 0.24124151468276978, "learning_rate": 1e-05, "loss": 0.9505, "step": 83275 }, { "epoch": 73.7643932683791, "grad_norm": 0.2841683626174927, "learning_rate": 1e-05, "loss": 0.9829, "step": 83280 }, { "epoch": 73.76882196634189, "grad_norm": 0.2517390251159668, "learning_rate": 1e-05, "loss": 1.0646, "step": 83285 }, { "epoch": 73.7732506643047, "grad_norm": 0.25931620597839355, "learning_rate": 1e-05, "loss": 0.8927, "step": 83290 }, { "epoch": 73.77767936226749, "grad_norm": 0.23221397399902344, "learning_rate": 1e-05, "loss": 0.978, "step": 83295 }, { "epoch": 73.78210806023029, "grad_norm": 0.2714071571826935, "learning_rate": 1e-05, "loss": 0.9809, "step": 83300 }, { "epoch": 73.7865367581931, "grad_norm": 0.25025326013565063, "learning_rate": 1e-05, "loss": 0.9878, "step": 83305 }, { "epoch": 73.79096545615589, "grad_norm": 0.23740044236183167, "learning_rate": 1e-05, "loss": 0.9473, "step": 83310 }, { "epoch": 73.79539415411868, "grad_norm": 0.2720984220504761, "learning_rate": 1e-05, "loss": 0.9604, "step": 83315 }, { "epoch": 73.79982285208149, "grad_norm": 0.22173546254634857, "learning_rate": 1e-05, "loss": 0.9494, "step": 83320 }, { "epoch": 73.80425155004428, "grad_norm": 0.26164770126342773, "learning_rate": 1e-05, "loss": 0.9546, "step": 83325 }, { "epoch": 73.8086802480071, "grad_norm": 0.23208299279212952, "learning_rate": 1e-05, "loss": 0.9444, "step": 83330 }, { "epoch": 73.81310894596989, "grad_norm": 0.2349410206079483, "learning_rate": 1e-05, "loss": 0.9836, "step": 83335 }, { "epoch": 73.81753764393268, "grad_norm": 0.26026564836502075, "learning_rate": 1e-05, "loss": 0.9221, "step": 83340 }, { "epoch": 73.82196634189549, "grad_norm": 0.24884694814682007, "learning_rate": 1e-05, "loss": 0.9488, "step": 83345 }, { "epoch": 73.82639503985828, "grad_norm": 0.2892662584781647, "learning_rate": 1e-05, "loss": 0.9267, "step": 83350 }, { "epoch": 73.83082373782108, "grad_norm": 0.26073071360588074, "learning_rate": 1e-05, "loss": 0.9322, "step": 83355 }, { "epoch": 73.83525243578389, "grad_norm": 0.27598854899406433, "learning_rate": 1e-05, "loss": 0.9691, "step": 83360 }, { "epoch": 73.83968113374668, "grad_norm": 0.22965501248836517, "learning_rate": 1e-05, "loss": 0.8721, "step": 83365 }, { "epoch": 73.84410983170947, "grad_norm": 0.21010957658290863, "learning_rate": 1e-05, "loss": 0.9415, "step": 83370 }, { "epoch": 73.84853852967228, "grad_norm": 0.22666208446025848, "learning_rate": 1e-05, "loss": 0.9281, "step": 83375 }, { "epoch": 73.85296722763508, "grad_norm": 0.2133139967918396, "learning_rate": 1e-05, "loss": 0.9314, "step": 83380 }, { "epoch": 73.85739592559787, "grad_norm": 0.21636372804641724, "learning_rate": 1e-05, "loss": 0.9505, "step": 83385 }, { "epoch": 73.86182462356068, "grad_norm": 0.23691928386688232, "learning_rate": 1e-05, "loss": 0.9627, "step": 83390 }, { "epoch": 73.86625332152347, "grad_norm": 0.2837972939014435, "learning_rate": 1e-05, "loss": 0.967, "step": 83395 }, { "epoch": 73.87068201948627, "grad_norm": 0.24087807536125183, "learning_rate": 1e-05, "loss": 0.9467, "step": 83400 }, { "epoch": 73.87511071744908, "grad_norm": 0.2588447630405426, "learning_rate": 1e-05, "loss": 0.9179, "step": 83405 }, { "epoch": 73.87953941541187, "grad_norm": 0.2112160474061966, "learning_rate": 1e-05, "loss": 0.9564, "step": 83410 }, { "epoch": 73.88396811337466, "grad_norm": 0.2502126395702362, "learning_rate": 1e-05, "loss": 0.9367, "step": 83415 }, { "epoch": 73.88839681133747, "grad_norm": 0.250557005405426, "learning_rate": 1e-05, "loss": 0.8922, "step": 83420 }, { "epoch": 73.89282550930027, "grad_norm": 0.23844382166862488, "learning_rate": 1e-05, "loss": 0.9763, "step": 83425 }, { "epoch": 73.89725420726306, "grad_norm": 0.24517607688903809, "learning_rate": 1e-05, "loss": 0.9527, "step": 83430 }, { "epoch": 73.90168290522587, "grad_norm": 0.2454157918691635, "learning_rate": 1e-05, "loss": 0.9938, "step": 83435 }, { "epoch": 73.90611160318866, "grad_norm": 0.20270434021949768, "learning_rate": 1e-05, "loss": 0.9296, "step": 83440 }, { "epoch": 73.91054030115146, "grad_norm": 0.23969697952270508, "learning_rate": 1e-05, "loss": 0.93, "step": 83445 }, { "epoch": 73.91496899911427, "grad_norm": 0.2249893993139267, "learning_rate": 1e-05, "loss": 0.9334, "step": 83450 }, { "epoch": 73.91939769707706, "grad_norm": 0.24709507822990417, "learning_rate": 1e-05, "loss": 0.9471, "step": 83455 }, { "epoch": 73.92382639503985, "grad_norm": 0.22869141399860382, "learning_rate": 1e-05, "loss": 0.9762, "step": 83460 }, { "epoch": 73.92825509300266, "grad_norm": 0.2149641215801239, "learning_rate": 1e-05, "loss": 0.9998, "step": 83465 }, { "epoch": 73.93268379096546, "grad_norm": 0.2631613314151764, "learning_rate": 1e-05, "loss": 0.9351, "step": 83470 }, { "epoch": 73.93711248892825, "grad_norm": 0.21642833948135376, "learning_rate": 1e-05, "loss": 0.9894, "step": 83475 }, { "epoch": 73.94154118689106, "grad_norm": 0.21103224158287048, "learning_rate": 1e-05, "loss": 0.9406, "step": 83480 }, { "epoch": 73.94596988485385, "grad_norm": 0.20202122628688812, "learning_rate": 1e-05, "loss": 1.0347, "step": 83485 }, { "epoch": 73.95039858281665, "grad_norm": 0.23157206177711487, "learning_rate": 1e-05, "loss": 0.9501, "step": 83490 }, { "epoch": 73.95482728077945, "grad_norm": 0.23936142027378082, "learning_rate": 1e-05, "loss": 0.9487, "step": 83495 }, { "epoch": 73.95925597874225, "grad_norm": 0.19143910706043243, "learning_rate": 1e-05, "loss": 0.8995, "step": 83500 }, { "epoch": 73.96368467670504, "grad_norm": 0.297951877117157, "learning_rate": 1e-05, "loss": 0.9598, "step": 83505 }, { "epoch": 73.96811337466785, "grad_norm": 0.26755356788635254, "learning_rate": 1e-05, "loss": 1.0491, "step": 83510 }, { "epoch": 73.97254207263065, "grad_norm": 0.24796029925346375, "learning_rate": 1e-05, "loss": 0.9749, "step": 83515 }, { "epoch": 73.97697077059344, "grad_norm": 0.28519660234451294, "learning_rate": 1e-05, "loss": 0.9911, "step": 83520 }, { "epoch": 73.98139946855625, "grad_norm": 0.24823503196239471, "learning_rate": 1e-05, "loss": 0.9727, "step": 83525 }, { "epoch": 73.98582816651904, "grad_norm": 0.24633878469467163, "learning_rate": 1e-05, "loss": 0.9755, "step": 83530 }, { "epoch": 73.99025686448184, "grad_norm": 0.22910018265247345, "learning_rate": 1e-05, "loss": 1.0049, "step": 83535 }, { "epoch": 73.99468556244464, "grad_norm": 0.26459747552871704, "learning_rate": 1e-05, "loss": 0.9209, "step": 83540 }, { "epoch": 73.99911426040744, "grad_norm": 0.25838255882263184, "learning_rate": 1e-05, "loss": 0.9713, "step": 83545 }, { "epoch": 74.00354295837023, "grad_norm": 0.21516035497188568, "learning_rate": 1e-05, "loss": 0.9606, "step": 83550 }, { "epoch": 74.00797165633304, "grad_norm": 0.2380024641752243, "learning_rate": 1e-05, "loss": 0.9583, "step": 83555 }, { "epoch": 74.01240035429583, "grad_norm": 0.25423529744148254, "learning_rate": 1e-05, "loss": 0.9464, "step": 83560 }, { "epoch": 74.01682905225863, "grad_norm": 0.18841908872127533, "learning_rate": 1e-05, "loss": 0.9558, "step": 83565 }, { "epoch": 74.02125775022144, "grad_norm": 0.22169649600982666, "learning_rate": 1e-05, "loss": 0.9468, "step": 83570 }, { "epoch": 74.02568644818423, "grad_norm": 0.24355438351631165, "learning_rate": 1e-05, "loss": 0.9544, "step": 83575 }, { "epoch": 74.03011514614704, "grad_norm": 0.2513810694217682, "learning_rate": 1e-05, "loss": 0.9484, "step": 83580 }, { "epoch": 74.03454384410983, "grad_norm": 0.2404751479625702, "learning_rate": 1e-05, "loss": 0.919, "step": 83585 }, { "epoch": 74.03897254207263, "grad_norm": 0.21027123928070068, "learning_rate": 1e-05, "loss": 0.9794, "step": 83590 }, { "epoch": 74.04340124003544, "grad_norm": 0.23689305782318115, "learning_rate": 1e-05, "loss": 0.9566, "step": 83595 }, { "epoch": 74.04782993799823, "grad_norm": 0.2254077047109604, "learning_rate": 1e-05, "loss": 1.013, "step": 83600 }, { "epoch": 74.05225863596102, "grad_norm": 0.22020323574543, "learning_rate": 1e-05, "loss": 1.0195, "step": 83605 }, { "epoch": 74.05668733392383, "grad_norm": 0.24068717658519745, "learning_rate": 1e-05, "loss": 1.0202, "step": 83610 }, { "epoch": 74.06111603188663, "grad_norm": 0.25655418634414673, "learning_rate": 1e-05, "loss": 0.985, "step": 83615 }, { "epoch": 74.06554472984942, "grad_norm": 0.23864029347896576, "learning_rate": 1e-05, "loss": 0.9607, "step": 83620 }, { "epoch": 74.06997342781223, "grad_norm": 0.2524789869785309, "learning_rate": 1e-05, "loss": 0.9314, "step": 83625 }, { "epoch": 74.07440212577502, "grad_norm": 0.244278222322464, "learning_rate": 1e-05, "loss": 0.9567, "step": 83630 }, { "epoch": 74.07883082373782, "grad_norm": 0.2437145859003067, "learning_rate": 1e-05, "loss": 0.9958, "step": 83635 }, { "epoch": 74.08325952170063, "grad_norm": 0.2703615427017212, "learning_rate": 1e-05, "loss": 0.9744, "step": 83640 }, { "epoch": 74.08768821966342, "grad_norm": 0.22829675674438477, "learning_rate": 1e-05, "loss": 0.9326, "step": 83645 }, { "epoch": 74.09211691762621, "grad_norm": 0.276937872171402, "learning_rate": 1e-05, "loss": 0.9944, "step": 83650 }, { "epoch": 74.09654561558902, "grad_norm": 0.2446051687002182, "learning_rate": 1e-05, "loss": 0.9622, "step": 83655 }, { "epoch": 74.10097431355182, "grad_norm": 0.26338323950767517, "learning_rate": 1e-05, "loss": 0.9402, "step": 83660 }, { "epoch": 74.10540301151461, "grad_norm": 0.24262087047100067, "learning_rate": 1e-05, "loss": 0.9346, "step": 83665 }, { "epoch": 74.10983170947742, "grad_norm": 0.2427598237991333, "learning_rate": 1e-05, "loss": 0.9548, "step": 83670 }, { "epoch": 74.11426040744021, "grad_norm": 0.19425921142101288, "learning_rate": 1e-05, "loss": 1.017, "step": 83675 }, { "epoch": 74.118689105403, "grad_norm": 0.23510712385177612, "learning_rate": 1e-05, "loss": 0.9967, "step": 83680 }, { "epoch": 74.12311780336582, "grad_norm": 0.27288198471069336, "learning_rate": 1e-05, "loss": 0.9754, "step": 83685 }, { "epoch": 74.12754650132861, "grad_norm": 0.2708515524864197, "learning_rate": 1e-05, "loss": 0.9448, "step": 83690 }, { "epoch": 74.1319751992914, "grad_norm": 0.22248634696006775, "learning_rate": 1e-05, "loss": 0.9831, "step": 83695 }, { "epoch": 74.13640389725421, "grad_norm": 0.26697608828544617, "learning_rate": 1e-05, "loss": 0.9659, "step": 83700 }, { "epoch": 74.140832595217, "grad_norm": 0.2455085813999176, "learning_rate": 1e-05, "loss": 0.9443, "step": 83705 }, { "epoch": 74.1452612931798, "grad_norm": 0.23500816524028778, "learning_rate": 1e-05, "loss": 1.012, "step": 83710 }, { "epoch": 74.14968999114261, "grad_norm": 0.2902398109436035, "learning_rate": 1e-05, "loss": 0.9754, "step": 83715 }, { "epoch": 74.1541186891054, "grad_norm": 0.23216289281845093, "learning_rate": 1e-05, "loss": 0.9244, "step": 83720 }, { "epoch": 74.1585473870682, "grad_norm": 0.24378244578838348, "learning_rate": 1e-05, "loss": 0.9993, "step": 83725 }, { "epoch": 74.162976085031, "grad_norm": 0.2358853667974472, "learning_rate": 1e-05, "loss": 0.9707, "step": 83730 }, { "epoch": 74.1674047829938, "grad_norm": 0.2271830439567566, "learning_rate": 1e-05, "loss": 0.9116, "step": 83735 }, { "epoch": 74.1718334809566, "grad_norm": 0.21153266727924347, "learning_rate": 1e-05, "loss": 0.9009, "step": 83740 }, { "epoch": 74.1762621789194, "grad_norm": 0.24842631816864014, "learning_rate": 1e-05, "loss": 1.0011, "step": 83745 }, { "epoch": 74.1806908768822, "grad_norm": 0.2902752757072449, "learning_rate": 1e-05, "loss": 0.9958, "step": 83750 }, { "epoch": 74.18511957484499, "grad_norm": 0.23537321388721466, "learning_rate": 1e-05, "loss": 0.9571, "step": 83755 }, { "epoch": 74.1895482728078, "grad_norm": 0.25719574093818665, "learning_rate": 1e-05, "loss": 1.0079, "step": 83760 }, { "epoch": 74.19397697077059, "grad_norm": 0.229341059923172, "learning_rate": 1e-05, "loss": 0.9785, "step": 83765 }, { "epoch": 74.19840566873339, "grad_norm": 0.253781795501709, "learning_rate": 1e-05, "loss": 0.9892, "step": 83770 }, { "epoch": 74.2028343666962, "grad_norm": 0.22993256151676178, "learning_rate": 1e-05, "loss": 0.9799, "step": 83775 }, { "epoch": 74.20726306465899, "grad_norm": 0.22871562838554382, "learning_rate": 1e-05, "loss": 1.0125, "step": 83780 }, { "epoch": 74.21169176262178, "grad_norm": 0.24034012854099274, "learning_rate": 1e-05, "loss": 0.9598, "step": 83785 }, { "epoch": 74.21612046058459, "grad_norm": 0.23713849484920502, "learning_rate": 1e-05, "loss": 0.995, "step": 83790 }, { "epoch": 74.22054915854739, "grad_norm": 0.24475929141044617, "learning_rate": 1e-05, "loss": 0.9534, "step": 83795 }, { "epoch": 74.22497785651018, "grad_norm": 0.2614419758319855, "learning_rate": 1e-05, "loss": 0.9806, "step": 83800 }, { "epoch": 74.22940655447299, "grad_norm": 0.2253301590681076, "learning_rate": 1e-05, "loss": 1.0132, "step": 83805 }, { "epoch": 74.23383525243578, "grad_norm": 0.2708972990512848, "learning_rate": 1e-05, "loss": 0.9138, "step": 83810 }, { "epoch": 74.23826395039858, "grad_norm": 0.24148797988891602, "learning_rate": 1e-05, "loss": 0.9755, "step": 83815 }, { "epoch": 74.24269264836138, "grad_norm": 0.2569144070148468, "learning_rate": 1e-05, "loss": 0.9919, "step": 83820 }, { "epoch": 74.24712134632418, "grad_norm": 0.2280445098876953, "learning_rate": 1e-05, "loss": 0.9875, "step": 83825 }, { "epoch": 74.25155004428699, "grad_norm": 0.2669070363044739, "learning_rate": 1e-05, "loss": 0.9607, "step": 83830 }, { "epoch": 74.25597874224978, "grad_norm": 0.21201454102993011, "learning_rate": 1e-05, "loss": 0.9236, "step": 83835 }, { "epoch": 74.26040744021257, "grad_norm": 0.27060744166374207, "learning_rate": 1e-05, "loss": 0.9799, "step": 83840 }, { "epoch": 74.26483613817538, "grad_norm": 0.2669195234775543, "learning_rate": 1e-05, "loss": 0.9696, "step": 83845 }, { "epoch": 74.26926483613818, "grad_norm": 0.30005455017089844, "learning_rate": 1e-05, "loss": 0.9395, "step": 83850 }, { "epoch": 74.27369353410097, "grad_norm": 0.2485140711069107, "learning_rate": 1e-05, "loss": 0.9233, "step": 83855 }, { "epoch": 74.27812223206378, "grad_norm": 0.23749056458473206, "learning_rate": 1e-05, "loss": 0.9686, "step": 83860 }, { "epoch": 74.28255093002657, "grad_norm": 0.29755693674087524, "learning_rate": 1e-05, "loss": 1.0007, "step": 83865 }, { "epoch": 74.28697962798937, "grad_norm": 0.2690080404281616, "learning_rate": 1e-05, "loss": 0.9837, "step": 83870 }, { "epoch": 74.29140832595218, "grad_norm": 0.26526302099227905, "learning_rate": 1e-05, "loss": 0.943, "step": 83875 }, { "epoch": 74.29583702391497, "grad_norm": 0.22737567126750946, "learning_rate": 1e-05, "loss": 0.9871, "step": 83880 }, { "epoch": 74.30026572187776, "grad_norm": 0.20818877220153809, "learning_rate": 1e-05, "loss": 0.9813, "step": 83885 }, { "epoch": 74.30469441984057, "grad_norm": 0.23308666050434113, "learning_rate": 1e-05, "loss": 0.9743, "step": 83890 }, { "epoch": 74.30912311780337, "grad_norm": 0.2413572072982788, "learning_rate": 1e-05, "loss": 0.9467, "step": 83895 }, { "epoch": 74.31355181576616, "grad_norm": 0.23688402771949768, "learning_rate": 1e-05, "loss": 1.0045, "step": 83900 }, { "epoch": 74.31798051372897, "grad_norm": 0.24354438483715057, "learning_rate": 1e-05, "loss": 0.8995, "step": 83905 }, { "epoch": 74.32240921169176, "grad_norm": 0.22352060675621033, "learning_rate": 1e-05, "loss": 0.9157, "step": 83910 }, { "epoch": 74.32683790965456, "grad_norm": 0.247068390250206, "learning_rate": 1e-05, "loss": 0.9722, "step": 83915 }, { "epoch": 74.33126660761737, "grad_norm": 0.23777520656585693, "learning_rate": 1e-05, "loss": 0.9331, "step": 83920 }, { "epoch": 74.33569530558016, "grad_norm": 0.2843903601169586, "learning_rate": 1e-05, "loss": 0.9461, "step": 83925 }, { "epoch": 74.34012400354295, "grad_norm": 0.2221992164850235, "learning_rate": 1e-05, "loss": 0.9246, "step": 83930 }, { "epoch": 74.34455270150576, "grad_norm": 0.22756318747997284, "learning_rate": 1e-05, "loss": 0.9815, "step": 83935 }, { "epoch": 74.34898139946856, "grad_norm": 0.2188238501548767, "learning_rate": 1e-05, "loss": 1.0211, "step": 83940 }, { "epoch": 74.35341009743135, "grad_norm": 0.23725594580173492, "learning_rate": 1e-05, "loss": 0.9378, "step": 83945 }, { "epoch": 74.35783879539416, "grad_norm": 0.23624736070632935, "learning_rate": 1e-05, "loss": 0.9393, "step": 83950 }, { "epoch": 74.36226749335695, "grad_norm": 0.22528259456157684, "learning_rate": 1e-05, "loss": 0.9492, "step": 83955 }, { "epoch": 74.36669619131975, "grad_norm": 0.21942612528800964, "learning_rate": 1e-05, "loss": 0.9975, "step": 83960 }, { "epoch": 74.37112488928256, "grad_norm": 0.24189907312393188, "learning_rate": 1e-05, "loss": 0.967, "step": 83965 }, { "epoch": 74.37555358724535, "grad_norm": 0.2378605455160141, "learning_rate": 1e-05, "loss": 0.9537, "step": 83970 }, { "epoch": 74.37998228520814, "grad_norm": 0.23476238548755646, "learning_rate": 1e-05, "loss": 0.9583, "step": 83975 }, { "epoch": 74.38441098317095, "grad_norm": 0.25473737716674805, "learning_rate": 1e-05, "loss": 0.9361, "step": 83980 }, { "epoch": 74.38883968113375, "grad_norm": 0.27539709210395813, "learning_rate": 1e-05, "loss": 0.9889, "step": 83985 }, { "epoch": 74.39326837909654, "grad_norm": 0.21888090670108795, "learning_rate": 1e-05, "loss": 0.9162, "step": 83990 }, { "epoch": 74.39769707705935, "grad_norm": 0.28328707814216614, "learning_rate": 1e-05, "loss": 0.9478, "step": 83995 }, { "epoch": 74.40212577502214, "grad_norm": 0.2491467148065567, "learning_rate": 1e-05, "loss": 0.9409, "step": 84000 }, { "epoch": 74.40655447298494, "grad_norm": 0.24903646111488342, "learning_rate": 1e-05, "loss": 0.9533, "step": 84005 }, { "epoch": 74.41098317094774, "grad_norm": 0.2287069708108902, "learning_rate": 1e-05, "loss": 0.9343, "step": 84010 }, { "epoch": 74.41541186891054, "grad_norm": 0.24306733906269073, "learning_rate": 1e-05, "loss": 1.001, "step": 84015 }, { "epoch": 74.41984056687333, "grad_norm": 0.22020022571086884, "learning_rate": 1e-05, "loss": 0.9701, "step": 84020 }, { "epoch": 74.42426926483614, "grad_norm": 0.2536507546901703, "learning_rate": 1e-05, "loss": 0.9757, "step": 84025 }, { "epoch": 74.42869796279894, "grad_norm": 0.20949707925319672, "learning_rate": 1e-05, "loss": 0.9701, "step": 84030 }, { "epoch": 74.43312666076173, "grad_norm": 0.2516788840293884, "learning_rate": 1e-05, "loss": 0.9371, "step": 84035 }, { "epoch": 74.43755535872454, "grad_norm": 0.25324663519859314, "learning_rate": 1e-05, "loss": 0.9946, "step": 84040 }, { "epoch": 74.44198405668733, "grad_norm": 0.22406317293643951, "learning_rate": 1e-05, "loss": 0.9343, "step": 84045 }, { "epoch": 74.44641275465013, "grad_norm": 0.25456079840660095, "learning_rate": 1e-05, "loss": 0.9643, "step": 84050 }, { "epoch": 74.45084145261293, "grad_norm": 0.23706777393817902, "learning_rate": 1e-05, "loss": 1.0177, "step": 84055 }, { "epoch": 74.45527015057573, "grad_norm": 0.2422616183757782, "learning_rate": 1e-05, "loss": 0.9386, "step": 84060 }, { "epoch": 74.45969884853854, "grad_norm": 0.2437332719564438, "learning_rate": 1e-05, "loss": 0.9004, "step": 84065 }, { "epoch": 74.46412754650133, "grad_norm": 0.25202903151512146, "learning_rate": 1e-05, "loss": 0.9634, "step": 84070 }, { "epoch": 74.46855624446412, "grad_norm": 0.25213393568992615, "learning_rate": 1e-05, "loss": 1.021, "step": 84075 }, { "epoch": 74.47298494242693, "grad_norm": 0.2552889585494995, "learning_rate": 1e-05, "loss": 1.0029, "step": 84080 }, { "epoch": 74.47741364038973, "grad_norm": 0.23778022825717926, "learning_rate": 1e-05, "loss": 0.9454, "step": 84085 }, { "epoch": 74.48184233835252, "grad_norm": 0.22344079613685608, "learning_rate": 1e-05, "loss": 1.0069, "step": 84090 }, { "epoch": 74.48627103631533, "grad_norm": 0.25441935658454895, "learning_rate": 1e-05, "loss": 0.9918, "step": 84095 }, { "epoch": 74.49069973427812, "grad_norm": 0.2612452208995819, "learning_rate": 1e-05, "loss": 0.9318, "step": 84100 }, { "epoch": 74.49512843224092, "grad_norm": 0.2253248542547226, "learning_rate": 1e-05, "loss": 0.9681, "step": 84105 }, { "epoch": 74.49955713020373, "grad_norm": 0.24539245665073395, "learning_rate": 1e-05, "loss": 1.0394, "step": 84110 }, { "epoch": 74.50398582816652, "grad_norm": 0.23280608654022217, "learning_rate": 1e-05, "loss": 0.9826, "step": 84115 }, { "epoch": 74.50841452612931, "grad_norm": 0.22873243689537048, "learning_rate": 1e-05, "loss": 0.9985, "step": 84120 }, { "epoch": 74.51284322409212, "grad_norm": 0.2524687647819519, "learning_rate": 1e-05, "loss": 0.9708, "step": 84125 }, { "epoch": 74.51727192205492, "grad_norm": 0.22405165433883667, "learning_rate": 1e-05, "loss": 0.9484, "step": 84130 }, { "epoch": 74.52170062001771, "grad_norm": 0.24014557898044586, "learning_rate": 1e-05, "loss": 0.9568, "step": 84135 }, { "epoch": 74.52612931798052, "grad_norm": 0.2226085066795349, "learning_rate": 1e-05, "loss": 1.0028, "step": 84140 }, { "epoch": 74.53055801594331, "grad_norm": 0.25360554456710815, "learning_rate": 1e-05, "loss": 1.0151, "step": 84145 }, { "epoch": 74.53498671390611, "grad_norm": 0.2232673466205597, "learning_rate": 1e-05, "loss": 1.0021, "step": 84150 }, { "epoch": 74.53941541186892, "grad_norm": 0.28131598234176636, "learning_rate": 1e-05, "loss": 0.9546, "step": 84155 }, { "epoch": 74.54384410983171, "grad_norm": 0.23310501873493195, "learning_rate": 1e-05, "loss": 0.9829, "step": 84160 }, { "epoch": 74.5482728077945, "grad_norm": 0.21536749601364136, "learning_rate": 1e-05, "loss": 0.9947, "step": 84165 }, { "epoch": 74.55270150575731, "grad_norm": 0.2521098554134369, "learning_rate": 1e-05, "loss": 0.9741, "step": 84170 }, { "epoch": 74.5571302037201, "grad_norm": 0.26201021671295166, "learning_rate": 1e-05, "loss": 1.0025, "step": 84175 }, { "epoch": 74.5615589016829, "grad_norm": 0.23942938446998596, "learning_rate": 1e-05, "loss": 0.9714, "step": 84180 }, { "epoch": 74.56598759964571, "grad_norm": 0.23541148006916046, "learning_rate": 1e-05, "loss": 0.9541, "step": 84185 }, { "epoch": 74.5704162976085, "grad_norm": 0.240321084856987, "learning_rate": 1e-05, "loss": 0.9689, "step": 84190 }, { "epoch": 74.5748449955713, "grad_norm": 0.2605343759059906, "learning_rate": 1e-05, "loss": 0.9441, "step": 84195 }, { "epoch": 74.5792736935341, "grad_norm": 0.28967800736427307, "learning_rate": 1e-05, "loss": 0.9791, "step": 84200 }, { "epoch": 74.5837023914969, "grad_norm": 0.25075262784957886, "learning_rate": 1e-05, "loss": 0.9841, "step": 84205 }, { "epoch": 74.5881310894597, "grad_norm": 0.2145904004573822, "learning_rate": 1e-05, "loss": 0.9706, "step": 84210 }, { "epoch": 74.5925597874225, "grad_norm": 0.221253901720047, "learning_rate": 1e-05, "loss": 0.9561, "step": 84215 }, { "epoch": 74.5969884853853, "grad_norm": 0.23714208602905273, "learning_rate": 1e-05, "loss": 0.9439, "step": 84220 }, { "epoch": 74.60141718334809, "grad_norm": 0.24737609922885895, "learning_rate": 1e-05, "loss": 0.9452, "step": 84225 }, { "epoch": 74.6058458813109, "grad_norm": 0.1893225461244583, "learning_rate": 1e-05, "loss": 0.9826, "step": 84230 }, { "epoch": 74.61027457927369, "grad_norm": 0.27101677656173706, "learning_rate": 1e-05, "loss": 0.9584, "step": 84235 }, { "epoch": 74.61470327723649, "grad_norm": 0.26572537422180176, "learning_rate": 1e-05, "loss": 0.9438, "step": 84240 }, { "epoch": 74.6191319751993, "grad_norm": 0.2822604179382324, "learning_rate": 1e-05, "loss": 0.9421, "step": 84245 }, { "epoch": 74.62356067316209, "grad_norm": 0.2608496844768524, "learning_rate": 1e-05, "loss": 0.9476, "step": 84250 }, { "epoch": 74.62798937112488, "grad_norm": 0.24638453125953674, "learning_rate": 1e-05, "loss": 0.9337, "step": 84255 }, { "epoch": 74.63241806908769, "grad_norm": 0.2655778229236603, "learning_rate": 1e-05, "loss": 0.9738, "step": 84260 }, { "epoch": 74.63684676705049, "grad_norm": 0.2132103443145752, "learning_rate": 1e-05, "loss": 0.9175, "step": 84265 }, { "epoch": 74.64127546501328, "grad_norm": 0.27513816952705383, "learning_rate": 1e-05, "loss": 0.9526, "step": 84270 }, { "epoch": 74.64570416297609, "grad_norm": 0.23372705280780792, "learning_rate": 1e-05, "loss": 0.9898, "step": 84275 }, { "epoch": 74.65013286093888, "grad_norm": 0.2767297029495239, "learning_rate": 1e-05, "loss": 0.9314, "step": 84280 }, { "epoch": 74.65456155890168, "grad_norm": 0.2111191302537918, "learning_rate": 1e-05, "loss": 0.9651, "step": 84285 }, { "epoch": 74.65899025686448, "grad_norm": 0.21248102188110352, "learning_rate": 1e-05, "loss": 0.9564, "step": 84290 }, { "epoch": 74.66341895482728, "grad_norm": 0.21242599189281464, "learning_rate": 1e-05, "loss": 0.9413, "step": 84295 }, { "epoch": 74.66784765279007, "grad_norm": 0.28285810351371765, "learning_rate": 1e-05, "loss": 0.9275, "step": 84300 }, { "epoch": 74.67227635075288, "grad_norm": 0.22369693219661713, "learning_rate": 1e-05, "loss": 0.9337, "step": 84305 }, { "epoch": 74.67670504871568, "grad_norm": 0.218536838889122, "learning_rate": 1e-05, "loss": 0.9904, "step": 84310 }, { "epoch": 74.68113374667848, "grad_norm": 0.23357917368412018, "learning_rate": 1e-05, "loss": 0.9379, "step": 84315 }, { "epoch": 74.68556244464128, "grad_norm": 0.22577446699142456, "learning_rate": 1e-05, "loss": 0.9783, "step": 84320 }, { "epoch": 74.68999114260407, "grad_norm": 0.3132842481136322, "learning_rate": 1e-05, "loss": 0.9601, "step": 84325 }, { "epoch": 74.69441984056688, "grad_norm": 0.23717401921749115, "learning_rate": 1e-05, "loss": 0.9253, "step": 84330 }, { "epoch": 74.69884853852967, "grad_norm": 0.26239141821861267, "learning_rate": 1e-05, "loss": 0.951, "step": 84335 }, { "epoch": 74.70327723649247, "grad_norm": 0.2510971128940582, "learning_rate": 1e-05, "loss": 0.9768, "step": 84340 }, { "epoch": 74.70770593445528, "grad_norm": 0.24655738472938538, "learning_rate": 1e-05, "loss": 0.9721, "step": 84345 }, { "epoch": 74.71213463241807, "grad_norm": 0.2461847960948944, "learning_rate": 1e-05, "loss": 0.95, "step": 84350 }, { "epoch": 74.71656333038086, "grad_norm": 0.22440913319587708, "learning_rate": 1e-05, "loss": 0.9678, "step": 84355 }, { "epoch": 74.72099202834367, "grad_norm": 0.22830170392990112, "learning_rate": 1e-05, "loss": 1.0138, "step": 84360 }, { "epoch": 74.72542072630647, "grad_norm": 0.2553552985191345, "learning_rate": 1e-05, "loss": 1.0061, "step": 84365 }, { "epoch": 74.72984942426926, "grad_norm": 0.21517328917980194, "learning_rate": 1e-05, "loss": 0.9323, "step": 84370 }, { "epoch": 74.73427812223207, "grad_norm": 0.238071009516716, "learning_rate": 1e-05, "loss": 0.9687, "step": 84375 }, { "epoch": 74.73870682019486, "grad_norm": 0.21605949103832245, "learning_rate": 1e-05, "loss": 0.9891, "step": 84380 }, { "epoch": 74.74313551815766, "grad_norm": 0.22177082300186157, "learning_rate": 1e-05, "loss": 1.0396, "step": 84385 }, { "epoch": 74.74756421612047, "grad_norm": 0.22120670974254608, "learning_rate": 1e-05, "loss": 0.9887, "step": 84390 }, { "epoch": 74.75199291408326, "grad_norm": 0.23397816717624664, "learning_rate": 1e-05, "loss": 0.9904, "step": 84395 }, { "epoch": 74.75642161204605, "grad_norm": 0.24542438983917236, "learning_rate": 1e-05, "loss": 0.9525, "step": 84400 }, { "epoch": 74.76085031000886, "grad_norm": 0.21681912243366241, "learning_rate": 1e-05, "loss": 0.9752, "step": 84405 }, { "epoch": 74.76527900797166, "grad_norm": 0.24025574326515198, "learning_rate": 1e-05, "loss": 0.921, "step": 84410 }, { "epoch": 74.76970770593445, "grad_norm": 0.23549969494342804, "learning_rate": 1e-05, "loss": 0.9257, "step": 84415 }, { "epoch": 74.77413640389726, "grad_norm": 0.26955610513687134, "learning_rate": 1e-05, "loss": 0.9737, "step": 84420 }, { "epoch": 74.77856510186005, "grad_norm": 0.2821662724018097, "learning_rate": 1e-05, "loss": 0.9445, "step": 84425 }, { "epoch": 74.78299379982285, "grad_norm": 0.2508194148540497, "learning_rate": 1e-05, "loss": 0.934, "step": 84430 }, { "epoch": 74.78742249778566, "grad_norm": 0.2517653703689575, "learning_rate": 1e-05, "loss": 0.8962, "step": 84435 }, { "epoch": 74.79185119574845, "grad_norm": 0.23977205157279968, "learning_rate": 1e-05, "loss": 0.9849, "step": 84440 }, { "epoch": 74.79627989371124, "grad_norm": 0.2645267844200134, "learning_rate": 1e-05, "loss": 0.9856, "step": 84445 }, { "epoch": 74.80070859167405, "grad_norm": 0.21609662473201752, "learning_rate": 1e-05, "loss": 0.9706, "step": 84450 }, { "epoch": 74.80513728963685, "grad_norm": 0.25307223200798035, "learning_rate": 1e-05, "loss": 0.8657, "step": 84455 }, { "epoch": 74.80956598759964, "grad_norm": 0.27134793996810913, "learning_rate": 1e-05, "loss": 0.9795, "step": 84460 }, { "epoch": 74.81399468556245, "grad_norm": 0.24788208305835724, "learning_rate": 1e-05, "loss": 0.937, "step": 84465 }, { "epoch": 74.81842338352524, "grad_norm": 0.22885899245738983, "learning_rate": 1e-05, "loss": 0.9835, "step": 84470 }, { "epoch": 74.82285208148804, "grad_norm": 0.23829950392246246, "learning_rate": 1e-05, "loss": 1.0304, "step": 84475 }, { "epoch": 74.82728077945085, "grad_norm": 0.2025105059146881, "learning_rate": 1e-05, "loss": 0.9439, "step": 84480 }, { "epoch": 74.83170947741364, "grad_norm": 0.25875964760780334, "learning_rate": 1e-05, "loss": 0.9609, "step": 84485 }, { "epoch": 74.83613817537643, "grad_norm": 0.23047824203968048, "learning_rate": 1e-05, "loss": 0.9449, "step": 84490 }, { "epoch": 74.84056687333924, "grad_norm": 0.1988583207130432, "learning_rate": 1e-05, "loss": 1.0049, "step": 84495 }, { "epoch": 74.84499557130204, "grad_norm": 0.21686246991157532, "learning_rate": 1e-05, "loss": 0.9721, "step": 84500 }, { "epoch": 74.84942426926483, "grad_norm": 0.24168746173381805, "learning_rate": 1e-05, "loss": 0.9551, "step": 84505 }, { "epoch": 74.85385296722764, "grad_norm": 0.21927835047245026, "learning_rate": 1e-05, "loss": 0.9552, "step": 84510 }, { "epoch": 74.85828166519043, "grad_norm": 0.25283053517341614, "learning_rate": 1e-05, "loss": 0.9911, "step": 84515 }, { "epoch": 74.86271036315323, "grad_norm": 0.2474817931652069, "learning_rate": 1e-05, "loss": 0.921, "step": 84520 }, { "epoch": 74.86713906111603, "grad_norm": 0.2392907589673996, "learning_rate": 1e-05, "loss": 0.9291, "step": 84525 }, { "epoch": 74.87156775907883, "grad_norm": 0.2124769389629364, "learning_rate": 1e-05, "loss": 0.9632, "step": 84530 }, { "epoch": 74.87599645704162, "grad_norm": 0.24759037792682648, "learning_rate": 1e-05, "loss": 0.9659, "step": 84535 }, { "epoch": 74.88042515500443, "grad_norm": 0.22522376477718353, "learning_rate": 1e-05, "loss": 0.9744, "step": 84540 }, { "epoch": 74.88485385296723, "grad_norm": 0.21356217563152313, "learning_rate": 1e-05, "loss": 0.9354, "step": 84545 }, { "epoch": 74.88928255093003, "grad_norm": 0.2679639756679535, "learning_rate": 1e-05, "loss": 0.9564, "step": 84550 }, { "epoch": 74.89371124889283, "grad_norm": 0.25876304507255554, "learning_rate": 1e-05, "loss": 0.952, "step": 84555 }, { "epoch": 74.89813994685562, "grad_norm": 0.22637389600276947, "learning_rate": 1e-05, "loss": 0.9523, "step": 84560 }, { "epoch": 74.90256864481843, "grad_norm": 0.21849526464939117, "learning_rate": 1e-05, "loss": 0.9307, "step": 84565 }, { "epoch": 74.90699734278122, "grad_norm": 0.25481897592544556, "learning_rate": 1e-05, "loss": 1.0102, "step": 84570 }, { "epoch": 74.91142604074402, "grad_norm": 0.23954732716083527, "learning_rate": 1e-05, "loss": 0.9257, "step": 84575 }, { "epoch": 74.91585473870683, "grad_norm": 0.2385462075471878, "learning_rate": 1e-05, "loss": 0.9717, "step": 84580 }, { "epoch": 74.92028343666962, "grad_norm": 0.24303081631660461, "learning_rate": 1e-05, "loss": 0.9578, "step": 84585 }, { "epoch": 74.92471213463241, "grad_norm": 0.24699783325195312, "learning_rate": 1e-05, "loss": 0.9586, "step": 84590 }, { "epoch": 74.92914083259522, "grad_norm": 0.21922394633293152, "learning_rate": 1e-05, "loss": 0.9075, "step": 84595 }, { "epoch": 74.93356953055802, "grad_norm": 0.22815263271331787, "learning_rate": 1e-05, "loss": 0.9328, "step": 84600 }, { "epoch": 74.93799822852081, "grad_norm": 0.23953737318515778, "learning_rate": 1e-05, "loss": 0.9235, "step": 84605 }, { "epoch": 74.94242692648362, "grad_norm": 0.23508131504058838, "learning_rate": 1e-05, "loss": 0.981, "step": 84610 }, { "epoch": 74.94685562444641, "grad_norm": 0.288730263710022, "learning_rate": 1e-05, "loss": 0.9705, "step": 84615 }, { "epoch": 74.95128432240921, "grad_norm": 0.2662127912044525, "learning_rate": 1e-05, "loss": 0.9333, "step": 84620 }, { "epoch": 74.95571302037202, "grad_norm": 0.3024565577507019, "learning_rate": 1e-05, "loss": 0.9873, "step": 84625 }, { "epoch": 74.96014171833481, "grad_norm": 0.21293513476848602, "learning_rate": 1e-05, "loss": 0.9043, "step": 84630 }, { "epoch": 74.9645704162976, "grad_norm": 0.1992969512939453, "learning_rate": 1e-05, "loss": 0.9374, "step": 84635 }, { "epoch": 74.96899911426041, "grad_norm": 0.24272489547729492, "learning_rate": 1e-05, "loss": 0.985, "step": 84640 }, { "epoch": 74.9734278122232, "grad_norm": 0.26822975277900696, "learning_rate": 1e-05, "loss": 0.9747, "step": 84645 }, { "epoch": 74.977856510186, "grad_norm": 0.23995016515254974, "learning_rate": 1e-05, "loss": 0.9634, "step": 84650 }, { "epoch": 74.98228520814881, "grad_norm": 0.2250189483165741, "learning_rate": 1e-05, "loss": 0.9169, "step": 84655 }, { "epoch": 74.9867139061116, "grad_norm": 0.22450236976146698, "learning_rate": 1e-05, "loss": 1.0108, "step": 84660 }, { "epoch": 74.9911426040744, "grad_norm": 0.2503156065940857, "learning_rate": 1e-05, "loss": 0.9183, "step": 84665 }, { "epoch": 74.9955713020372, "grad_norm": 0.2240157276391983, "learning_rate": 1e-05, "loss": 0.9824, "step": 84670 }, { "epoch": 75.0, "grad_norm": 0.22003693878650665, "learning_rate": 1e-05, "loss": 0.9726, "step": 84675 }, { "epoch": 75.0044286979628, "grad_norm": 0.21870656311511993, "learning_rate": 1e-05, "loss": 0.955, "step": 84680 }, { "epoch": 75.0088573959256, "grad_norm": 0.22483786940574646, "learning_rate": 1e-05, "loss": 0.9838, "step": 84685 }, { "epoch": 75.0132860938884, "grad_norm": 0.22884005308151245, "learning_rate": 1e-05, "loss": 0.9191, "step": 84690 }, { "epoch": 75.01771479185119, "grad_norm": 0.28699758648872375, "learning_rate": 1e-05, "loss": 0.9776, "step": 84695 }, { "epoch": 75.022143489814, "grad_norm": 0.25010424852371216, "learning_rate": 1e-05, "loss": 0.9365, "step": 84700 }, { "epoch": 75.0265721877768, "grad_norm": 0.22076232731342316, "learning_rate": 1e-05, "loss": 0.8877, "step": 84705 }, { "epoch": 75.03100088573959, "grad_norm": 0.22312040627002716, "learning_rate": 1e-05, "loss": 0.9472, "step": 84710 }, { "epoch": 75.0354295837024, "grad_norm": 0.2565702497959137, "learning_rate": 1e-05, "loss": 0.9937, "step": 84715 }, { "epoch": 75.03985828166519, "grad_norm": 0.21622341871261597, "learning_rate": 1e-05, "loss": 0.9864, "step": 84720 }, { "epoch": 75.04428697962798, "grad_norm": 0.23020702600479126, "learning_rate": 1e-05, "loss": 0.965, "step": 84725 }, { "epoch": 75.04871567759079, "grad_norm": 0.26327991485595703, "learning_rate": 1e-05, "loss": 0.9918, "step": 84730 }, { "epoch": 75.05314437555359, "grad_norm": 0.22826343774795532, "learning_rate": 1e-05, "loss": 1.001, "step": 84735 }, { "epoch": 75.05757307351638, "grad_norm": 0.20753420889377594, "learning_rate": 1e-05, "loss": 0.9557, "step": 84740 }, { "epoch": 75.06200177147919, "grad_norm": 0.2340545803308487, "learning_rate": 1e-05, "loss": 1.0017, "step": 84745 }, { "epoch": 75.06643046944198, "grad_norm": 0.2826903462409973, "learning_rate": 1e-05, "loss": 0.9152, "step": 84750 }, { "epoch": 75.07085916740478, "grad_norm": 0.25694406032562256, "learning_rate": 1e-05, "loss": 1.0487, "step": 84755 }, { "epoch": 75.07528786536759, "grad_norm": 0.23377755284309387, "learning_rate": 1e-05, "loss": 0.9225, "step": 84760 }, { "epoch": 75.07971656333038, "grad_norm": 0.22043955326080322, "learning_rate": 1e-05, "loss": 0.9836, "step": 84765 }, { "epoch": 75.08414526129317, "grad_norm": 0.23454740643501282, "learning_rate": 1e-05, "loss": 1.029, "step": 84770 }, { "epoch": 75.08857395925598, "grad_norm": 0.27510154247283936, "learning_rate": 1e-05, "loss": 0.985, "step": 84775 }, { "epoch": 75.09300265721878, "grad_norm": 0.23960064351558685, "learning_rate": 1e-05, "loss": 1.0173, "step": 84780 }, { "epoch": 75.09743135518157, "grad_norm": 0.24262239038944244, "learning_rate": 1e-05, "loss": 0.9574, "step": 84785 }, { "epoch": 75.10186005314438, "grad_norm": 0.26969027519226074, "learning_rate": 1e-05, "loss": 0.9944, "step": 84790 }, { "epoch": 75.10628875110717, "grad_norm": 0.24327705800533295, "learning_rate": 1e-05, "loss": 0.9546, "step": 84795 }, { "epoch": 75.11071744906998, "grad_norm": 0.22460506856441498, "learning_rate": 1e-05, "loss": 0.9773, "step": 84800 }, { "epoch": 75.11514614703277, "grad_norm": 0.2183789163827896, "learning_rate": 1e-05, "loss": 0.9408, "step": 84805 }, { "epoch": 75.11957484499557, "grad_norm": 0.25380414724349976, "learning_rate": 1e-05, "loss": 0.9658, "step": 84810 }, { "epoch": 75.12400354295838, "grad_norm": 0.2654682993888855, "learning_rate": 1e-05, "loss": 0.9322, "step": 84815 }, { "epoch": 75.12843224092117, "grad_norm": 0.1973486691713333, "learning_rate": 1e-05, "loss": 0.9097, "step": 84820 }, { "epoch": 75.13286093888397, "grad_norm": 0.2441629022359848, "learning_rate": 1e-05, "loss": 0.9282, "step": 84825 }, { "epoch": 75.13728963684677, "grad_norm": 0.24071498215198517, "learning_rate": 1e-05, "loss": 0.9181, "step": 84830 }, { "epoch": 75.14171833480957, "grad_norm": 0.31346943974494934, "learning_rate": 1e-05, "loss": 0.9268, "step": 84835 }, { "epoch": 75.14614703277236, "grad_norm": 0.308391273021698, "learning_rate": 1e-05, "loss": 0.985, "step": 84840 }, { "epoch": 75.15057573073517, "grad_norm": 0.2587350606918335, "learning_rate": 1e-05, "loss": 0.9645, "step": 84845 }, { "epoch": 75.15500442869796, "grad_norm": 0.20671920478343964, "learning_rate": 1e-05, "loss": 0.9764, "step": 84850 }, { "epoch": 75.15943312666076, "grad_norm": 0.25550347566604614, "learning_rate": 1e-05, "loss": 0.9331, "step": 84855 }, { "epoch": 75.16386182462357, "grad_norm": 0.25200262665748596, "learning_rate": 1e-05, "loss": 0.9761, "step": 84860 }, { "epoch": 75.16829052258636, "grad_norm": 0.29642894864082336, "learning_rate": 1e-05, "loss": 0.9536, "step": 84865 }, { "epoch": 75.17271922054915, "grad_norm": 0.2914987802505493, "learning_rate": 1e-05, "loss": 1.0135, "step": 84870 }, { "epoch": 75.17714791851196, "grad_norm": 0.2569211423397064, "learning_rate": 1e-05, "loss": 0.9441, "step": 84875 }, { "epoch": 75.18157661647476, "grad_norm": 0.2678290605545044, "learning_rate": 1e-05, "loss": 0.9557, "step": 84880 }, { "epoch": 75.18600531443755, "grad_norm": 0.24707165360450745, "learning_rate": 1e-05, "loss": 0.9323, "step": 84885 }, { "epoch": 75.19043401240036, "grad_norm": 0.28388920426368713, "learning_rate": 1e-05, "loss": 0.9291, "step": 84890 }, { "epoch": 75.19486271036315, "grad_norm": 0.2352336347103119, "learning_rate": 1e-05, "loss": 0.9557, "step": 84895 }, { "epoch": 75.19929140832595, "grad_norm": 0.2437903881072998, "learning_rate": 1e-05, "loss": 0.9346, "step": 84900 }, { "epoch": 75.20372010628876, "grad_norm": 0.20318947732448578, "learning_rate": 1e-05, "loss": 0.9578, "step": 84905 }, { "epoch": 75.20814880425155, "grad_norm": 0.275124192237854, "learning_rate": 1e-05, "loss": 0.9317, "step": 84910 }, { "epoch": 75.21257750221434, "grad_norm": 0.2471960484981537, "learning_rate": 1e-05, "loss": 0.977, "step": 84915 }, { "epoch": 75.21700620017715, "grad_norm": 0.2619740664958954, "learning_rate": 1e-05, "loss": 0.9478, "step": 84920 }, { "epoch": 75.22143489813995, "grad_norm": 0.2774341106414795, "learning_rate": 1e-05, "loss": 0.964, "step": 84925 }, { "epoch": 75.22586359610274, "grad_norm": 0.24442876875400543, "learning_rate": 1e-05, "loss": 0.964, "step": 84930 }, { "epoch": 75.23029229406555, "grad_norm": 0.25724250078201294, "learning_rate": 1e-05, "loss": 0.9563, "step": 84935 }, { "epoch": 75.23472099202834, "grad_norm": 0.24393513798713684, "learning_rate": 1e-05, "loss": 0.9539, "step": 84940 }, { "epoch": 75.23914968999114, "grad_norm": 0.23192188143730164, "learning_rate": 1e-05, "loss": 0.9256, "step": 84945 }, { "epoch": 75.24357838795395, "grad_norm": 0.22205850481987, "learning_rate": 1e-05, "loss": 1.0053, "step": 84950 }, { "epoch": 75.24800708591674, "grad_norm": 0.2539752125740051, "learning_rate": 1e-05, "loss": 1.0369, "step": 84955 }, { "epoch": 75.25243578387953, "grad_norm": 0.2708742022514343, "learning_rate": 1e-05, "loss": 0.9566, "step": 84960 }, { "epoch": 75.25686448184234, "grad_norm": 0.229655921459198, "learning_rate": 1e-05, "loss": 0.9514, "step": 84965 }, { "epoch": 75.26129317980514, "grad_norm": 0.2129310667514801, "learning_rate": 1e-05, "loss": 1.036, "step": 84970 }, { "epoch": 75.26572187776793, "grad_norm": 0.24959397315979004, "learning_rate": 1e-05, "loss": 0.9379, "step": 84975 }, { "epoch": 75.27015057573074, "grad_norm": 0.258658230304718, "learning_rate": 1e-05, "loss": 0.9498, "step": 84980 }, { "epoch": 75.27457927369353, "grad_norm": 0.22723299264907837, "learning_rate": 1e-05, "loss": 0.933, "step": 84985 }, { "epoch": 75.27900797165633, "grad_norm": 0.23444893956184387, "learning_rate": 1e-05, "loss": 1.0206, "step": 84990 }, { "epoch": 75.28343666961914, "grad_norm": 0.26359301805496216, "learning_rate": 1e-05, "loss": 0.9804, "step": 84995 }, { "epoch": 75.28786536758193, "grad_norm": 0.21894216537475586, "learning_rate": 1e-05, "loss": 0.9108, "step": 85000 }, { "epoch": 75.29229406554472, "grad_norm": 0.2418021559715271, "learning_rate": 1e-05, "loss": 0.8826, "step": 85005 }, { "epoch": 75.29672276350753, "grad_norm": 0.2179296761751175, "learning_rate": 1e-05, "loss": 0.9585, "step": 85010 }, { "epoch": 75.30115146147033, "grad_norm": 0.28593719005584717, "learning_rate": 1e-05, "loss": 0.9675, "step": 85015 }, { "epoch": 75.30558015943312, "grad_norm": 0.2666097581386566, "learning_rate": 1e-05, "loss": 0.9629, "step": 85020 }, { "epoch": 75.31000885739593, "grad_norm": 0.2627635598182678, "learning_rate": 1e-05, "loss": 1.0036, "step": 85025 }, { "epoch": 75.31443755535872, "grad_norm": 0.2633863389492035, "learning_rate": 1e-05, "loss": 0.9419, "step": 85030 }, { "epoch": 75.31886625332152, "grad_norm": 0.2474062740802765, "learning_rate": 1e-05, "loss": 1.0117, "step": 85035 }, { "epoch": 75.32329495128432, "grad_norm": 0.24775506556034088, "learning_rate": 1e-05, "loss": 0.9727, "step": 85040 }, { "epoch": 75.32772364924712, "grad_norm": 0.23711426556110382, "learning_rate": 1e-05, "loss": 1.029, "step": 85045 }, { "epoch": 75.33215234720993, "grad_norm": 0.24943478405475616, "learning_rate": 1e-05, "loss": 0.9495, "step": 85050 }, { "epoch": 75.33658104517272, "grad_norm": 0.230828657746315, "learning_rate": 1e-05, "loss": 1.0289, "step": 85055 }, { "epoch": 75.34100974313552, "grad_norm": 0.2685759365558624, "learning_rate": 1e-05, "loss": 0.9643, "step": 85060 }, { "epoch": 75.34543844109832, "grad_norm": 0.23378632962703705, "learning_rate": 1e-05, "loss": 0.9791, "step": 85065 }, { "epoch": 75.34986713906112, "grad_norm": 0.2377382218837738, "learning_rate": 1e-05, "loss": 0.9858, "step": 85070 }, { "epoch": 75.35429583702391, "grad_norm": 0.2443789392709732, "learning_rate": 1e-05, "loss": 0.9557, "step": 85075 }, { "epoch": 75.35872453498672, "grad_norm": 0.2492707520723343, "learning_rate": 1e-05, "loss": 0.9097, "step": 85080 }, { "epoch": 75.36315323294951, "grad_norm": 0.23865725100040436, "learning_rate": 1e-05, "loss": 0.9653, "step": 85085 }, { "epoch": 75.36758193091231, "grad_norm": 0.21588142216205597, "learning_rate": 1e-05, "loss": 0.9123, "step": 85090 }, { "epoch": 75.37201062887512, "grad_norm": 0.24121707677841187, "learning_rate": 1e-05, "loss": 0.9837, "step": 85095 }, { "epoch": 75.37643932683791, "grad_norm": 0.2339557260274887, "learning_rate": 1e-05, "loss": 0.9933, "step": 85100 }, { "epoch": 75.3808680248007, "grad_norm": 0.22046083211898804, "learning_rate": 1e-05, "loss": 0.991, "step": 85105 }, { "epoch": 75.38529672276351, "grad_norm": 0.31256505846977234, "learning_rate": 1e-05, "loss": 0.9707, "step": 85110 }, { "epoch": 75.38972542072631, "grad_norm": 0.2579330801963806, "learning_rate": 1e-05, "loss": 0.9722, "step": 85115 }, { "epoch": 75.3941541186891, "grad_norm": 0.27714377641677856, "learning_rate": 1e-05, "loss": 0.9418, "step": 85120 }, { "epoch": 75.39858281665191, "grad_norm": 0.29912069439888, "learning_rate": 1e-05, "loss": 0.96, "step": 85125 }, { "epoch": 75.4030115146147, "grad_norm": 0.23450332880020142, "learning_rate": 1e-05, "loss": 0.9719, "step": 85130 }, { "epoch": 75.4074402125775, "grad_norm": 0.27591490745544434, "learning_rate": 1e-05, "loss": 0.9529, "step": 85135 }, { "epoch": 75.4118689105403, "grad_norm": 0.23395629227161407, "learning_rate": 1e-05, "loss": 0.9227, "step": 85140 }, { "epoch": 75.4162976085031, "grad_norm": 0.29644298553466797, "learning_rate": 1e-05, "loss": 0.9399, "step": 85145 }, { "epoch": 75.4207263064659, "grad_norm": 0.23182235658168793, "learning_rate": 1e-05, "loss": 0.9368, "step": 85150 }, { "epoch": 75.4251550044287, "grad_norm": 0.21023684740066528, "learning_rate": 1e-05, "loss": 0.9481, "step": 85155 }, { "epoch": 75.4295837023915, "grad_norm": 0.22046741843223572, "learning_rate": 1e-05, "loss": 1.0186, "step": 85160 }, { "epoch": 75.43401240035429, "grad_norm": 0.2222185879945755, "learning_rate": 1e-05, "loss": 0.9599, "step": 85165 }, { "epoch": 75.4384410983171, "grad_norm": 0.2118159830570221, "learning_rate": 1e-05, "loss": 0.9354, "step": 85170 }, { "epoch": 75.4428697962799, "grad_norm": 0.22569133341312408, "learning_rate": 1e-05, "loss": 0.9905, "step": 85175 }, { "epoch": 75.44729849424269, "grad_norm": 0.22361229360103607, "learning_rate": 1e-05, "loss": 0.932, "step": 85180 }, { "epoch": 75.4517271922055, "grad_norm": 0.2534103989601135, "learning_rate": 1e-05, "loss": 0.9537, "step": 85185 }, { "epoch": 75.45615589016829, "grad_norm": 0.2374987155199051, "learning_rate": 1e-05, "loss": 0.9637, "step": 85190 }, { "epoch": 75.46058458813108, "grad_norm": 0.27647802233695984, "learning_rate": 1e-05, "loss": 0.979, "step": 85195 }, { "epoch": 75.46501328609389, "grad_norm": 0.23736269772052765, "learning_rate": 1e-05, "loss": 1.0019, "step": 85200 }, { "epoch": 75.46944198405669, "grad_norm": 0.27589935064315796, "learning_rate": 1e-05, "loss": 1.0131, "step": 85205 }, { "epoch": 75.47387068201948, "grad_norm": 0.24836444854736328, "learning_rate": 1e-05, "loss": 1.025, "step": 85210 }, { "epoch": 75.47829937998229, "grad_norm": 0.270071417093277, "learning_rate": 1e-05, "loss": 0.9483, "step": 85215 }, { "epoch": 75.48272807794508, "grad_norm": 0.23580753803253174, "learning_rate": 1e-05, "loss": 0.985, "step": 85220 }, { "epoch": 75.48715677590788, "grad_norm": 0.23322385549545288, "learning_rate": 1e-05, "loss": 0.9293, "step": 85225 }, { "epoch": 75.49158547387069, "grad_norm": 0.2386915385723114, "learning_rate": 1e-05, "loss": 0.9645, "step": 85230 }, { "epoch": 75.49601417183348, "grad_norm": 0.2402641326189041, "learning_rate": 1e-05, "loss": 0.9703, "step": 85235 }, { "epoch": 75.50044286979627, "grad_norm": 0.25126728415489197, "learning_rate": 1e-05, "loss": 1.0234, "step": 85240 }, { "epoch": 75.50487156775908, "grad_norm": 0.3170195519924164, "learning_rate": 1e-05, "loss": 0.9272, "step": 85245 }, { "epoch": 75.50930026572188, "grad_norm": 0.22746683657169342, "learning_rate": 1e-05, "loss": 0.9337, "step": 85250 }, { "epoch": 75.51372896368467, "grad_norm": 0.24262942373752594, "learning_rate": 1e-05, "loss": 0.958, "step": 85255 }, { "epoch": 75.51815766164748, "grad_norm": 0.2949182093143463, "learning_rate": 1e-05, "loss": 0.9784, "step": 85260 }, { "epoch": 75.52258635961027, "grad_norm": 0.2304728776216507, "learning_rate": 1e-05, "loss": 1.0231, "step": 85265 }, { "epoch": 75.52701505757307, "grad_norm": 0.2478756606578827, "learning_rate": 1e-05, "loss": 0.9524, "step": 85270 }, { "epoch": 75.53144375553588, "grad_norm": 0.2361639440059662, "learning_rate": 1e-05, "loss": 0.9661, "step": 85275 }, { "epoch": 75.53587245349867, "grad_norm": 0.2413511872291565, "learning_rate": 1e-05, "loss": 0.9754, "step": 85280 }, { "epoch": 75.54030115146146, "grad_norm": 0.2701733708381653, "learning_rate": 1e-05, "loss": 0.9603, "step": 85285 }, { "epoch": 75.54472984942427, "grad_norm": 0.21128009259700775, "learning_rate": 1e-05, "loss": 0.9842, "step": 85290 }, { "epoch": 75.54915854738707, "grad_norm": 0.22484073042869568, "learning_rate": 1e-05, "loss": 0.8959, "step": 85295 }, { "epoch": 75.55358724534987, "grad_norm": 0.2168583869934082, "learning_rate": 1e-05, "loss": 0.9402, "step": 85300 }, { "epoch": 75.55801594331267, "grad_norm": 0.22390127182006836, "learning_rate": 1e-05, "loss": 0.9566, "step": 85305 }, { "epoch": 75.56244464127546, "grad_norm": 0.258327454328537, "learning_rate": 1e-05, "loss": 0.9839, "step": 85310 }, { "epoch": 75.56687333923827, "grad_norm": 0.23413020372390747, "learning_rate": 1e-05, "loss": 1.0047, "step": 85315 }, { "epoch": 75.57130203720106, "grad_norm": 0.24476855993270874, "learning_rate": 1e-05, "loss": 1.0202, "step": 85320 }, { "epoch": 75.57573073516386, "grad_norm": 0.23538783192634583, "learning_rate": 1e-05, "loss": 0.9539, "step": 85325 }, { "epoch": 75.58015943312667, "grad_norm": 0.26011019945144653, "learning_rate": 1e-05, "loss": 0.9963, "step": 85330 }, { "epoch": 75.58458813108946, "grad_norm": 0.2031121551990509, "learning_rate": 1e-05, "loss": 0.9807, "step": 85335 }, { "epoch": 75.58901682905226, "grad_norm": 0.23786626756191254, "learning_rate": 1e-05, "loss": 0.9778, "step": 85340 }, { "epoch": 75.59344552701506, "grad_norm": 0.24605628848075867, "learning_rate": 1e-05, "loss": 0.9841, "step": 85345 }, { "epoch": 75.59787422497786, "grad_norm": 0.2458154261112213, "learning_rate": 1e-05, "loss": 0.9349, "step": 85350 }, { "epoch": 75.60230292294065, "grad_norm": 0.22241619229316711, "learning_rate": 1e-05, "loss": 0.9729, "step": 85355 }, { "epoch": 75.60673162090346, "grad_norm": 0.22901132702827454, "learning_rate": 1e-05, "loss": 0.9811, "step": 85360 }, { "epoch": 75.61116031886625, "grad_norm": 0.23304331302642822, "learning_rate": 1e-05, "loss": 0.9136, "step": 85365 }, { "epoch": 75.61558901682905, "grad_norm": 0.22018621861934662, "learning_rate": 1e-05, "loss": 0.994, "step": 85370 }, { "epoch": 75.62001771479186, "grad_norm": 0.25559377670288086, "learning_rate": 1e-05, "loss": 0.9789, "step": 85375 }, { "epoch": 75.62444641275465, "grad_norm": 0.25102365016937256, "learning_rate": 1e-05, "loss": 0.9581, "step": 85380 }, { "epoch": 75.62887511071744, "grad_norm": 0.208938866853714, "learning_rate": 1e-05, "loss": 0.9379, "step": 85385 }, { "epoch": 75.63330380868025, "grad_norm": 0.21545442938804626, "learning_rate": 1e-05, "loss": 0.9806, "step": 85390 }, { "epoch": 75.63773250664305, "grad_norm": 0.1989172399044037, "learning_rate": 1e-05, "loss": 0.9543, "step": 85395 }, { "epoch": 75.64216120460584, "grad_norm": 0.23315006494522095, "learning_rate": 1e-05, "loss": 0.9709, "step": 85400 }, { "epoch": 75.64658990256865, "grad_norm": 0.21885494887828827, "learning_rate": 1e-05, "loss": 0.925, "step": 85405 }, { "epoch": 75.65101860053144, "grad_norm": 0.23954246938228607, "learning_rate": 1e-05, "loss": 0.9221, "step": 85410 }, { "epoch": 75.65544729849424, "grad_norm": 0.2486918568611145, "learning_rate": 1e-05, "loss": 0.9319, "step": 85415 }, { "epoch": 75.65987599645705, "grad_norm": 0.2685340940952301, "learning_rate": 1e-05, "loss": 0.9429, "step": 85420 }, { "epoch": 75.66430469441984, "grad_norm": 0.259987473487854, "learning_rate": 1e-05, "loss": 0.9195, "step": 85425 }, { "epoch": 75.66873339238263, "grad_norm": 0.22894006967544556, "learning_rate": 1e-05, "loss": 0.96, "step": 85430 }, { "epoch": 75.67316209034544, "grad_norm": 0.2500392198562622, "learning_rate": 1e-05, "loss": 0.976, "step": 85435 }, { "epoch": 75.67759078830824, "grad_norm": 0.24679076671600342, "learning_rate": 1e-05, "loss": 0.9412, "step": 85440 }, { "epoch": 75.68201948627103, "grad_norm": 0.23897510766983032, "learning_rate": 1e-05, "loss": 0.9378, "step": 85445 }, { "epoch": 75.68644818423384, "grad_norm": 0.24865782260894775, "learning_rate": 1e-05, "loss": 0.9308, "step": 85450 }, { "epoch": 75.69087688219663, "grad_norm": 0.21969492733478546, "learning_rate": 1e-05, "loss": 0.9423, "step": 85455 }, { "epoch": 75.69530558015943, "grad_norm": 0.22037707269191742, "learning_rate": 1e-05, "loss": 0.97, "step": 85460 }, { "epoch": 75.69973427812224, "grad_norm": 0.2533146142959595, "learning_rate": 1e-05, "loss": 0.8999, "step": 85465 }, { "epoch": 75.70416297608503, "grad_norm": 0.2391863316297531, "learning_rate": 1e-05, "loss": 0.9469, "step": 85470 }, { "epoch": 75.70859167404782, "grad_norm": 0.23583179712295532, "learning_rate": 1e-05, "loss": 1.0309, "step": 85475 }, { "epoch": 75.71302037201063, "grad_norm": 0.3049645721912384, "learning_rate": 1e-05, "loss": 0.9746, "step": 85480 }, { "epoch": 75.71744906997343, "grad_norm": 0.2273280769586563, "learning_rate": 1e-05, "loss": 0.9439, "step": 85485 }, { "epoch": 75.72187776793622, "grad_norm": 0.2354247123003006, "learning_rate": 1e-05, "loss": 0.9878, "step": 85490 }, { "epoch": 75.72630646589903, "grad_norm": 0.21188464760780334, "learning_rate": 1e-05, "loss": 0.9933, "step": 85495 }, { "epoch": 75.73073516386182, "grad_norm": 0.307801216840744, "learning_rate": 1e-05, "loss": 0.9739, "step": 85500 }, { "epoch": 75.73516386182462, "grad_norm": 0.27640068531036377, "learning_rate": 1e-05, "loss": 0.9441, "step": 85505 }, { "epoch": 75.73959255978743, "grad_norm": 0.24601681530475616, "learning_rate": 1e-05, "loss": 1.0698, "step": 85510 }, { "epoch": 75.74402125775022, "grad_norm": 0.20356449484825134, "learning_rate": 1e-05, "loss": 0.9644, "step": 85515 }, { "epoch": 75.74844995571301, "grad_norm": 0.23101134598255157, "learning_rate": 1e-05, "loss": 0.9026, "step": 85520 }, { "epoch": 75.75287865367582, "grad_norm": 0.2281644493341446, "learning_rate": 1e-05, "loss": 0.9414, "step": 85525 }, { "epoch": 75.75730735163862, "grad_norm": 0.24147437512874603, "learning_rate": 1e-05, "loss": 0.9816, "step": 85530 }, { "epoch": 75.76173604960141, "grad_norm": 0.24739526212215424, "learning_rate": 1e-05, "loss": 0.9547, "step": 85535 }, { "epoch": 75.76616474756422, "grad_norm": 0.22871942818164825, "learning_rate": 1e-05, "loss": 0.9417, "step": 85540 }, { "epoch": 75.77059344552701, "grad_norm": 0.2706684172153473, "learning_rate": 1e-05, "loss": 0.9191, "step": 85545 }, { "epoch": 75.77502214348982, "grad_norm": 0.24718569219112396, "learning_rate": 1e-05, "loss": 0.9751, "step": 85550 }, { "epoch": 75.77945084145261, "grad_norm": 0.2716258466243744, "learning_rate": 1e-05, "loss": 0.9776, "step": 85555 }, { "epoch": 75.78387953941541, "grad_norm": 0.25657913088798523, "learning_rate": 1e-05, "loss": 1.0142, "step": 85560 }, { "epoch": 75.78830823737822, "grad_norm": 0.24244873225688934, "learning_rate": 1e-05, "loss": 0.9974, "step": 85565 }, { "epoch": 75.79273693534101, "grad_norm": 0.24041496217250824, "learning_rate": 1e-05, "loss": 0.945, "step": 85570 }, { "epoch": 75.7971656333038, "grad_norm": 0.2565837502479553, "learning_rate": 1e-05, "loss": 0.964, "step": 85575 }, { "epoch": 75.80159433126661, "grad_norm": 0.2787538468837738, "learning_rate": 1e-05, "loss": 0.9918, "step": 85580 }, { "epoch": 75.80602302922941, "grad_norm": 0.2501988112926483, "learning_rate": 1e-05, "loss": 0.9741, "step": 85585 }, { "epoch": 75.8104517271922, "grad_norm": 0.2141208052635193, "learning_rate": 1e-05, "loss": 0.93, "step": 85590 }, { "epoch": 75.81488042515501, "grad_norm": 0.25114116072654724, "learning_rate": 1e-05, "loss": 0.9745, "step": 85595 }, { "epoch": 75.8193091231178, "grad_norm": 0.24835629761219025, "learning_rate": 1e-05, "loss": 0.9828, "step": 85600 }, { "epoch": 75.8237378210806, "grad_norm": 0.21733909845352173, "learning_rate": 1e-05, "loss": 0.9759, "step": 85605 }, { "epoch": 75.8281665190434, "grad_norm": 0.20196415483951569, "learning_rate": 1e-05, "loss": 1.0031, "step": 85610 }, { "epoch": 75.8325952170062, "grad_norm": 0.30430835485458374, "learning_rate": 1e-05, "loss": 0.9938, "step": 85615 }, { "epoch": 75.837023914969, "grad_norm": 0.24052922427654266, "learning_rate": 1e-05, "loss": 0.9639, "step": 85620 }, { "epoch": 75.8414526129318, "grad_norm": 0.24825434386730194, "learning_rate": 1e-05, "loss": 0.975, "step": 85625 }, { "epoch": 75.8458813108946, "grad_norm": 0.2518892288208008, "learning_rate": 1e-05, "loss": 1.011, "step": 85630 }, { "epoch": 75.85031000885739, "grad_norm": 0.20691558718681335, "learning_rate": 1e-05, "loss": 0.9616, "step": 85635 }, { "epoch": 75.8547387068202, "grad_norm": 0.2416483461856842, "learning_rate": 1e-05, "loss": 0.9894, "step": 85640 }, { "epoch": 75.859167404783, "grad_norm": 0.24811483919620514, "learning_rate": 1e-05, "loss": 0.956, "step": 85645 }, { "epoch": 75.86359610274579, "grad_norm": 0.23103493452072144, "learning_rate": 1e-05, "loss": 0.9892, "step": 85650 }, { "epoch": 75.8680248007086, "grad_norm": 0.2679225206375122, "learning_rate": 1e-05, "loss": 0.9757, "step": 85655 }, { "epoch": 75.87245349867139, "grad_norm": 0.2682875692844391, "learning_rate": 1e-05, "loss": 0.9165, "step": 85660 }, { "epoch": 75.87688219663418, "grad_norm": 0.20635049045085907, "learning_rate": 1e-05, "loss": 0.9818, "step": 85665 }, { "epoch": 75.881310894597, "grad_norm": 0.19470936059951782, "learning_rate": 1e-05, "loss": 0.9647, "step": 85670 }, { "epoch": 75.88573959255979, "grad_norm": 0.24073436856269836, "learning_rate": 1e-05, "loss": 0.9739, "step": 85675 }, { "epoch": 75.89016829052258, "grad_norm": 0.2124311476945877, "learning_rate": 1e-05, "loss": 0.9838, "step": 85680 }, { "epoch": 75.89459698848539, "grad_norm": 0.2729822099208832, "learning_rate": 1e-05, "loss": 0.9648, "step": 85685 }, { "epoch": 75.89902568644818, "grad_norm": 0.22725269198417664, "learning_rate": 1e-05, "loss": 0.9938, "step": 85690 }, { "epoch": 75.90345438441098, "grad_norm": 0.27036288380622864, "learning_rate": 1e-05, "loss": 0.9211, "step": 85695 }, { "epoch": 75.90788308237379, "grad_norm": 0.22100743651390076, "learning_rate": 1e-05, "loss": 0.9819, "step": 85700 }, { "epoch": 75.91231178033658, "grad_norm": 0.24741415679454803, "learning_rate": 1e-05, "loss": 0.9973, "step": 85705 }, { "epoch": 75.91674047829937, "grad_norm": 0.20851606130599976, "learning_rate": 1e-05, "loss": 0.9183, "step": 85710 }, { "epoch": 75.92116917626218, "grad_norm": 0.24909481406211853, "learning_rate": 1e-05, "loss": 0.9739, "step": 85715 }, { "epoch": 75.92559787422498, "grad_norm": 0.2866688668727875, "learning_rate": 1e-05, "loss": 0.948, "step": 85720 }, { "epoch": 75.93002657218777, "grad_norm": 0.22481217980384827, "learning_rate": 1e-05, "loss": 0.9512, "step": 85725 }, { "epoch": 75.93445527015058, "grad_norm": 0.24417854845523834, "learning_rate": 1e-05, "loss": 0.9655, "step": 85730 }, { "epoch": 75.93888396811337, "grad_norm": 0.203885018825531, "learning_rate": 1e-05, "loss": 0.8979, "step": 85735 }, { "epoch": 75.94331266607617, "grad_norm": 0.22813384234905243, "learning_rate": 1e-05, "loss": 0.9185, "step": 85740 }, { "epoch": 75.94774136403898, "grad_norm": 0.26505497097969055, "learning_rate": 1e-05, "loss": 0.9418, "step": 85745 }, { "epoch": 75.95217006200177, "grad_norm": 0.24845972657203674, "learning_rate": 1e-05, "loss": 0.9763, "step": 85750 }, { "epoch": 75.95659875996456, "grad_norm": 0.23623836040496826, "learning_rate": 1e-05, "loss": 0.9578, "step": 85755 }, { "epoch": 75.96102745792737, "grad_norm": 0.24243469536304474, "learning_rate": 1e-05, "loss": 0.9954, "step": 85760 }, { "epoch": 75.96545615589017, "grad_norm": 0.33199119567871094, "learning_rate": 1e-05, "loss": 0.9846, "step": 85765 }, { "epoch": 75.96988485385296, "grad_norm": 0.21568551659584045, "learning_rate": 1e-05, "loss": 1.0114, "step": 85770 }, { "epoch": 75.97431355181577, "grad_norm": 0.23183706402778625, "learning_rate": 1e-05, "loss": 1.0006, "step": 85775 }, { "epoch": 75.97874224977856, "grad_norm": 0.23481948673725128, "learning_rate": 1e-05, "loss": 0.9488, "step": 85780 }, { "epoch": 75.98317094774137, "grad_norm": 0.26898589730262756, "learning_rate": 1e-05, "loss": 0.9667, "step": 85785 }, { "epoch": 75.98759964570417, "grad_norm": 0.2503397762775421, "learning_rate": 1e-05, "loss": 0.9401, "step": 85790 }, { "epoch": 75.99202834366696, "grad_norm": 0.24289657175540924, "learning_rate": 1e-05, "loss": 1.0244, "step": 85795 }, { "epoch": 75.99645704162977, "grad_norm": 0.2242513746023178, "learning_rate": 1e-05, "loss": 0.9291, "step": 85800 }, { "epoch": 76.00088573959256, "grad_norm": 0.2130116969347, "learning_rate": 1e-05, "loss": 1.0249, "step": 85805 }, { "epoch": 76.00531443755536, "grad_norm": 0.22323259711265564, "learning_rate": 1e-05, "loss": 0.9801, "step": 85810 }, { "epoch": 76.00974313551816, "grad_norm": 0.23010407388210297, "learning_rate": 1e-05, "loss": 0.9837, "step": 85815 }, { "epoch": 76.01417183348096, "grad_norm": 0.222050741314888, "learning_rate": 1e-05, "loss": 0.9669, "step": 85820 }, { "epoch": 76.01860053144375, "grad_norm": 0.26141923666000366, "learning_rate": 1e-05, "loss": 0.9193, "step": 85825 }, { "epoch": 76.02302922940656, "grad_norm": 0.29551437497138977, "learning_rate": 1e-05, "loss": 0.9395, "step": 85830 }, { "epoch": 76.02745792736935, "grad_norm": 0.2618325650691986, "learning_rate": 1e-05, "loss": 0.939, "step": 85835 }, { "epoch": 76.03188662533215, "grad_norm": 0.24414639174938202, "learning_rate": 1e-05, "loss": 0.9902, "step": 85840 }, { "epoch": 76.03631532329496, "grad_norm": 0.2505650520324707, "learning_rate": 1e-05, "loss": 1.0024, "step": 85845 }, { "epoch": 76.04074402125775, "grad_norm": 0.23258769512176514, "learning_rate": 1e-05, "loss": 1.0183, "step": 85850 }, { "epoch": 76.04517271922055, "grad_norm": 0.24757912755012512, "learning_rate": 1e-05, "loss": 0.9991, "step": 85855 }, { "epoch": 76.04960141718335, "grad_norm": 0.25555816292762756, "learning_rate": 1e-05, "loss": 1.0208, "step": 85860 }, { "epoch": 76.05403011514615, "grad_norm": 0.2425612360239029, "learning_rate": 1e-05, "loss": 1.0147, "step": 85865 }, { "epoch": 76.05845881310894, "grad_norm": 0.20050077140331268, "learning_rate": 1e-05, "loss": 0.9854, "step": 85870 }, { "epoch": 76.06288751107175, "grad_norm": 0.23308445513248444, "learning_rate": 1e-05, "loss": 0.985, "step": 85875 }, { "epoch": 76.06731620903454, "grad_norm": 0.22100616991519928, "learning_rate": 1e-05, "loss": 0.97, "step": 85880 }, { "epoch": 76.07174490699734, "grad_norm": 0.2164248824119568, "learning_rate": 1e-05, "loss": 0.914, "step": 85885 }, { "epoch": 76.07617360496015, "grad_norm": 0.24151375889778137, "learning_rate": 1e-05, "loss": 0.9019, "step": 85890 }, { "epoch": 76.08060230292294, "grad_norm": 0.2733105421066284, "learning_rate": 1e-05, "loss": 0.9388, "step": 85895 }, { "epoch": 76.08503100088573, "grad_norm": 0.25998198986053467, "learning_rate": 1e-05, "loss": 0.9555, "step": 85900 }, { "epoch": 76.08945969884854, "grad_norm": 0.28050321340560913, "learning_rate": 1e-05, "loss": 0.9545, "step": 85905 }, { "epoch": 76.09388839681134, "grad_norm": 0.22001048922538757, "learning_rate": 1e-05, "loss": 0.9938, "step": 85910 }, { "epoch": 76.09831709477413, "grad_norm": 0.2429816722869873, "learning_rate": 1e-05, "loss": 0.9542, "step": 85915 }, { "epoch": 76.10274579273694, "grad_norm": 0.2649989426136017, "learning_rate": 1e-05, "loss": 0.9471, "step": 85920 }, { "epoch": 76.10717449069973, "grad_norm": 0.27369949221611023, "learning_rate": 1e-05, "loss": 1.0125, "step": 85925 }, { "epoch": 76.11160318866253, "grad_norm": 0.2501014471054077, "learning_rate": 1e-05, "loss": 0.9399, "step": 85930 }, { "epoch": 76.11603188662534, "grad_norm": 0.25790613889694214, "learning_rate": 1e-05, "loss": 0.9385, "step": 85935 }, { "epoch": 76.12046058458813, "grad_norm": 0.2612212002277374, "learning_rate": 1e-05, "loss": 0.965, "step": 85940 }, { "epoch": 76.12488928255092, "grad_norm": 0.2193792462348938, "learning_rate": 1e-05, "loss": 0.9453, "step": 85945 }, { "epoch": 76.12931798051373, "grad_norm": 0.24109342694282532, "learning_rate": 1e-05, "loss": 0.9625, "step": 85950 }, { "epoch": 76.13374667847653, "grad_norm": 0.2330726534128189, "learning_rate": 1e-05, "loss": 0.9262, "step": 85955 }, { "epoch": 76.13817537643932, "grad_norm": 0.2948412597179413, "learning_rate": 1e-05, "loss": 0.9476, "step": 85960 }, { "epoch": 76.14260407440213, "grad_norm": 0.24219980835914612, "learning_rate": 1e-05, "loss": 0.952, "step": 85965 }, { "epoch": 76.14703277236492, "grad_norm": 0.26734235882759094, "learning_rate": 1e-05, "loss": 0.994, "step": 85970 }, { "epoch": 76.15146147032772, "grad_norm": 0.2455536276102066, "learning_rate": 1e-05, "loss": 1.0017, "step": 85975 }, { "epoch": 76.15589016829053, "grad_norm": 0.30622678995132446, "learning_rate": 1e-05, "loss": 1.0361, "step": 85980 }, { "epoch": 76.16031886625332, "grad_norm": 0.2327612340450287, "learning_rate": 1e-05, "loss": 0.975, "step": 85985 }, { "epoch": 76.16474756421611, "grad_norm": 0.23492680490016937, "learning_rate": 1e-05, "loss": 0.9487, "step": 85990 }, { "epoch": 76.16917626217892, "grad_norm": 0.220939502120018, "learning_rate": 1e-05, "loss": 0.936, "step": 85995 }, { "epoch": 76.17360496014172, "grad_norm": 0.20230375230312347, "learning_rate": 1e-05, "loss": 0.9624, "step": 86000 }, { "epoch": 76.17803365810451, "grad_norm": 0.25440168380737305, "learning_rate": 1e-05, "loss": 0.967, "step": 86005 }, { "epoch": 76.18246235606732, "grad_norm": 0.27093505859375, "learning_rate": 1e-05, "loss": 0.987, "step": 86010 }, { "epoch": 76.18689105403011, "grad_norm": 0.23706993460655212, "learning_rate": 1e-05, "loss": 0.9368, "step": 86015 }, { "epoch": 76.1913197519929, "grad_norm": 0.23958848416805267, "learning_rate": 1e-05, "loss": 0.9845, "step": 86020 }, { "epoch": 76.19574844995572, "grad_norm": 0.2547640800476074, "learning_rate": 1e-05, "loss": 0.9454, "step": 86025 }, { "epoch": 76.20017714791851, "grad_norm": 0.2764376699924469, "learning_rate": 1e-05, "loss": 0.9623, "step": 86030 }, { "epoch": 76.20460584588132, "grad_norm": 0.22230862081050873, "learning_rate": 1e-05, "loss": 0.994, "step": 86035 }, { "epoch": 76.20903454384411, "grad_norm": 0.24868865311145782, "learning_rate": 1e-05, "loss": 1.0189, "step": 86040 }, { "epoch": 76.2134632418069, "grad_norm": 0.2329602837562561, "learning_rate": 1e-05, "loss": 0.9382, "step": 86045 }, { "epoch": 76.21789193976971, "grad_norm": 0.22833050787448883, "learning_rate": 1e-05, "loss": 0.944, "step": 86050 }, { "epoch": 76.22232063773251, "grad_norm": 0.24775633215904236, "learning_rate": 1e-05, "loss": 1.031, "step": 86055 }, { "epoch": 76.2267493356953, "grad_norm": 0.2267179787158966, "learning_rate": 1e-05, "loss": 0.9118, "step": 86060 }, { "epoch": 76.23117803365811, "grad_norm": 0.22376282513141632, "learning_rate": 1e-05, "loss": 0.9718, "step": 86065 }, { "epoch": 76.2356067316209, "grad_norm": 0.24551080167293549, "learning_rate": 1e-05, "loss": 0.995, "step": 86070 }, { "epoch": 76.2400354295837, "grad_norm": 0.2261231392621994, "learning_rate": 1e-05, "loss": 1.0263, "step": 86075 }, { "epoch": 76.24446412754651, "grad_norm": 0.23337920010089874, "learning_rate": 1e-05, "loss": 0.9734, "step": 86080 }, { "epoch": 76.2488928255093, "grad_norm": 0.3091772198677063, "learning_rate": 1e-05, "loss": 0.9749, "step": 86085 }, { "epoch": 76.2533215234721, "grad_norm": 0.2600855231285095, "learning_rate": 1e-05, "loss": 0.9785, "step": 86090 }, { "epoch": 76.2577502214349, "grad_norm": 0.28874391317367554, "learning_rate": 1e-05, "loss": 0.9562, "step": 86095 }, { "epoch": 76.2621789193977, "grad_norm": 0.26457393169403076, "learning_rate": 1e-05, "loss": 0.9828, "step": 86100 }, { "epoch": 76.26660761736049, "grad_norm": 0.22813531756401062, "learning_rate": 1e-05, "loss": 0.9402, "step": 86105 }, { "epoch": 76.2710363153233, "grad_norm": 0.22116704285144806, "learning_rate": 1e-05, "loss": 0.9934, "step": 86110 }, { "epoch": 76.2754650132861, "grad_norm": 0.2731364369392395, "learning_rate": 1e-05, "loss": 0.9691, "step": 86115 }, { "epoch": 76.27989371124889, "grad_norm": 0.21815571188926697, "learning_rate": 1e-05, "loss": 0.9038, "step": 86120 }, { "epoch": 76.2843224092117, "grad_norm": 0.23345430195331573, "learning_rate": 1e-05, "loss": 0.9595, "step": 86125 }, { "epoch": 76.28875110717449, "grad_norm": 0.29991385340690613, "learning_rate": 1e-05, "loss": 0.9899, "step": 86130 }, { "epoch": 76.29317980513729, "grad_norm": 0.24409854412078857, "learning_rate": 1e-05, "loss": 0.9895, "step": 86135 }, { "epoch": 76.2976085031001, "grad_norm": 0.25381743907928467, "learning_rate": 1e-05, "loss": 0.9555, "step": 86140 }, { "epoch": 76.30203720106289, "grad_norm": 0.23244820535182953, "learning_rate": 1e-05, "loss": 0.9798, "step": 86145 }, { "epoch": 76.30646589902568, "grad_norm": 0.29370757937431335, "learning_rate": 1e-05, "loss": 0.9335, "step": 86150 }, { "epoch": 76.31089459698849, "grad_norm": 0.22407659888267517, "learning_rate": 1e-05, "loss": 0.9404, "step": 86155 }, { "epoch": 76.31532329495128, "grad_norm": 0.23379559814929962, "learning_rate": 1e-05, "loss": 0.9592, "step": 86160 }, { "epoch": 76.31975199291408, "grad_norm": 0.21955782175064087, "learning_rate": 1e-05, "loss": 0.994, "step": 86165 }, { "epoch": 76.32418069087689, "grad_norm": 0.25068891048431396, "learning_rate": 1e-05, "loss": 0.9535, "step": 86170 }, { "epoch": 76.32860938883968, "grad_norm": 0.23807179927825928, "learning_rate": 1e-05, "loss": 0.9633, "step": 86175 }, { "epoch": 76.33303808680247, "grad_norm": 0.23320156335830688, "learning_rate": 1e-05, "loss": 0.9655, "step": 86180 }, { "epoch": 76.33746678476528, "grad_norm": 0.21294766664505005, "learning_rate": 1e-05, "loss": 0.9821, "step": 86185 }, { "epoch": 76.34189548272808, "grad_norm": 0.2287573665380478, "learning_rate": 1e-05, "loss": 0.9099, "step": 86190 }, { "epoch": 76.34632418069087, "grad_norm": 0.21637177467346191, "learning_rate": 1e-05, "loss": 0.9588, "step": 86195 }, { "epoch": 76.35075287865368, "grad_norm": 0.25280267000198364, "learning_rate": 1e-05, "loss": 0.937, "step": 86200 }, { "epoch": 76.35518157661647, "grad_norm": 0.21149703860282898, "learning_rate": 1e-05, "loss": 0.9442, "step": 86205 }, { "epoch": 76.35961027457927, "grad_norm": 0.22107967734336853, "learning_rate": 1e-05, "loss": 0.9811, "step": 86210 }, { "epoch": 76.36403897254208, "grad_norm": 0.23441214859485626, "learning_rate": 1e-05, "loss": 0.9749, "step": 86215 }, { "epoch": 76.36846767050487, "grad_norm": 0.24445748329162598, "learning_rate": 1e-05, "loss": 0.9366, "step": 86220 }, { "epoch": 76.37289636846766, "grad_norm": 0.22310857474803925, "learning_rate": 1e-05, "loss": 0.938, "step": 86225 }, { "epoch": 76.37732506643047, "grad_norm": 0.24026520550251007, "learning_rate": 1e-05, "loss": 0.9856, "step": 86230 }, { "epoch": 76.38175376439327, "grad_norm": 0.2497514933347702, "learning_rate": 1e-05, "loss": 0.9569, "step": 86235 }, { "epoch": 76.38618246235606, "grad_norm": 0.3094234764575958, "learning_rate": 1e-05, "loss": 0.9588, "step": 86240 }, { "epoch": 76.39061116031887, "grad_norm": 0.28352686762809753, "learning_rate": 1e-05, "loss": 0.9469, "step": 86245 }, { "epoch": 76.39503985828166, "grad_norm": 0.22026361525058746, "learning_rate": 1e-05, "loss": 0.9225, "step": 86250 }, { "epoch": 76.39946855624446, "grad_norm": 0.22804172337055206, "learning_rate": 1e-05, "loss": 0.9929, "step": 86255 }, { "epoch": 76.40389725420727, "grad_norm": 0.21392962336540222, "learning_rate": 1e-05, "loss": 1.0445, "step": 86260 }, { "epoch": 76.40832595217006, "grad_norm": 0.24531374871730804, "learning_rate": 1e-05, "loss": 0.993, "step": 86265 }, { "epoch": 76.41275465013285, "grad_norm": 0.23857592046260834, "learning_rate": 1e-05, "loss": 0.9241, "step": 86270 }, { "epoch": 76.41718334809566, "grad_norm": 0.20373912155628204, "learning_rate": 1e-05, "loss": 0.9366, "step": 86275 }, { "epoch": 76.42161204605846, "grad_norm": 0.24145077168941498, "learning_rate": 1e-05, "loss": 0.9511, "step": 86280 }, { "epoch": 76.42604074402126, "grad_norm": 0.23858612775802612, "learning_rate": 1e-05, "loss": 0.945, "step": 86285 }, { "epoch": 76.43046944198406, "grad_norm": 0.247470885515213, "learning_rate": 1e-05, "loss": 0.9728, "step": 86290 }, { "epoch": 76.43489813994685, "grad_norm": 0.23213699460029602, "learning_rate": 1e-05, "loss": 0.9637, "step": 86295 }, { "epoch": 76.43932683790966, "grad_norm": 0.2299901843070984, "learning_rate": 1e-05, "loss": 0.9994, "step": 86300 }, { "epoch": 76.44375553587246, "grad_norm": 0.2666286826133728, "learning_rate": 1e-05, "loss": 0.9605, "step": 86305 }, { "epoch": 76.44818423383525, "grad_norm": 0.22047480940818787, "learning_rate": 1e-05, "loss": 0.9528, "step": 86310 }, { "epoch": 76.45261293179806, "grad_norm": 0.24304699897766113, "learning_rate": 1e-05, "loss": 0.9638, "step": 86315 }, { "epoch": 76.45704162976085, "grad_norm": 0.2529726028442383, "learning_rate": 1e-05, "loss": 0.9856, "step": 86320 }, { "epoch": 76.46147032772365, "grad_norm": 0.22365185618400574, "learning_rate": 1e-05, "loss": 0.9491, "step": 86325 }, { "epoch": 76.46589902568645, "grad_norm": 0.23253794014453888, "learning_rate": 1e-05, "loss": 0.9706, "step": 86330 }, { "epoch": 76.47032772364925, "grad_norm": 0.2619085907936096, "learning_rate": 1e-05, "loss": 0.9871, "step": 86335 }, { "epoch": 76.47475642161204, "grad_norm": 0.22694091498851776, "learning_rate": 1e-05, "loss": 0.9696, "step": 86340 }, { "epoch": 76.47918511957485, "grad_norm": 0.2369011640548706, "learning_rate": 1e-05, "loss": 0.9628, "step": 86345 }, { "epoch": 76.48361381753764, "grad_norm": 0.22451049089431763, "learning_rate": 1e-05, "loss": 0.9498, "step": 86350 }, { "epoch": 76.48804251550044, "grad_norm": 0.2325989156961441, "learning_rate": 1e-05, "loss": 0.9499, "step": 86355 }, { "epoch": 76.49247121346325, "grad_norm": 0.227561354637146, "learning_rate": 1e-05, "loss": 0.9476, "step": 86360 }, { "epoch": 76.49689991142604, "grad_norm": 0.22985471785068512, "learning_rate": 1e-05, "loss": 0.9614, "step": 86365 }, { "epoch": 76.50132860938884, "grad_norm": 0.19532828032970428, "learning_rate": 1e-05, "loss": 1.0057, "step": 86370 }, { "epoch": 76.50575730735164, "grad_norm": 0.2506723999977112, "learning_rate": 1e-05, "loss": 0.9207, "step": 86375 }, { "epoch": 76.51018600531444, "grad_norm": 0.2511369287967682, "learning_rate": 1e-05, "loss": 0.9446, "step": 86380 }, { "epoch": 76.51461470327723, "grad_norm": 0.3116391897201538, "learning_rate": 1e-05, "loss": 0.971, "step": 86385 }, { "epoch": 76.51904340124004, "grad_norm": 0.2753237187862396, "learning_rate": 1e-05, "loss": 0.904, "step": 86390 }, { "epoch": 76.52347209920283, "grad_norm": 0.27732256054878235, "learning_rate": 1e-05, "loss": 1.0038, "step": 86395 }, { "epoch": 76.52790079716563, "grad_norm": 0.25070759654045105, "learning_rate": 1e-05, "loss": 0.9279, "step": 86400 }, { "epoch": 76.53232949512844, "grad_norm": 0.2630537748336792, "learning_rate": 1e-05, "loss": 0.9046, "step": 86405 }, { "epoch": 76.53675819309123, "grad_norm": 0.24246330559253693, "learning_rate": 1e-05, "loss": 0.985, "step": 86410 }, { "epoch": 76.54118689105402, "grad_norm": 0.20930306613445282, "learning_rate": 1e-05, "loss": 0.969, "step": 86415 }, { "epoch": 76.54561558901683, "grad_norm": 0.2310047447681427, "learning_rate": 1e-05, "loss": 0.9957, "step": 86420 }, { "epoch": 76.55004428697963, "grad_norm": 0.26790744066238403, "learning_rate": 1e-05, "loss": 1.024, "step": 86425 }, { "epoch": 76.55447298494242, "grad_norm": 0.2147175818681717, "learning_rate": 1e-05, "loss": 0.9498, "step": 86430 }, { "epoch": 76.55890168290523, "grad_norm": 0.298657089471817, "learning_rate": 1e-05, "loss": 0.9494, "step": 86435 }, { "epoch": 76.56333038086802, "grad_norm": 0.2431868463754654, "learning_rate": 1e-05, "loss": 0.9478, "step": 86440 }, { "epoch": 76.56775907883082, "grad_norm": 0.33144888281822205, "learning_rate": 1e-05, "loss": 0.917, "step": 86445 }, { "epoch": 76.57218777679363, "grad_norm": 0.24151641130447388, "learning_rate": 1e-05, "loss": 0.9552, "step": 86450 }, { "epoch": 76.57661647475642, "grad_norm": 0.23077145218849182, "learning_rate": 1e-05, "loss": 0.9579, "step": 86455 }, { "epoch": 76.58104517271921, "grad_norm": 0.3118334710597992, "learning_rate": 1e-05, "loss": 0.9987, "step": 86460 }, { "epoch": 76.58547387068202, "grad_norm": 0.24082234501838684, "learning_rate": 1e-05, "loss": 0.986, "step": 86465 }, { "epoch": 76.58990256864482, "grad_norm": 0.25131234526634216, "learning_rate": 1e-05, "loss": 1.0197, "step": 86470 }, { "epoch": 76.59433126660761, "grad_norm": 0.25931814312934875, "learning_rate": 1e-05, "loss": 1.0039, "step": 86475 }, { "epoch": 76.59875996457042, "grad_norm": 0.2307201772928238, "learning_rate": 1e-05, "loss": 0.9851, "step": 86480 }, { "epoch": 76.60318866253321, "grad_norm": 0.28370875120162964, "learning_rate": 1e-05, "loss": 1.0143, "step": 86485 }, { "epoch": 76.60761736049601, "grad_norm": 0.25544503331184387, "learning_rate": 1e-05, "loss": 0.9266, "step": 86490 }, { "epoch": 76.61204605845882, "grad_norm": 0.1927243173122406, "learning_rate": 1e-05, "loss": 0.9725, "step": 86495 }, { "epoch": 76.61647475642161, "grad_norm": 0.2231311947107315, "learning_rate": 1e-05, "loss": 0.9214, "step": 86500 }, { "epoch": 76.6209034543844, "grad_norm": 0.21392396092414856, "learning_rate": 1e-05, "loss": 0.9227, "step": 86505 }, { "epoch": 76.62533215234721, "grad_norm": 0.20430269837379456, "learning_rate": 1e-05, "loss": 0.966, "step": 86510 }, { "epoch": 76.62976085031, "grad_norm": 0.25092628598213196, "learning_rate": 1e-05, "loss": 0.9838, "step": 86515 }, { "epoch": 76.63418954827281, "grad_norm": 0.23163533210754395, "learning_rate": 1e-05, "loss": 0.9417, "step": 86520 }, { "epoch": 76.63861824623561, "grad_norm": 0.23117592930793762, "learning_rate": 1e-05, "loss": 0.9436, "step": 86525 }, { "epoch": 76.6430469441984, "grad_norm": 0.20848457515239716, "learning_rate": 1e-05, "loss": 0.9187, "step": 86530 }, { "epoch": 76.64747564216121, "grad_norm": 0.24528324604034424, "learning_rate": 1e-05, "loss": 1.01, "step": 86535 }, { "epoch": 76.651904340124, "grad_norm": 0.23941963911056519, "learning_rate": 1e-05, "loss": 0.9538, "step": 86540 }, { "epoch": 76.6563330380868, "grad_norm": 0.22736983001232147, "learning_rate": 1e-05, "loss": 0.9786, "step": 86545 }, { "epoch": 76.66076173604961, "grad_norm": 0.25687968730926514, "learning_rate": 1e-05, "loss": 0.9665, "step": 86550 }, { "epoch": 76.6651904340124, "grad_norm": 0.21230284869670868, "learning_rate": 1e-05, "loss": 0.9436, "step": 86555 }, { "epoch": 76.6696191319752, "grad_norm": 0.24746738374233246, "learning_rate": 1e-05, "loss": 0.9663, "step": 86560 }, { "epoch": 76.674047829938, "grad_norm": 0.2330058366060257, "learning_rate": 1e-05, "loss": 0.9793, "step": 86565 }, { "epoch": 76.6784765279008, "grad_norm": 0.2323242723941803, "learning_rate": 1e-05, "loss": 0.9481, "step": 86570 }, { "epoch": 76.68290522586359, "grad_norm": 0.23065844178199768, "learning_rate": 1e-05, "loss": 0.9508, "step": 86575 }, { "epoch": 76.6873339238264, "grad_norm": 0.2441716492176056, "learning_rate": 1e-05, "loss": 1.002, "step": 86580 }, { "epoch": 76.6917626217892, "grad_norm": 0.25631096959114075, "learning_rate": 1e-05, "loss": 0.9467, "step": 86585 }, { "epoch": 76.69619131975199, "grad_norm": 0.2615143954753876, "learning_rate": 1e-05, "loss": 0.9235, "step": 86590 }, { "epoch": 76.7006200177148, "grad_norm": 0.2249242663383484, "learning_rate": 1e-05, "loss": 0.9134, "step": 86595 }, { "epoch": 76.70504871567759, "grad_norm": 0.24702699482440948, "learning_rate": 1e-05, "loss": 0.9318, "step": 86600 }, { "epoch": 76.70947741364039, "grad_norm": 0.23183965682983398, "learning_rate": 1e-05, "loss": 0.9565, "step": 86605 }, { "epoch": 76.7139061116032, "grad_norm": 0.2163151055574417, "learning_rate": 1e-05, "loss": 0.9656, "step": 86610 }, { "epoch": 76.71833480956599, "grad_norm": 0.20894266664981842, "learning_rate": 1e-05, "loss": 0.9571, "step": 86615 }, { "epoch": 76.72276350752878, "grad_norm": 0.2231171727180481, "learning_rate": 1e-05, "loss": 0.9526, "step": 86620 }, { "epoch": 76.72719220549159, "grad_norm": 0.24931246042251587, "learning_rate": 1e-05, "loss": 0.9749, "step": 86625 }, { "epoch": 76.73162090345438, "grad_norm": 0.22917351126670837, "learning_rate": 1e-05, "loss": 1.0038, "step": 86630 }, { "epoch": 76.73604960141718, "grad_norm": 0.2686276137828827, "learning_rate": 1e-05, "loss": 0.9516, "step": 86635 }, { "epoch": 76.74047829937999, "grad_norm": 0.225505530834198, "learning_rate": 1e-05, "loss": 0.959, "step": 86640 }, { "epoch": 76.74490699734278, "grad_norm": 0.2655288577079773, "learning_rate": 1e-05, "loss": 0.943, "step": 86645 }, { "epoch": 76.74933569530558, "grad_norm": 0.2716740071773529, "learning_rate": 1e-05, "loss": 0.9326, "step": 86650 }, { "epoch": 76.75376439326838, "grad_norm": 0.23587769269943237, "learning_rate": 1e-05, "loss": 0.9234, "step": 86655 }, { "epoch": 76.75819309123118, "grad_norm": 0.2702116072177887, "learning_rate": 1e-05, "loss": 0.9694, "step": 86660 }, { "epoch": 76.76262178919397, "grad_norm": 0.26375889778137207, "learning_rate": 1e-05, "loss": 0.928, "step": 86665 }, { "epoch": 76.76705048715678, "grad_norm": 0.2620989680290222, "learning_rate": 1e-05, "loss": 0.9618, "step": 86670 }, { "epoch": 76.77147918511957, "grad_norm": 0.2296069711446762, "learning_rate": 1e-05, "loss": 0.9324, "step": 86675 }, { "epoch": 76.77590788308237, "grad_norm": 0.2209530770778656, "learning_rate": 1e-05, "loss": 0.9736, "step": 86680 }, { "epoch": 76.78033658104518, "grad_norm": 0.236846461892128, "learning_rate": 1e-05, "loss": 0.9243, "step": 86685 }, { "epoch": 76.78476527900797, "grad_norm": 0.23009677231311798, "learning_rate": 1e-05, "loss": 0.9436, "step": 86690 }, { "epoch": 76.78919397697076, "grad_norm": 0.2381734699010849, "learning_rate": 1e-05, "loss": 0.9581, "step": 86695 }, { "epoch": 76.79362267493357, "grad_norm": 0.24124813079833984, "learning_rate": 1e-05, "loss": 0.9844, "step": 86700 }, { "epoch": 76.79805137289637, "grad_norm": 0.22239917516708374, "learning_rate": 1e-05, "loss": 0.9072, "step": 86705 }, { "epoch": 76.80248007085916, "grad_norm": 0.22841840982437134, "learning_rate": 1e-05, "loss": 0.9181, "step": 86710 }, { "epoch": 76.80690876882197, "grad_norm": 0.22420372068881989, "learning_rate": 1e-05, "loss": 0.9693, "step": 86715 }, { "epoch": 76.81133746678476, "grad_norm": 0.22873982787132263, "learning_rate": 1e-05, "loss": 0.936, "step": 86720 }, { "epoch": 76.81576616474756, "grad_norm": 0.2477078139781952, "learning_rate": 1e-05, "loss": 0.9778, "step": 86725 }, { "epoch": 76.82019486271037, "grad_norm": 0.2596331238746643, "learning_rate": 1e-05, "loss": 0.9221, "step": 86730 }, { "epoch": 76.82462356067316, "grad_norm": 0.25076568126678467, "learning_rate": 1e-05, "loss": 0.903, "step": 86735 }, { "epoch": 76.82905225863595, "grad_norm": 0.2529015839099884, "learning_rate": 1e-05, "loss": 0.9461, "step": 86740 }, { "epoch": 76.83348095659876, "grad_norm": 0.27693265676498413, "learning_rate": 1e-05, "loss": 0.9697, "step": 86745 }, { "epoch": 76.83790965456156, "grad_norm": 0.22784750163555145, "learning_rate": 1e-05, "loss": 0.9505, "step": 86750 }, { "epoch": 76.84233835252435, "grad_norm": 0.2062016874551773, "learning_rate": 1e-05, "loss": 0.9517, "step": 86755 }, { "epoch": 76.84676705048716, "grad_norm": 0.2384243607521057, "learning_rate": 1e-05, "loss": 0.9413, "step": 86760 }, { "epoch": 76.85119574844995, "grad_norm": 0.23561438918113708, "learning_rate": 1e-05, "loss": 0.9223, "step": 86765 }, { "epoch": 76.85562444641276, "grad_norm": 0.23912149667739868, "learning_rate": 1e-05, "loss": 0.9762, "step": 86770 }, { "epoch": 76.86005314437556, "grad_norm": 0.25323787331581116, "learning_rate": 1e-05, "loss": 0.9467, "step": 86775 }, { "epoch": 76.86448184233835, "grad_norm": 0.23842278122901917, "learning_rate": 1e-05, "loss": 0.9182, "step": 86780 }, { "epoch": 76.86891054030116, "grad_norm": 0.2314378321170807, "learning_rate": 1e-05, "loss": 0.9817, "step": 86785 }, { "epoch": 76.87333923826395, "grad_norm": 0.2654232680797577, "learning_rate": 1e-05, "loss": 0.9179, "step": 86790 }, { "epoch": 76.87776793622675, "grad_norm": 0.27247875928878784, "learning_rate": 1e-05, "loss": 0.9745, "step": 86795 }, { "epoch": 76.88219663418955, "grad_norm": 0.220348060131073, "learning_rate": 1e-05, "loss": 0.9574, "step": 86800 }, { "epoch": 76.88662533215235, "grad_norm": 0.225404292345047, "learning_rate": 1e-05, "loss": 0.9728, "step": 86805 }, { "epoch": 76.89105403011514, "grad_norm": 0.2505453824996948, "learning_rate": 1e-05, "loss": 0.995, "step": 86810 }, { "epoch": 76.89548272807795, "grad_norm": 0.2077699899673462, "learning_rate": 1e-05, "loss": 0.9321, "step": 86815 }, { "epoch": 76.89991142604075, "grad_norm": 0.2339121550321579, "learning_rate": 1e-05, "loss": 0.9628, "step": 86820 }, { "epoch": 76.90434012400354, "grad_norm": 0.2175145298242569, "learning_rate": 1e-05, "loss": 0.9631, "step": 86825 }, { "epoch": 76.90876882196635, "grad_norm": 0.23567888140678406, "learning_rate": 1e-05, "loss": 0.9652, "step": 86830 }, { "epoch": 76.91319751992914, "grad_norm": 0.23579953610897064, "learning_rate": 1e-05, "loss": 0.9514, "step": 86835 }, { "epoch": 76.91762621789194, "grad_norm": 0.20956461131572723, "learning_rate": 1e-05, "loss": 1.0361, "step": 86840 }, { "epoch": 76.92205491585474, "grad_norm": 0.24520070850849152, "learning_rate": 1e-05, "loss": 0.9765, "step": 86845 }, { "epoch": 76.92648361381754, "grad_norm": 0.23382751643657684, "learning_rate": 1e-05, "loss": 0.9521, "step": 86850 }, { "epoch": 76.93091231178033, "grad_norm": 0.27794188261032104, "learning_rate": 1e-05, "loss": 0.9552, "step": 86855 }, { "epoch": 76.93534100974314, "grad_norm": 0.22913743555545807, "learning_rate": 1e-05, "loss": 0.9747, "step": 86860 }, { "epoch": 76.93976970770593, "grad_norm": 0.2015327662229538, "learning_rate": 1e-05, "loss": 0.9304, "step": 86865 }, { "epoch": 76.94419840566873, "grad_norm": 0.2482973039150238, "learning_rate": 1e-05, "loss": 0.9593, "step": 86870 }, { "epoch": 76.94862710363154, "grad_norm": 0.23543602228164673, "learning_rate": 1e-05, "loss": 0.9653, "step": 86875 }, { "epoch": 76.95305580159433, "grad_norm": 0.26965153217315674, "learning_rate": 1e-05, "loss": 0.9875, "step": 86880 }, { "epoch": 76.95748449955713, "grad_norm": 0.23308025300502777, "learning_rate": 1e-05, "loss": 0.979, "step": 86885 }, { "epoch": 76.96191319751993, "grad_norm": 0.2172469049692154, "learning_rate": 1e-05, "loss": 0.9366, "step": 86890 }, { "epoch": 76.96634189548273, "grad_norm": 0.2006201595067978, "learning_rate": 1e-05, "loss": 0.9341, "step": 86895 }, { "epoch": 76.97077059344552, "grad_norm": 0.23705631494522095, "learning_rate": 1e-05, "loss": 0.9422, "step": 86900 }, { "epoch": 76.97519929140833, "grad_norm": 0.2509170174598694, "learning_rate": 1e-05, "loss": 0.9628, "step": 86905 }, { "epoch": 76.97962798937112, "grad_norm": 0.31459057331085205, "learning_rate": 1e-05, "loss": 0.9952, "step": 86910 }, { "epoch": 76.98405668733392, "grad_norm": 0.24824321269989014, "learning_rate": 1e-05, "loss": 0.8998, "step": 86915 }, { "epoch": 76.98848538529673, "grad_norm": 0.2622279226779938, "learning_rate": 1e-05, "loss": 0.9746, "step": 86920 }, { "epoch": 76.99291408325952, "grad_norm": 0.2236255705356598, "learning_rate": 1e-05, "loss": 0.9576, "step": 86925 }, { "epoch": 76.99734278122232, "grad_norm": 0.24968622624874115, "learning_rate": 1e-05, "loss": 0.9281, "step": 86930 }, { "epoch": 77.00177147918512, "grad_norm": 0.25333523750305176, "learning_rate": 1e-05, "loss": 0.9406, "step": 86935 }, { "epoch": 77.00620017714792, "grad_norm": 0.2364991158246994, "learning_rate": 1e-05, "loss": 0.9759, "step": 86940 }, { "epoch": 77.01062887511071, "grad_norm": 0.2518218755722046, "learning_rate": 1e-05, "loss": 0.9314, "step": 86945 }, { "epoch": 77.01505757307352, "grad_norm": 0.21034954488277435, "learning_rate": 1e-05, "loss": 1.0352, "step": 86950 }, { "epoch": 77.01948627103631, "grad_norm": 0.26711803674697876, "learning_rate": 1e-05, "loss": 0.9801, "step": 86955 }, { "epoch": 77.02391496899911, "grad_norm": 0.28568345308303833, "learning_rate": 1e-05, "loss": 0.9438, "step": 86960 }, { "epoch": 77.02834366696192, "grad_norm": 0.22148174047470093, "learning_rate": 1e-05, "loss": 0.9746, "step": 86965 }, { "epoch": 77.03277236492471, "grad_norm": 0.1994667500257492, "learning_rate": 1e-05, "loss": 0.973, "step": 86970 }, { "epoch": 77.0372010628875, "grad_norm": 0.22593165934085846, "learning_rate": 1e-05, "loss": 1.0304, "step": 86975 }, { "epoch": 77.04162976085031, "grad_norm": 0.2594856023788452, "learning_rate": 1e-05, "loss": 0.9463, "step": 86980 }, { "epoch": 77.0460584588131, "grad_norm": 0.2465016096830368, "learning_rate": 1e-05, "loss": 0.9421, "step": 86985 }, { "epoch": 77.0504871567759, "grad_norm": 0.21591578423976898, "learning_rate": 1e-05, "loss": 1.0139, "step": 86990 }, { "epoch": 77.05491585473871, "grad_norm": 0.28775516152381897, "learning_rate": 1e-05, "loss": 0.9642, "step": 86995 }, { "epoch": 77.0593445527015, "grad_norm": 0.2348496913909912, "learning_rate": 1e-05, "loss": 0.9708, "step": 87000 }, { "epoch": 77.0637732506643, "grad_norm": 0.23258076608181, "learning_rate": 1e-05, "loss": 1.0016, "step": 87005 }, { "epoch": 77.0682019486271, "grad_norm": 0.21153271198272705, "learning_rate": 1e-05, "loss": 0.9836, "step": 87010 }, { "epoch": 77.0726306465899, "grad_norm": 0.2139168083667755, "learning_rate": 1e-05, "loss": 0.9757, "step": 87015 }, { "epoch": 77.07705934455271, "grad_norm": 0.2559416890144348, "learning_rate": 1e-05, "loss": 0.9874, "step": 87020 }, { "epoch": 77.0814880425155, "grad_norm": 0.26610124111175537, "learning_rate": 1e-05, "loss": 1.0, "step": 87025 }, { "epoch": 77.0859167404783, "grad_norm": 0.2436743825674057, "learning_rate": 1e-05, "loss": 0.9255, "step": 87030 }, { "epoch": 77.0903454384411, "grad_norm": 0.2805301547050476, "learning_rate": 1e-05, "loss": 0.9579, "step": 87035 }, { "epoch": 77.0947741364039, "grad_norm": 0.2683490812778473, "learning_rate": 1e-05, "loss": 0.9278, "step": 87040 }, { "epoch": 77.0992028343667, "grad_norm": 0.21891073882579803, "learning_rate": 1e-05, "loss": 1.0377, "step": 87045 }, { "epoch": 77.1036315323295, "grad_norm": 0.25783663988113403, "learning_rate": 1e-05, "loss": 0.8835, "step": 87050 }, { "epoch": 77.1080602302923, "grad_norm": 0.23595960438251495, "learning_rate": 1e-05, "loss": 0.992, "step": 87055 }, { "epoch": 77.11248892825509, "grad_norm": 0.2305869609117508, "learning_rate": 1e-05, "loss": 0.9234, "step": 87060 }, { "epoch": 77.1169176262179, "grad_norm": 0.23492711782455444, "learning_rate": 1e-05, "loss": 0.9691, "step": 87065 }, { "epoch": 77.12134632418069, "grad_norm": 0.233218714594841, "learning_rate": 1e-05, "loss": 0.9558, "step": 87070 }, { "epoch": 77.12577502214349, "grad_norm": 0.2314988374710083, "learning_rate": 1e-05, "loss": 0.9451, "step": 87075 }, { "epoch": 77.1302037201063, "grad_norm": 0.2374245524406433, "learning_rate": 1e-05, "loss": 0.9709, "step": 87080 }, { "epoch": 77.13463241806909, "grad_norm": 0.25992587208747864, "learning_rate": 1e-05, "loss": 0.9325, "step": 87085 }, { "epoch": 77.13906111603188, "grad_norm": 0.233268603682518, "learning_rate": 1e-05, "loss": 0.9512, "step": 87090 }, { "epoch": 77.14348981399469, "grad_norm": 0.2239455282688141, "learning_rate": 1e-05, "loss": 0.9837, "step": 87095 }, { "epoch": 77.14791851195749, "grad_norm": 0.24732601642608643, "learning_rate": 1e-05, "loss": 0.9761, "step": 87100 }, { "epoch": 77.15234720992028, "grad_norm": 0.26330986618995667, "learning_rate": 1e-05, "loss": 0.9309, "step": 87105 }, { "epoch": 77.15677590788309, "grad_norm": 0.3085660934448242, "learning_rate": 1e-05, "loss": 0.923, "step": 87110 }, { "epoch": 77.16120460584588, "grad_norm": 0.2828408181667328, "learning_rate": 1e-05, "loss": 0.964, "step": 87115 }, { "epoch": 77.16563330380868, "grad_norm": 0.23471574485301971, "learning_rate": 1e-05, "loss": 0.9701, "step": 87120 }, { "epoch": 77.17006200177148, "grad_norm": 0.22224895656108856, "learning_rate": 1e-05, "loss": 0.8937, "step": 87125 }, { "epoch": 77.17449069973428, "grad_norm": 0.24026210606098175, "learning_rate": 1e-05, "loss": 0.8974, "step": 87130 }, { "epoch": 77.17891939769707, "grad_norm": 0.2387019544839859, "learning_rate": 1e-05, "loss": 0.9654, "step": 87135 }, { "epoch": 77.18334809565988, "grad_norm": 0.248616024851799, "learning_rate": 1e-05, "loss": 0.9588, "step": 87140 }, { "epoch": 77.18777679362267, "grad_norm": 0.25219297409057617, "learning_rate": 1e-05, "loss": 0.9292, "step": 87145 }, { "epoch": 77.19220549158547, "grad_norm": 0.2501124143600464, "learning_rate": 1e-05, "loss": 0.9676, "step": 87150 }, { "epoch": 77.19663418954828, "grad_norm": 0.2432943433523178, "learning_rate": 1e-05, "loss": 0.9096, "step": 87155 }, { "epoch": 77.20106288751107, "grad_norm": 0.26558417081832886, "learning_rate": 1e-05, "loss": 0.987, "step": 87160 }, { "epoch": 77.20549158547387, "grad_norm": 0.2285250723361969, "learning_rate": 1e-05, "loss": 0.8989, "step": 87165 }, { "epoch": 77.20992028343667, "grad_norm": 0.2780630588531494, "learning_rate": 1e-05, "loss": 0.9697, "step": 87170 }, { "epoch": 77.21434898139947, "grad_norm": 0.2030693143606186, "learning_rate": 1e-05, "loss": 0.9901, "step": 87175 }, { "epoch": 77.21877767936226, "grad_norm": 0.26875707507133484, "learning_rate": 1e-05, "loss": 0.9376, "step": 87180 }, { "epoch": 77.22320637732507, "grad_norm": 0.2510926127433777, "learning_rate": 1e-05, "loss": 0.9674, "step": 87185 }, { "epoch": 77.22763507528786, "grad_norm": 0.24003753066062927, "learning_rate": 1e-05, "loss": 0.9545, "step": 87190 }, { "epoch": 77.23206377325066, "grad_norm": 0.24508704245090485, "learning_rate": 1e-05, "loss": 0.949, "step": 87195 }, { "epoch": 77.23649247121347, "grad_norm": 0.2086341381072998, "learning_rate": 1e-05, "loss": 0.996, "step": 87200 }, { "epoch": 77.24092116917626, "grad_norm": 0.26420485973358154, "learning_rate": 1e-05, "loss": 0.9659, "step": 87205 }, { "epoch": 77.24534986713905, "grad_norm": 0.25184863805770874, "learning_rate": 1e-05, "loss": 0.9411, "step": 87210 }, { "epoch": 77.24977856510186, "grad_norm": 0.23711846768856049, "learning_rate": 1e-05, "loss": 1.0242, "step": 87215 }, { "epoch": 77.25420726306466, "grad_norm": 0.2293325662612915, "learning_rate": 1e-05, "loss": 0.9422, "step": 87220 }, { "epoch": 77.25863596102745, "grad_norm": 0.2318987250328064, "learning_rate": 1e-05, "loss": 0.9869, "step": 87225 }, { "epoch": 77.26306465899026, "grad_norm": 0.26365166902542114, "learning_rate": 1e-05, "loss": 0.9799, "step": 87230 }, { "epoch": 77.26749335695305, "grad_norm": 0.22320283949375153, "learning_rate": 1e-05, "loss": 1.0335, "step": 87235 }, { "epoch": 77.27192205491585, "grad_norm": 0.26045191287994385, "learning_rate": 1e-05, "loss": 0.9747, "step": 87240 }, { "epoch": 77.27635075287866, "grad_norm": 0.2685241401195526, "learning_rate": 1e-05, "loss": 0.9678, "step": 87245 }, { "epoch": 77.28077945084145, "grad_norm": 0.2308633178472519, "learning_rate": 1e-05, "loss": 1.0128, "step": 87250 }, { "epoch": 77.28520814880426, "grad_norm": 0.23745161294937134, "learning_rate": 1e-05, "loss": 0.9534, "step": 87255 }, { "epoch": 77.28963684676705, "grad_norm": 0.21297231316566467, "learning_rate": 1e-05, "loss": 0.9632, "step": 87260 }, { "epoch": 77.29406554472985, "grad_norm": 0.29279035329818726, "learning_rate": 1e-05, "loss": 1.0112, "step": 87265 }, { "epoch": 77.29849424269266, "grad_norm": 0.29635563492774963, "learning_rate": 1e-05, "loss": 0.9655, "step": 87270 }, { "epoch": 77.30292294065545, "grad_norm": 0.2522418797016144, "learning_rate": 1e-05, "loss": 0.9194, "step": 87275 }, { "epoch": 77.30735163861824, "grad_norm": 0.23736627399921417, "learning_rate": 1e-05, "loss": 0.9696, "step": 87280 }, { "epoch": 77.31178033658105, "grad_norm": 0.21685829758644104, "learning_rate": 1e-05, "loss": 0.9904, "step": 87285 }, { "epoch": 77.31620903454385, "grad_norm": 0.24377422034740448, "learning_rate": 1e-05, "loss": 0.9785, "step": 87290 }, { "epoch": 77.32063773250664, "grad_norm": 0.23714256286621094, "learning_rate": 1e-05, "loss": 0.9635, "step": 87295 }, { "epoch": 77.32506643046945, "grad_norm": 0.20305867493152618, "learning_rate": 1e-05, "loss": 0.9345, "step": 87300 }, { "epoch": 77.32949512843224, "grad_norm": 0.22839532792568207, "learning_rate": 1e-05, "loss": 0.999, "step": 87305 }, { "epoch": 77.33392382639504, "grad_norm": 0.2554134726524353, "learning_rate": 1e-05, "loss": 0.9337, "step": 87310 }, { "epoch": 77.33835252435784, "grad_norm": 0.20826709270477295, "learning_rate": 1e-05, "loss": 0.9623, "step": 87315 }, { "epoch": 77.34278122232064, "grad_norm": 0.25881198048591614, "learning_rate": 1e-05, "loss": 0.9858, "step": 87320 }, { "epoch": 77.34720992028343, "grad_norm": 0.21859923005104065, "learning_rate": 1e-05, "loss": 0.9694, "step": 87325 }, { "epoch": 77.35163861824624, "grad_norm": 0.22646629810333252, "learning_rate": 1e-05, "loss": 0.9517, "step": 87330 }, { "epoch": 77.35606731620904, "grad_norm": 0.22934813797473907, "learning_rate": 1e-05, "loss": 0.9378, "step": 87335 }, { "epoch": 77.36049601417183, "grad_norm": 0.2469106912612915, "learning_rate": 1e-05, "loss": 0.9216, "step": 87340 }, { "epoch": 77.36492471213464, "grad_norm": 0.23186412453651428, "learning_rate": 1e-05, "loss": 0.8968, "step": 87345 }, { "epoch": 77.36935341009743, "grad_norm": 0.25305822491645813, "learning_rate": 1e-05, "loss": 0.9195, "step": 87350 }, { "epoch": 77.37378210806023, "grad_norm": 0.2502666711807251, "learning_rate": 1e-05, "loss": 0.9366, "step": 87355 }, { "epoch": 77.37821080602303, "grad_norm": 0.22845736145973206, "learning_rate": 1e-05, "loss": 0.9644, "step": 87360 }, { "epoch": 77.38263950398583, "grad_norm": 0.1960151642560959, "learning_rate": 1e-05, "loss": 0.9371, "step": 87365 }, { "epoch": 77.38706820194862, "grad_norm": 0.22671368718147278, "learning_rate": 1e-05, "loss": 0.9371, "step": 87370 }, { "epoch": 77.39149689991143, "grad_norm": 0.2935572564601898, "learning_rate": 1e-05, "loss": 0.9683, "step": 87375 }, { "epoch": 77.39592559787422, "grad_norm": 0.2797524034976959, "learning_rate": 1e-05, "loss": 0.9925, "step": 87380 }, { "epoch": 77.40035429583702, "grad_norm": 0.24903050065040588, "learning_rate": 1e-05, "loss": 0.9403, "step": 87385 }, { "epoch": 77.40478299379983, "grad_norm": 0.26196572184562683, "learning_rate": 1e-05, "loss": 0.9091, "step": 87390 }, { "epoch": 77.40921169176262, "grad_norm": 0.2857091724872589, "learning_rate": 1e-05, "loss": 0.9706, "step": 87395 }, { "epoch": 77.41364038972542, "grad_norm": 0.2554527223110199, "learning_rate": 1e-05, "loss": 1.0039, "step": 87400 }, { "epoch": 77.41806908768822, "grad_norm": 0.214559406042099, "learning_rate": 1e-05, "loss": 0.9899, "step": 87405 }, { "epoch": 77.42249778565102, "grad_norm": 0.20117905735969543, "learning_rate": 1e-05, "loss": 0.9719, "step": 87410 }, { "epoch": 77.42692648361381, "grad_norm": 0.22684235870838165, "learning_rate": 1e-05, "loss": 0.9694, "step": 87415 }, { "epoch": 77.43135518157662, "grad_norm": 0.22124417126178741, "learning_rate": 1e-05, "loss": 0.9549, "step": 87420 }, { "epoch": 77.43578387953941, "grad_norm": 0.22114130854606628, "learning_rate": 1e-05, "loss": 0.9834, "step": 87425 }, { "epoch": 77.44021257750221, "grad_norm": 0.22874243557453156, "learning_rate": 1e-05, "loss": 0.9776, "step": 87430 }, { "epoch": 77.44464127546502, "grad_norm": 0.2213953584432602, "learning_rate": 1e-05, "loss": 0.9386, "step": 87435 }, { "epoch": 77.44906997342781, "grad_norm": 0.2078544646501541, "learning_rate": 1e-05, "loss": 0.9637, "step": 87440 }, { "epoch": 77.4534986713906, "grad_norm": 0.280277281999588, "learning_rate": 1e-05, "loss": 0.94, "step": 87445 }, { "epoch": 77.45792736935341, "grad_norm": 0.20978453755378723, "learning_rate": 1e-05, "loss": 0.9609, "step": 87450 }, { "epoch": 77.46235606731621, "grad_norm": 0.2221294343471527, "learning_rate": 1e-05, "loss": 0.9784, "step": 87455 }, { "epoch": 77.466784765279, "grad_norm": 0.2494630664587021, "learning_rate": 1e-05, "loss": 0.9556, "step": 87460 }, { "epoch": 77.47121346324181, "grad_norm": 0.2390281707048416, "learning_rate": 1e-05, "loss": 0.9458, "step": 87465 }, { "epoch": 77.4756421612046, "grad_norm": 0.2283698320388794, "learning_rate": 1e-05, "loss": 0.9466, "step": 87470 }, { "epoch": 77.4800708591674, "grad_norm": 0.21567218005657196, "learning_rate": 1e-05, "loss": 0.9749, "step": 87475 }, { "epoch": 77.4844995571302, "grad_norm": 0.24495169520378113, "learning_rate": 1e-05, "loss": 1.0283, "step": 87480 }, { "epoch": 77.488928255093, "grad_norm": 0.30073457956314087, "learning_rate": 1e-05, "loss": 0.9971, "step": 87485 }, { "epoch": 77.4933569530558, "grad_norm": 0.2280602902173996, "learning_rate": 1e-05, "loss": 0.9576, "step": 87490 }, { "epoch": 77.4977856510186, "grad_norm": 0.24839480221271515, "learning_rate": 1e-05, "loss": 0.9448, "step": 87495 }, { "epoch": 77.5022143489814, "grad_norm": 0.26367878913879395, "learning_rate": 1e-05, "loss": 0.9811, "step": 87500 }, { "epoch": 77.5066430469442, "grad_norm": 0.22025717794895172, "learning_rate": 1e-05, "loss": 1.0018, "step": 87505 }, { "epoch": 77.511071744907, "grad_norm": 0.21963772177696228, "learning_rate": 1e-05, "loss": 0.9859, "step": 87510 }, { "epoch": 77.5155004428698, "grad_norm": 0.22366157174110413, "learning_rate": 1e-05, "loss": 1.0051, "step": 87515 }, { "epoch": 77.5199291408326, "grad_norm": 0.22449831664562225, "learning_rate": 1e-05, "loss": 0.9335, "step": 87520 }, { "epoch": 77.5243578387954, "grad_norm": 0.23920750617980957, "learning_rate": 1e-05, "loss": 0.9303, "step": 87525 }, { "epoch": 77.52878653675819, "grad_norm": 0.21678584814071655, "learning_rate": 1e-05, "loss": 0.9422, "step": 87530 }, { "epoch": 77.533215234721, "grad_norm": 0.2494162917137146, "learning_rate": 1e-05, "loss": 0.9344, "step": 87535 }, { "epoch": 77.53764393268379, "grad_norm": 0.2327771931886673, "learning_rate": 1e-05, "loss": 0.9253, "step": 87540 }, { "epoch": 77.54207263064659, "grad_norm": 0.22130730748176575, "learning_rate": 1e-05, "loss": 0.9209, "step": 87545 }, { "epoch": 77.5465013286094, "grad_norm": 0.24990203976631165, "learning_rate": 1e-05, "loss": 1.0109, "step": 87550 }, { "epoch": 77.55093002657219, "grad_norm": 0.23619632422924042, "learning_rate": 1e-05, "loss": 1.0117, "step": 87555 }, { "epoch": 77.55535872453498, "grad_norm": 0.22352677583694458, "learning_rate": 1e-05, "loss": 0.9994, "step": 87560 }, { "epoch": 77.55978742249779, "grad_norm": 0.24528370797634125, "learning_rate": 1e-05, "loss": 1.0044, "step": 87565 }, { "epoch": 77.56421612046059, "grad_norm": 0.2725163996219635, "learning_rate": 1e-05, "loss": 1.0187, "step": 87570 }, { "epoch": 77.56864481842338, "grad_norm": 0.208133265376091, "learning_rate": 1e-05, "loss": 0.9779, "step": 87575 }, { "epoch": 77.57307351638619, "grad_norm": 0.23830865323543549, "learning_rate": 1e-05, "loss": 0.9169, "step": 87580 }, { "epoch": 77.57750221434898, "grad_norm": 0.23724903166294098, "learning_rate": 1e-05, "loss": 0.9991, "step": 87585 }, { "epoch": 77.58193091231178, "grad_norm": 0.2735835909843445, "learning_rate": 1e-05, "loss": 0.9818, "step": 87590 }, { "epoch": 77.58635961027458, "grad_norm": 0.23597557842731476, "learning_rate": 1e-05, "loss": 1.0213, "step": 87595 }, { "epoch": 77.59078830823738, "grad_norm": 0.267072856426239, "learning_rate": 1e-05, "loss": 1.0178, "step": 87600 }, { "epoch": 77.59521700620017, "grad_norm": 0.2662096917629242, "learning_rate": 1e-05, "loss": 0.9565, "step": 87605 }, { "epoch": 77.59964570416298, "grad_norm": 0.2577826976776123, "learning_rate": 1e-05, "loss": 1.0185, "step": 87610 }, { "epoch": 77.60407440212578, "grad_norm": 0.23772192001342773, "learning_rate": 1e-05, "loss": 0.9928, "step": 87615 }, { "epoch": 77.60850310008857, "grad_norm": 0.2308945655822754, "learning_rate": 1e-05, "loss": 0.9369, "step": 87620 }, { "epoch": 77.61293179805138, "grad_norm": 0.2857731878757477, "learning_rate": 1e-05, "loss": 0.9385, "step": 87625 }, { "epoch": 77.61736049601417, "grad_norm": 0.2752941846847534, "learning_rate": 1e-05, "loss": 0.9246, "step": 87630 }, { "epoch": 77.62178919397697, "grad_norm": 0.22603636980056763, "learning_rate": 1e-05, "loss": 0.9538, "step": 87635 }, { "epoch": 77.62621789193977, "grad_norm": 0.2126711905002594, "learning_rate": 1e-05, "loss": 0.9779, "step": 87640 }, { "epoch": 77.63064658990257, "grad_norm": 0.2467328906059265, "learning_rate": 1e-05, "loss": 0.9781, "step": 87645 }, { "epoch": 77.63507528786536, "grad_norm": 0.2128833383321762, "learning_rate": 1e-05, "loss": 0.9394, "step": 87650 }, { "epoch": 77.63950398582817, "grad_norm": 0.22866934537887573, "learning_rate": 1e-05, "loss": 0.9976, "step": 87655 }, { "epoch": 77.64393268379096, "grad_norm": 0.25474923849105835, "learning_rate": 1e-05, "loss": 0.9435, "step": 87660 }, { "epoch": 77.64836138175376, "grad_norm": 0.28724879026412964, "learning_rate": 1e-05, "loss": 1.0425, "step": 87665 }, { "epoch": 77.65279007971657, "grad_norm": 0.28412649035453796, "learning_rate": 1e-05, "loss": 0.9615, "step": 87670 }, { "epoch": 77.65721877767936, "grad_norm": 0.24339643120765686, "learning_rate": 1e-05, "loss": 0.8986, "step": 87675 }, { "epoch": 77.66164747564216, "grad_norm": 0.26179274916648865, "learning_rate": 1e-05, "loss": 0.9152, "step": 87680 }, { "epoch": 77.66607617360496, "grad_norm": 0.20889487862586975, "learning_rate": 1e-05, "loss": 0.9587, "step": 87685 }, { "epoch": 77.67050487156776, "grad_norm": 0.22540608048439026, "learning_rate": 1e-05, "loss": 0.9281, "step": 87690 }, { "epoch": 77.67493356953055, "grad_norm": 0.23712977766990662, "learning_rate": 1e-05, "loss": 0.9328, "step": 87695 }, { "epoch": 77.67936226749336, "grad_norm": 0.23204006254673004, "learning_rate": 1e-05, "loss": 0.9313, "step": 87700 }, { "epoch": 77.68379096545615, "grad_norm": 0.22438377141952515, "learning_rate": 1e-05, "loss": 0.8877, "step": 87705 }, { "epoch": 77.68821966341895, "grad_norm": 0.24378086626529694, "learning_rate": 1e-05, "loss": 0.9898, "step": 87710 }, { "epoch": 77.69264836138176, "grad_norm": 0.23039083182811737, "learning_rate": 1e-05, "loss": 0.9795, "step": 87715 }, { "epoch": 77.69707705934455, "grad_norm": 0.21411454677581787, "learning_rate": 1e-05, "loss": 0.9227, "step": 87720 }, { "epoch": 77.70150575730734, "grad_norm": 0.23939155042171478, "learning_rate": 1e-05, "loss": 0.9389, "step": 87725 }, { "epoch": 77.70593445527015, "grad_norm": 0.24701708555221558, "learning_rate": 1e-05, "loss": 0.994, "step": 87730 }, { "epoch": 77.71036315323295, "grad_norm": 0.26469677686691284, "learning_rate": 1e-05, "loss": 0.9769, "step": 87735 }, { "epoch": 77.71479185119574, "grad_norm": 0.1970253437757492, "learning_rate": 1e-05, "loss": 0.9831, "step": 87740 }, { "epoch": 77.71922054915855, "grad_norm": 0.23921014368534088, "learning_rate": 1e-05, "loss": 0.9546, "step": 87745 }, { "epoch": 77.72364924712134, "grad_norm": 0.2604734003543854, "learning_rate": 1e-05, "loss": 0.9618, "step": 87750 }, { "epoch": 77.72807794508415, "grad_norm": 0.2523438632488251, "learning_rate": 1e-05, "loss": 0.9812, "step": 87755 }, { "epoch": 77.73250664304695, "grad_norm": 0.23461085557937622, "learning_rate": 1e-05, "loss": 0.9724, "step": 87760 }, { "epoch": 77.73693534100974, "grad_norm": 0.2170139104127884, "learning_rate": 1e-05, "loss": 0.9429, "step": 87765 }, { "epoch": 77.74136403897255, "grad_norm": 0.21661920845508575, "learning_rate": 1e-05, "loss": 0.9644, "step": 87770 }, { "epoch": 77.74579273693534, "grad_norm": 0.20358291268348694, "learning_rate": 1e-05, "loss": 0.9614, "step": 87775 }, { "epoch": 77.75022143489814, "grad_norm": 0.23301099240779877, "learning_rate": 1e-05, "loss": 0.9629, "step": 87780 }, { "epoch": 77.75465013286095, "grad_norm": 0.2260369211435318, "learning_rate": 1e-05, "loss": 0.9469, "step": 87785 }, { "epoch": 77.75907883082374, "grad_norm": 0.22996878623962402, "learning_rate": 1e-05, "loss": 0.9622, "step": 87790 }, { "epoch": 77.76350752878653, "grad_norm": 0.25925150513648987, "learning_rate": 1e-05, "loss": 0.9689, "step": 87795 }, { "epoch": 77.76793622674934, "grad_norm": 0.23372302949428558, "learning_rate": 1e-05, "loss": 0.9521, "step": 87800 }, { "epoch": 77.77236492471214, "grad_norm": 0.2515539526939392, "learning_rate": 1e-05, "loss": 0.9307, "step": 87805 }, { "epoch": 77.77679362267493, "grad_norm": 0.22752253711223602, "learning_rate": 1e-05, "loss": 0.9747, "step": 87810 }, { "epoch": 77.78122232063774, "grad_norm": 0.21126540005207062, "learning_rate": 1e-05, "loss": 0.9854, "step": 87815 }, { "epoch": 77.78565101860053, "grad_norm": 0.2386217713356018, "learning_rate": 1e-05, "loss": 1.0236, "step": 87820 }, { "epoch": 77.79007971656333, "grad_norm": 0.2570544183254242, "learning_rate": 1e-05, "loss": 0.9521, "step": 87825 }, { "epoch": 77.79450841452613, "grad_norm": 0.20882780849933624, "learning_rate": 1e-05, "loss": 0.9736, "step": 87830 }, { "epoch": 77.79893711248893, "grad_norm": 0.20831246674060822, "learning_rate": 1e-05, "loss": 0.9698, "step": 87835 }, { "epoch": 77.80336581045172, "grad_norm": 0.2543601393699646, "learning_rate": 1e-05, "loss": 0.9296, "step": 87840 }, { "epoch": 77.80779450841453, "grad_norm": 0.23801063001155853, "learning_rate": 1e-05, "loss": 0.9911, "step": 87845 }, { "epoch": 77.81222320637733, "grad_norm": 0.26623669266700745, "learning_rate": 1e-05, "loss": 0.9528, "step": 87850 }, { "epoch": 77.81665190434012, "grad_norm": 0.26402807235717773, "learning_rate": 1e-05, "loss": 0.9226, "step": 87855 }, { "epoch": 77.82108060230293, "grad_norm": 0.26201727986335754, "learning_rate": 1e-05, "loss": 0.9868, "step": 87860 }, { "epoch": 77.82550930026572, "grad_norm": 0.26105183362960815, "learning_rate": 1e-05, "loss": 0.9536, "step": 87865 }, { "epoch": 77.82993799822852, "grad_norm": 0.2734915316104889, "learning_rate": 1e-05, "loss": 0.9616, "step": 87870 }, { "epoch": 77.83436669619132, "grad_norm": 0.22708305716514587, "learning_rate": 1e-05, "loss": 0.9687, "step": 87875 }, { "epoch": 77.83879539415412, "grad_norm": 0.24792613089084625, "learning_rate": 1e-05, "loss": 0.9509, "step": 87880 }, { "epoch": 77.84322409211691, "grad_norm": 0.2871337831020355, "learning_rate": 1e-05, "loss": 0.9233, "step": 87885 }, { "epoch": 77.84765279007972, "grad_norm": 0.26960790157318115, "learning_rate": 1e-05, "loss": 0.9454, "step": 87890 }, { "epoch": 77.85208148804251, "grad_norm": 0.2633669376373291, "learning_rate": 1e-05, "loss": 1.0288, "step": 87895 }, { "epoch": 77.85651018600531, "grad_norm": 0.22795478999614716, "learning_rate": 1e-05, "loss": 0.9833, "step": 87900 }, { "epoch": 77.86093888396812, "grad_norm": 0.22482727468013763, "learning_rate": 1e-05, "loss": 0.9456, "step": 87905 }, { "epoch": 77.86536758193091, "grad_norm": 0.21699407696723938, "learning_rate": 1e-05, "loss": 0.9231, "step": 87910 }, { "epoch": 77.8697962798937, "grad_norm": 0.22380073368549347, "learning_rate": 1e-05, "loss": 0.9945, "step": 87915 }, { "epoch": 77.87422497785651, "grad_norm": 0.2231159210205078, "learning_rate": 1e-05, "loss": 0.9665, "step": 87920 }, { "epoch": 77.87865367581931, "grad_norm": 0.22316156327724457, "learning_rate": 1e-05, "loss": 0.9373, "step": 87925 }, { "epoch": 77.8830823737821, "grad_norm": 0.24263930320739746, "learning_rate": 1e-05, "loss": 0.9785, "step": 87930 }, { "epoch": 77.88751107174491, "grad_norm": 0.22376778721809387, "learning_rate": 1e-05, "loss": 0.991, "step": 87935 }, { "epoch": 77.8919397697077, "grad_norm": 0.23587343096733093, "learning_rate": 1e-05, "loss": 0.9388, "step": 87940 }, { "epoch": 77.8963684676705, "grad_norm": 0.23348736763000488, "learning_rate": 1e-05, "loss": 0.9704, "step": 87945 }, { "epoch": 77.9007971656333, "grad_norm": 0.2668285071849823, "learning_rate": 1e-05, "loss": 1.0384, "step": 87950 }, { "epoch": 77.9052258635961, "grad_norm": 0.24204878509044647, "learning_rate": 1e-05, "loss": 0.9275, "step": 87955 }, { "epoch": 77.9096545615589, "grad_norm": 0.24683523178100586, "learning_rate": 1e-05, "loss": 0.951, "step": 87960 }, { "epoch": 77.9140832595217, "grad_norm": 0.27237313985824585, "learning_rate": 1e-05, "loss": 0.983, "step": 87965 }, { "epoch": 77.9185119574845, "grad_norm": 0.220925971865654, "learning_rate": 1e-05, "loss": 0.9644, "step": 87970 }, { "epoch": 77.92294065544729, "grad_norm": 0.2781432271003723, "learning_rate": 1e-05, "loss": 0.993, "step": 87975 }, { "epoch": 77.9273693534101, "grad_norm": 0.22386308014392853, "learning_rate": 1e-05, "loss": 1.0352, "step": 87980 }, { "epoch": 77.9317980513729, "grad_norm": 0.2146822065114975, "learning_rate": 1e-05, "loss": 0.9846, "step": 87985 }, { "epoch": 77.9362267493357, "grad_norm": 0.21760590374469757, "learning_rate": 1e-05, "loss": 0.948, "step": 87990 }, { "epoch": 77.9406554472985, "grad_norm": 0.23479552567005157, "learning_rate": 1e-05, "loss": 0.977, "step": 87995 }, { "epoch": 77.94508414526129, "grad_norm": 0.2356594353914261, "learning_rate": 1e-05, "loss": 0.9182, "step": 88000 }, { "epoch": 77.9495128432241, "grad_norm": 0.261869877576828, "learning_rate": 1e-05, "loss": 0.9725, "step": 88005 }, { "epoch": 77.9539415411869, "grad_norm": 0.2639724314212799, "learning_rate": 1e-05, "loss": 0.9662, "step": 88010 }, { "epoch": 77.95837023914969, "grad_norm": 0.2590380012989044, "learning_rate": 1e-05, "loss": 0.9755, "step": 88015 }, { "epoch": 77.9627989371125, "grad_norm": 0.2274603396654129, "learning_rate": 1e-05, "loss": 1.0029, "step": 88020 }, { "epoch": 77.96722763507529, "grad_norm": 0.2468695491552353, "learning_rate": 1e-05, "loss": 0.9518, "step": 88025 }, { "epoch": 77.97165633303808, "grad_norm": 0.2372131049633026, "learning_rate": 1e-05, "loss": 0.981, "step": 88030 }, { "epoch": 77.97608503100089, "grad_norm": 0.22166943550109863, "learning_rate": 1e-05, "loss": 0.9677, "step": 88035 }, { "epoch": 77.98051372896369, "grad_norm": 0.23885931074619293, "learning_rate": 1e-05, "loss": 0.9721, "step": 88040 }, { "epoch": 77.98494242692648, "grad_norm": 0.23855599761009216, "learning_rate": 1e-05, "loss": 1.0206, "step": 88045 }, { "epoch": 77.98937112488929, "grad_norm": 0.22656738758087158, "learning_rate": 1e-05, "loss": 0.9248, "step": 88050 }, { "epoch": 77.99379982285208, "grad_norm": 0.2272365689277649, "learning_rate": 1e-05, "loss": 0.9926, "step": 88055 }, { "epoch": 77.99822852081488, "grad_norm": 0.24719080328941345, "learning_rate": 1e-05, "loss": 0.9562, "step": 88060 }, { "epoch": 78.00265721877768, "grad_norm": 0.21056294441223145, "learning_rate": 1e-05, "loss": 0.98, "step": 88065 }, { "epoch": 78.00708591674048, "grad_norm": 0.21819378435611725, "learning_rate": 1e-05, "loss": 0.9226, "step": 88070 }, { "epoch": 78.01151461470327, "grad_norm": 0.23172403872013092, "learning_rate": 1e-05, "loss": 0.9361, "step": 88075 }, { "epoch": 78.01594331266608, "grad_norm": 0.2348961979150772, "learning_rate": 1e-05, "loss": 0.9324, "step": 88080 }, { "epoch": 78.02037201062888, "grad_norm": 0.25585222244262695, "learning_rate": 1e-05, "loss": 0.9818, "step": 88085 }, { "epoch": 78.02480070859167, "grad_norm": 0.2880314290523529, "learning_rate": 1e-05, "loss": 0.9823, "step": 88090 }, { "epoch": 78.02922940655448, "grad_norm": 0.3272036612033844, "learning_rate": 1e-05, "loss": 0.924, "step": 88095 }, { "epoch": 78.03365810451727, "grad_norm": 0.2883589565753937, "learning_rate": 1e-05, "loss": 0.971, "step": 88100 }, { "epoch": 78.03808680248007, "grad_norm": 0.2714685797691345, "learning_rate": 1e-05, "loss": 0.9735, "step": 88105 }, { "epoch": 78.04251550044287, "grad_norm": 0.24066340923309326, "learning_rate": 1e-05, "loss": 0.9515, "step": 88110 }, { "epoch": 78.04694419840567, "grad_norm": 0.23197868466377258, "learning_rate": 1e-05, "loss": 0.9124, "step": 88115 }, { "epoch": 78.05137289636846, "grad_norm": 0.21554987132549286, "learning_rate": 1e-05, "loss": 0.9923, "step": 88120 }, { "epoch": 78.05580159433127, "grad_norm": 0.2372073233127594, "learning_rate": 1e-05, "loss": 0.9985, "step": 88125 }, { "epoch": 78.06023029229407, "grad_norm": 0.24696719646453857, "learning_rate": 1e-05, "loss": 0.9398, "step": 88130 }, { "epoch": 78.06465899025686, "grad_norm": 0.26418161392211914, "learning_rate": 1e-05, "loss": 0.9765, "step": 88135 }, { "epoch": 78.06908768821967, "grad_norm": 0.2248615324497223, "learning_rate": 1e-05, "loss": 1.0026, "step": 88140 }, { "epoch": 78.07351638618246, "grad_norm": 0.2294880449771881, "learning_rate": 1e-05, "loss": 0.9963, "step": 88145 }, { "epoch": 78.07794508414526, "grad_norm": 0.2494615912437439, "learning_rate": 1e-05, "loss": 0.9673, "step": 88150 }, { "epoch": 78.08237378210806, "grad_norm": 0.25067639350891113, "learning_rate": 1e-05, "loss": 0.9937, "step": 88155 }, { "epoch": 78.08680248007086, "grad_norm": 0.27057117223739624, "learning_rate": 1e-05, "loss": 0.9951, "step": 88160 }, { "epoch": 78.09123117803365, "grad_norm": 0.2219180017709732, "learning_rate": 1e-05, "loss": 1.0089, "step": 88165 }, { "epoch": 78.09565987599646, "grad_norm": 0.2449643760919571, "learning_rate": 1e-05, "loss": 0.9489, "step": 88170 }, { "epoch": 78.10008857395925, "grad_norm": 0.24394257366657257, "learning_rate": 1e-05, "loss": 0.9803, "step": 88175 }, { "epoch": 78.10451727192205, "grad_norm": 0.28916069865226746, "learning_rate": 1e-05, "loss": 0.9278, "step": 88180 }, { "epoch": 78.10894596988486, "grad_norm": 0.21044951677322388, "learning_rate": 1e-05, "loss": 0.9677, "step": 88185 }, { "epoch": 78.11337466784765, "grad_norm": 0.21388471126556396, "learning_rate": 1e-05, "loss": 0.9349, "step": 88190 }, { "epoch": 78.11780336581045, "grad_norm": 0.26471683382987976, "learning_rate": 1e-05, "loss": 1.0282, "step": 88195 }, { "epoch": 78.12223206377325, "grad_norm": 0.23909151554107666, "learning_rate": 1e-05, "loss": 0.9921, "step": 88200 }, { "epoch": 78.12666076173605, "grad_norm": 0.2227104902267456, "learning_rate": 1e-05, "loss": 0.9578, "step": 88205 }, { "epoch": 78.13108945969884, "grad_norm": 0.2222926765680313, "learning_rate": 1e-05, "loss": 0.9017, "step": 88210 }, { "epoch": 78.13551815766165, "grad_norm": 0.27274656295776367, "learning_rate": 1e-05, "loss": 0.983, "step": 88215 }, { "epoch": 78.13994685562444, "grad_norm": 0.2666155695915222, "learning_rate": 1e-05, "loss": 0.9469, "step": 88220 }, { "epoch": 78.14437555358724, "grad_norm": 0.2284383624792099, "learning_rate": 1e-05, "loss": 0.9464, "step": 88225 }, { "epoch": 78.14880425155005, "grad_norm": 0.22449897229671478, "learning_rate": 1e-05, "loss": 0.9629, "step": 88230 }, { "epoch": 78.15323294951284, "grad_norm": 0.24238385260105133, "learning_rate": 1e-05, "loss": 0.9587, "step": 88235 }, { "epoch": 78.15766164747565, "grad_norm": 0.23849505186080933, "learning_rate": 1e-05, "loss": 0.9343, "step": 88240 }, { "epoch": 78.16209034543844, "grad_norm": 0.21940478682518005, "learning_rate": 1e-05, "loss": 1.0036, "step": 88245 }, { "epoch": 78.16651904340124, "grad_norm": 0.2441585212945938, "learning_rate": 1e-05, "loss": 0.9276, "step": 88250 }, { "epoch": 78.17094774136405, "grad_norm": 0.2955837845802307, "learning_rate": 1e-05, "loss": 0.9471, "step": 88255 }, { "epoch": 78.17537643932684, "grad_norm": 0.2114020437002182, "learning_rate": 1e-05, "loss": 0.9436, "step": 88260 }, { "epoch": 78.17980513728963, "grad_norm": 0.27439990639686584, "learning_rate": 1e-05, "loss": 0.9507, "step": 88265 }, { "epoch": 78.18423383525244, "grad_norm": 0.21725498139858246, "learning_rate": 1e-05, "loss": 0.9804, "step": 88270 }, { "epoch": 78.18866253321524, "grad_norm": 0.2442336231470108, "learning_rate": 1e-05, "loss": 0.995, "step": 88275 }, { "epoch": 78.19309123117803, "grad_norm": 0.2618340253829956, "learning_rate": 1e-05, "loss": 0.9683, "step": 88280 }, { "epoch": 78.19751992914084, "grad_norm": 0.25004422664642334, "learning_rate": 1e-05, "loss": 0.9412, "step": 88285 }, { "epoch": 78.20194862710363, "grad_norm": 0.2553393840789795, "learning_rate": 1e-05, "loss": 0.9509, "step": 88290 }, { "epoch": 78.20637732506643, "grad_norm": 0.23919129371643066, "learning_rate": 1e-05, "loss": 0.9431, "step": 88295 }, { "epoch": 78.21080602302924, "grad_norm": 0.2585943043231964, "learning_rate": 1e-05, "loss": 0.9599, "step": 88300 }, { "epoch": 78.21523472099203, "grad_norm": 0.2585223317146301, "learning_rate": 1e-05, "loss": 0.9545, "step": 88305 }, { "epoch": 78.21966341895482, "grad_norm": 0.22464485466480255, "learning_rate": 1e-05, "loss": 0.9155, "step": 88310 }, { "epoch": 78.22409211691763, "grad_norm": 0.22997532784938812, "learning_rate": 1e-05, "loss": 0.9691, "step": 88315 }, { "epoch": 78.22852081488043, "grad_norm": 0.24111147224903107, "learning_rate": 1e-05, "loss": 0.9556, "step": 88320 }, { "epoch": 78.23294951284322, "grad_norm": 0.25492793321609497, "learning_rate": 1e-05, "loss": 0.9362, "step": 88325 }, { "epoch": 78.23737821080603, "grad_norm": 0.23459693789482117, "learning_rate": 1e-05, "loss": 0.9208, "step": 88330 }, { "epoch": 78.24180690876882, "grad_norm": 0.2433767020702362, "learning_rate": 1e-05, "loss": 1.0233, "step": 88335 }, { "epoch": 78.24623560673162, "grad_norm": 0.23111091554164886, "learning_rate": 1e-05, "loss": 1.013, "step": 88340 }, { "epoch": 78.25066430469442, "grad_norm": 0.2255808711051941, "learning_rate": 1e-05, "loss": 0.9904, "step": 88345 }, { "epoch": 78.25509300265722, "grad_norm": 0.2177070677280426, "learning_rate": 1e-05, "loss": 0.9154, "step": 88350 }, { "epoch": 78.25952170062001, "grad_norm": 0.22173449397087097, "learning_rate": 1e-05, "loss": 0.9954, "step": 88355 }, { "epoch": 78.26395039858282, "grad_norm": 0.22008760273456573, "learning_rate": 1e-05, "loss": 0.9787, "step": 88360 }, { "epoch": 78.26837909654562, "grad_norm": 0.22661566734313965, "learning_rate": 1e-05, "loss": 0.9389, "step": 88365 }, { "epoch": 78.27280779450841, "grad_norm": 0.2508631646633148, "learning_rate": 1e-05, "loss": 0.9017, "step": 88370 }, { "epoch": 78.27723649247122, "grad_norm": 0.25236210227012634, "learning_rate": 1e-05, "loss": 0.9418, "step": 88375 }, { "epoch": 78.28166519043401, "grad_norm": 0.20679466426372528, "learning_rate": 1e-05, "loss": 0.9354, "step": 88380 }, { "epoch": 78.2860938883968, "grad_norm": 0.21976430714130402, "learning_rate": 1e-05, "loss": 0.9033, "step": 88385 }, { "epoch": 78.29052258635961, "grad_norm": 0.3356921374797821, "learning_rate": 1e-05, "loss": 0.9323, "step": 88390 }, { "epoch": 78.29495128432241, "grad_norm": 0.25843536853790283, "learning_rate": 1e-05, "loss": 0.9223, "step": 88395 }, { "epoch": 78.2993799822852, "grad_norm": 0.22746503353118896, "learning_rate": 1e-05, "loss": 0.9679, "step": 88400 }, { "epoch": 78.30380868024801, "grad_norm": 0.22590909898281097, "learning_rate": 1e-05, "loss": 0.9725, "step": 88405 }, { "epoch": 78.3082373782108, "grad_norm": 0.2101229727268219, "learning_rate": 1e-05, "loss": 0.9509, "step": 88410 }, { "epoch": 78.3126660761736, "grad_norm": 0.25208696722984314, "learning_rate": 1e-05, "loss": 0.9731, "step": 88415 }, { "epoch": 78.31709477413641, "grad_norm": 0.2421363741159439, "learning_rate": 1e-05, "loss": 0.933, "step": 88420 }, { "epoch": 78.3215234720992, "grad_norm": 0.22763632237911224, "learning_rate": 1e-05, "loss": 0.9593, "step": 88425 }, { "epoch": 78.325952170062, "grad_norm": 0.2504916787147522, "learning_rate": 1e-05, "loss": 0.9534, "step": 88430 }, { "epoch": 78.3303808680248, "grad_norm": 0.24096882343292236, "learning_rate": 1e-05, "loss": 0.9407, "step": 88435 }, { "epoch": 78.3348095659876, "grad_norm": 0.22622767090797424, "learning_rate": 1e-05, "loss": 0.9321, "step": 88440 }, { "epoch": 78.33923826395039, "grad_norm": 0.2609744966030121, "learning_rate": 1e-05, "loss": 0.9625, "step": 88445 }, { "epoch": 78.3436669619132, "grad_norm": 0.2167573720216751, "learning_rate": 1e-05, "loss": 0.9244, "step": 88450 }, { "epoch": 78.348095659876, "grad_norm": 0.1996101289987564, "learning_rate": 1e-05, "loss": 0.9474, "step": 88455 }, { "epoch": 78.35252435783879, "grad_norm": 0.22567802667617798, "learning_rate": 1e-05, "loss": 0.9113, "step": 88460 }, { "epoch": 78.3569530558016, "grad_norm": 0.2525363266468048, "learning_rate": 1e-05, "loss": 0.9337, "step": 88465 }, { "epoch": 78.36138175376439, "grad_norm": 0.23195043206214905, "learning_rate": 1e-05, "loss": 0.9696, "step": 88470 }, { "epoch": 78.36581045172719, "grad_norm": 0.21244372427463531, "learning_rate": 1e-05, "loss": 0.975, "step": 88475 }, { "epoch": 78.37023914969, "grad_norm": 0.25897324085235596, "learning_rate": 1e-05, "loss": 0.9632, "step": 88480 }, { "epoch": 78.37466784765279, "grad_norm": 0.27224600315093994, "learning_rate": 1e-05, "loss": 0.9384, "step": 88485 }, { "epoch": 78.3790965456156, "grad_norm": 0.27718424797058105, "learning_rate": 1e-05, "loss": 0.9446, "step": 88490 }, { "epoch": 78.38352524357839, "grad_norm": 0.26623815298080444, "learning_rate": 1e-05, "loss": 0.9622, "step": 88495 }, { "epoch": 78.38795394154118, "grad_norm": 0.23037631809711456, "learning_rate": 1e-05, "loss": 0.9928, "step": 88500 }, { "epoch": 78.39238263950399, "grad_norm": 0.22158603370189667, "learning_rate": 1e-05, "loss": 0.9467, "step": 88505 }, { "epoch": 78.39681133746679, "grad_norm": 0.22727711498737335, "learning_rate": 1e-05, "loss": 0.9687, "step": 88510 }, { "epoch": 78.40124003542958, "grad_norm": 0.2231655716896057, "learning_rate": 1e-05, "loss": 0.966, "step": 88515 }, { "epoch": 78.40566873339239, "grad_norm": 0.25223758816719055, "learning_rate": 1e-05, "loss": 0.9402, "step": 88520 }, { "epoch": 78.41009743135518, "grad_norm": 0.24551533162593842, "learning_rate": 1e-05, "loss": 0.9856, "step": 88525 }, { "epoch": 78.41452612931798, "grad_norm": 0.266607403755188, "learning_rate": 1e-05, "loss": 0.9602, "step": 88530 }, { "epoch": 78.41895482728079, "grad_norm": 0.234235018491745, "learning_rate": 1e-05, "loss": 0.9724, "step": 88535 }, { "epoch": 78.42338352524358, "grad_norm": 0.22250519692897797, "learning_rate": 1e-05, "loss": 0.9062, "step": 88540 }, { "epoch": 78.42781222320637, "grad_norm": 0.2604774534702301, "learning_rate": 1e-05, "loss": 0.9268, "step": 88545 }, { "epoch": 78.43224092116918, "grad_norm": 0.24216750264167786, "learning_rate": 1e-05, "loss": 0.9265, "step": 88550 }, { "epoch": 78.43666961913198, "grad_norm": 0.22254213690757751, "learning_rate": 1e-05, "loss": 0.9764, "step": 88555 }, { "epoch": 78.44109831709477, "grad_norm": 0.2351011037826538, "learning_rate": 1e-05, "loss": 0.9499, "step": 88560 }, { "epoch": 78.44552701505758, "grad_norm": 0.23696167767047882, "learning_rate": 1e-05, "loss": 0.952, "step": 88565 }, { "epoch": 78.44995571302037, "grad_norm": 0.2576257586479187, "learning_rate": 1e-05, "loss": 0.9835, "step": 88570 }, { "epoch": 78.45438441098317, "grad_norm": 0.24720187485218048, "learning_rate": 1e-05, "loss": 0.95, "step": 88575 }, { "epoch": 78.45881310894598, "grad_norm": 0.238107830286026, "learning_rate": 1e-05, "loss": 0.9563, "step": 88580 }, { "epoch": 78.46324180690877, "grad_norm": 0.3204800486564636, "learning_rate": 1e-05, "loss": 1.0321, "step": 88585 }, { "epoch": 78.46767050487156, "grad_norm": 0.24230603873729706, "learning_rate": 1e-05, "loss": 0.9845, "step": 88590 }, { "epoch": 78.47209920283437, "grad_norm": 0.21399644017219543, "learning_rate": 1e-05, "loss": 1.0099, "step": 88595 }, { "epoch": 78.47652790079717, "grad_norm": 0.2422972172498703, "learning_rate": 1e-05, "loss": 0.9583, "step": 88600 }, { "epoch": 78.48095659875996, "grad_norm": 0.24130190908908844, "learning_rate": 1e-05, "loss": 0.9508, "step": 88605 }, { "epoch": 78.48538529672277, "grad_norm": 0.25818291306495667, "learning_rate": 1e-05, "loss": 0.947, "step": 88610 }, { "epoch": 78.48981399468556, "grad_norm": 0.28703704476356506, "learning_rate": 1e-05, "loss": 1.0217, "step": 88615 }, { "epoch": 78.49424269264836, "grad_norm": 0.22022441029548645, "learning_rate": 1e-05, "loss": 0.9913, "step": 88620 }, { "epoch": 78.49867139061116, "grad_norm": 0.25715476274490356, "learning_rate": 1e-05, "loss": 0.9694, "step": 88625 }, { "epoch": 78.50310008857396, "grad_norm": 0.2681129574775696, "learning_rate": 1e-05, "loss": 0.9673, "step": 88630 }, { "epoch": 78.50752878653675, "grad_norm": 0.24399589002132416, "learning_rate": 1e-05, "loss": 0.9923, "step": 88635 }, { "epoch": 78.51195748449956, "grad_norm": 0.28183767199516296, "learning_rate": 1e-05, "loss": 0.9325, "step": 88640 }, { "epoch": 78.51638618246236, "grad_norm": 0.27202925086021423, "learning_rate": 1e-05, "loss": 0.9332, "step": 88645 }, { "epoch": 78.52081488042515, "grad_norm": 0.2646104693412781, "learning_rate": 1e-05, "loss": 0.899, "step": 88650 }, { "epoch": 78.52524357838796, "grad_norm": 0.24787229299545288, "learning_rate": 1e-05, "loss": 0.9852, "step": 88655 }, { "epoch": 78.52967227635075, "grad_norm": 0.25754332542419434, "learning_rate": 1e-05, "loss": 0.9594, "step": 88660 }, { "epoch": 78.53410097431355, "grad_norm": 0.24591226875782013, "learning_rate": 1e-05, "loss": 0.97, "step": 88665 }, { "epoch": 78.53852967227635, "grad_norm": 0.267474502325058, "learning_rate": 1e-05, "loss": 0.9603, "step": 88670 }, { "epoch": 78.54295837023915, "grad_norm": 0.2907086908817291, "learning_rate": 1e-05, "loss": 0.9451, "step": 88675 }, { "epoch": 78.54738706820194, "grad_norm": 0.2724979817867279, "learning_rate": 1e-05, "loss": 0.9468, "step": 88680 }, { "epoch": 78.55181576616475, "grad_norm": 0.2637103497982025, "learning_rate": 1e-05, "loss": 0.9377, "step": 88685 }, { "epoch": 78.55624446412754, "grad_norm": 0.2124089002609253, "learning_rate": 1e-05, "loss": 0.9496, "step": 88690 }, { "epoch": 78.56067316209034, "grad_norm": 0.2602483034133911, "learning_rate": 1e-05, "loss": 0.954, "step": 88695 }, { "epoch": 78.56510186005315, "grad_norm": 0.23150505125522614, "learning_rate": 1e-05, "loss": 0.9217, "step": 88700 }, { "epoch": 78.56953055801594, "grad_norm": 0.2326960265636444, "learning_rate": 1e-05, "loss": 1.0055, "step": 88705 }, { "epoch": 78.57395925597874, "grad_norm": 0.24118410050868988, "learning_rate": 1e-05, "loss": 0.9587, "step": 88710 }, { "epoch": 78.57838795394154, "grad_norm": 0.24178457260131836, "learning_rate": 1e-05, "loss": 0.9635, "step": 88715 }, { "epoch": 78.58281665190434, "grad_norm": 0.23243384063243866, "learning_rate": 1e-05, "loss": 0.98, "step": 88720 }, { "epoch": 78.58724534986715, "grad_norm": 0.24737320840358734, "learning_rate": 1e-05, "loss": 0.952, "step": 88725 }, { "epoch": 78.59167404782994, "grad_norm": 0.2641957104206085, "learning_rate": 1e-05, "loss": 0.9359, "step": 88730 }, { "epoch": 78.59610274579273, "grad_norm": 0.27690625190734863, "learning_rate": 1e-05, "loss": 0.9368, "step": 88735 }, { "epoch": 78.60053144375554, "grad_norm": 0.23812811076641083, "learning_rate": 1e-05, "loss": 0.9611, "step": 88740 }, { "epoch": 78.60496014171834, "grad_norm": 0.22169415652751923, "learning_rate": 1e-05, "loss": 0.9426, "step": 88745 }, { "epoch": 78.60938883968113, "grad_norm": 0.2443220168352127, "learning_rate": 1e-05, "loss": 0.924, "step": 88750 }, { "epoch": 78.61381753764394, "grad_norm": 0.2266269475221634, "learning_rate": 1e-05, "loss": 0.9633, "step": 88755 }, { "epoch": 78.61824623560673, "grad_norm": 0.2168651968240738, "learning_rate": 1e-05, "loss": 0.9583, "step": 88760 }, { "epoch": 78.62267493356953, "grad_norm": 0.2248748391866684, "learning_rate": 1e-05, "loss": 0.9376, "step": 88765 }, { "epoch": 78.62710363153234, "grad_norm": 0.26974332332611084, "learning_rate": 1e-05, "loss": 0.985, "step": 88770 }, { "epoch": 78.63153232949513, "grad_norm": 0.2683171033859253, "learning_rate": 1e-05, "loss": 1.0393, "step": 88775 }, { "epoch": 78.63596102745792, "grad_norm": 0.23703140020370483, "learning_rate": 1e-05, "loss": 0.9517, "step": 88780 }, { "epoch": 78.64038972542073, "grad_norm": 0.21404774487018585, "learning_rate": 1e-05, "loss": 0.9619, "step": 88785 }, { "epoch": 78.64481842338353, "grad_norm": 0.23793858289718628, "learning_rate": 1e-05, "loss": 0.9847, "step": 88790 }, { "epoch": 78.64924712134632, "grad_norm": 0.23274613916873932, "learning_rate": 1e-05, "loss": 1.0216, "step": 88795 }, { "epoch": 78.65367581930913, "grad_norm": 0.24821344017982483, "learning_rate": 1e-05, "loss": 0.8647, "step": 88800 }, { "epoch": 78.65810451727192, "grad_norm": 0.2392818033695221, "learning_rate": 1e-05, "loss": 0.9937, "step": 88805 }, { "epoch": 78.66253321523472, "grad_norm": 0.290387898683548, "learning_rate": 1e-05, "loss": 0.875, "step": 88810 }, { "epoch": 78.66696191319753, "grad_norm": 0.24052433669567108, "learning_rate": 1e-05, "loss": 0.9956, "step": 88815 }, { "epoch": 78.67139061116032, "grad_norm": 0.24712617695331573, "learning_rate": 1e-05, "loss": 0.9621, "step": 88820 }, { "epoch": 78.67581930912311, "grad_norm": 0.24385656416416168, "learning_rate": 1e-05, "loss": 0.9922, "step": 88825 }, { "epoch": 78.68024800708592, "grad_norm": 0.22187002003192902, "learning_rate": 1e-05, "loss": 0.9753, "step": 88830 }, { "epoch": 78.68467670504872, "grad_norm": 0.27289679646492004, "learning_rate": 1e-05, "loss": 1.001, "step": 88835 }, { "epoch": 78.68910540301151, "grad_norm": 0.2206193059682846, "learning_rate": 1e-05, "loss": 0.9363, "step": 88840 }, { "epoch": 78.69353410097432, "grad_norm": 0.209039106965065, "learning_rate": 1e-05, "loss": 0.954, "step": 88845 }, { "epoch": 78.69796279893711, "grad_norm": 0.24443653225898743, "learning_rate": 1e-05, "loss": 0.9674, "step": 88850 }, { "epoch": 78.7023914968999, "grad_norm": 0.23161883652210236, "learning_rate": 1e-05, "loss": 0.9538, "step": 88855 }, { "epoch": 78.70682019486271, "grad_norm": 0.21250134706497192, "learning_rate": 1e-05, "loss": 0.9805, "step": 88860 }, { "epoch": 78.71124889282551, "grad_norm": 0.2488052397966385, "learning_rate": 1e-05, "loss": 1.0001, "step": 88865 }, { "epoch": 78.7156775907883, "grad_norm": 0.2144518792629242, "learning_rate": 1e-05, "loss": 0.8974, "step": 88870 }, { "epoch": 78.72010628875111, "grad_norm": 0.23893864452838898, "learning_rate": 1e-05, "loss": 0.9853, "step": 88875 }, { "epoch": 78.7245349867139, "grad_norm": 0.24880045652389526, "learning_rate": 1e-05, "loss": 0.9821, "step": 88880 }, { "epoch": 78.7289636846767, "grad_norm": 0.25378575921058655, "learning_rate": 1e-05, "loss": 0.9379, "step": 88885 }, { "epoch": 78.73339238263951, "grad_norm": 0.21793386340141296, "learning_rate": 1e-05, "loss": 0.9945, "step": 88890 }, { "epoch": 78.7378210806023, "grad_norm": 0.24666571617126465, "learning_rate": 1e-05, "loss": 0.9533, "step": 88895 }, { "epoch": 78.7422497785651, "grad_norm": 0.21823570132255554, "learning_rate": 1e-05, "loss": 0.9953, "step": 88900 }, { "epoch": 78.7466784765279, "grad_norm": 0.2418294996023178, "learning_rate": 1e-05, "loss": 0.9753, "step": 88905 }, { "epoch": 78.7511071744907, "grad_norm": 0.20839330554008484, "learning_rate": 1e-05, "loss": 0.9416, "step": 88910 }, { "epoch": 78.75553587245349, "grad_norm": 0.2121295928955078, "learning_rate": 1e-05, "loss": 1.0032, "step": 88915 }, { "epoch": 78.7599645704163, "grad_norm": 0.2436417043209076, "learning_rate": 1e-05, "loss": 1.0352, "step": 88920 }, { "epoch": 78.7643932683791, "grad_norm": 0.2525129020214081, "learning_rate": 1e-05, "loss": 1.0188, "step": 88925 }, { "epoch": 78.76882196634189, "grad_norm": 0.2668607831001282, "learning_rate": 1e-05, "loss": 0.9202, "step": 88930 }, { "epoch": 78.7732506643047, "grad_norm": 0.2485848218202591, "learning_rate": 1e-05, "loss": 0.9468, "step": 88935 }, { "epoch": 78.77767936226749, "grad_norm": 0.3061036467552185, "learning_rate": 1e-05, "loss": 1.0125, "step": 88940 }, { "epoch": 78.78210806023029, "grad_norm": 0.23484215140342712, "learning_rate": 1e-05, "loss": 0.9536, "step": 88945 }, { "epoch": 78.7865367581931, "grad_norm": 0.22473669052124023, "learning_rate": 1e-05, "loss": 0.9184, "step": 88950 }, { "epoch": 78.79096545615589, "grad_norm": 0.2281215339899063, "learning_rate": 1e-05, "loss": 0.9821, "step": 88955 }, { "epoch": 78.79539415411868, "grad_norm": 0.2707948684692383, "learning_rate": 1e-05, "loss": 0.9481, "step": 88960 }, { "epoch": 78.79982285208149, "grad_norm": 0.22884972393512726, "learning_rate": 1e-05, "loss": 0.9736, "step": 88965 }, { "epoch": 78.80425155004428, "grad_norm": 0.238788440823555, "learning_rate": 1e-05, "loss": 0.9935, "step": 88970 }, { "epoch": 78.8086802480071, "grad_norm": 0.320576936006546, "learning_rate": 1e-05, "loss": 0.9439, "step": 88975 }, { "epoch": 78.81310894596989, "grad_norm": 0.2474735826253891, "learning_rate": 1e-05, "loss": 0.9611, "step": 88980 }, { "epoch": 78.81753764393268, "grad_norm": 0.2555488049983978, "learning_rate": 1e-05, "loss": 0.9323, "step": 88985 }, { "epoch": 78.82196634189549, "grad_norm": 0.2540493309497833, "learning_rate": 1e-05, "loss": 0.9756, "step": 88990 }, { "epoch": 78.82639503985828, "grad_norm": 0.2371339201927185, "learning_rate": 1e-05, "loss": 0.9827, "step": 88995 }, { "epoch": 78.83082373782108, "grad_norm": 0.23167012631893158, "learning_rate": 1e-05, "loss": 0.9684, "step": 89000 }, { "epoch": 78.83525243578389, "grad_norm": 0.23635420203208923, "learning_rate": 1e-05, "loss": 0.9387, "step": 89005 }, { "epoch": 78.83968113374668, "grad_norm": 0.2356945425271988, "learning_rate": 1e-05, "loss": 0.9286, "step": 89010 }, { "epoch": 78.84410983170947, "grad_norm": 0.22508975863456726, "learning_rate": 1e-05, "loss": 0.9801, "step": 89015 }, { "epoch": 78.84853852967228, "grad_norm": 0.2304401993751526, "learning_rate": 1e-05, "loss": 0.9857, "step": 89020 }, { "epoch": 78.85296722763508, "grad_norm": 0.2298077642917633, "learning_rate": 1e-05, "loss": 0.9627, "step": 89025 }, { "epoch": 78.85739592559787, "grad_norm": 0.32508131861686707, "learning_rate": 1e-05, "loss": 0.9747, "step": 89030 }, { "epoch": 78.86182462356068, "grad_norm": 0.22980186343193054, "learning_rate": 1e-05, "loss": 1.019, "step": 89035 }, { "epoch": 78.86625332152347, "grad_norm": 0.23897279798984528, "learning_rate": 1e-05, "loss": 0.9533, "step": 89040 }, { "epoch": 78.87068201948627, "grad_norm": 0.2118704915046692, "learning_rate": 1e-05, "loss": 0.9762, "step": 89045 }, { "epoch": 78.87511071744908, "grad_norm": 0.21781527996063232, "learning_rate": 1e-05, "loss": 1.0096, "step": 89050 }, { "epoch": 78.87953941541187, "grad_norm": 0.22839125990867615, "learning_rate": 1e-05, "loss": 0.952, "step": 89055 }, { "epoch": 78.88396811337466, "grad_norm": 0.26798373460769653, "learning_rate": 1e-05, "loss": 0.9763, "step": 89060 }, { "epoch": 78.88839681133747, "grad_norm": 0.2478044480085373, "learning_rate": 1e-05, "loss": 0.9423, "step": 89065 }, { "epoch": 78.89282550930027, "grad_norm": 0.23872819542884827, "learning_rate": 1e-05, "loss": 0.9868, "step": 89070 }, { "epoch": 78.89725420726306, "grad_norm": 0.21047410368919373, "learning_rate": 1e-05, "loss": 0.9621, "step": 89075 }, { "epoch": 78.90168290522587, "grad_norm": 0.26323169469833374, "learning_rate": 1e-05, "loss": 0.992, "step": 89080 }, { "epoch": 78.90611160318866, "grad_norm": 0.2525649070739746, "learning_rate": 1e-05, "loss": 0.9627, "step": 89085 }, { "epoch": 78.91054030115146, "grad_norm": 0.23054446280002594, "learning_rate": 1e-05, "loss": 0.9816, "step": 89090 }, { "epoch": 78.91496899911427, "grad_norm": 0.2454458326101303, "learning_rate": 1e-05, "loss": 0.9557, "step": 89095 }, { "epoch": 78.91939769707706, "grad_norm": 0.21247033774852753, "learning_rate": 1e-05, "loss": 0.9143, "step": 89100 }, { "epoch": 78.92382639503985, "grad_norm": 0.24538767337799072, "learning_rate": 1e-05, "loss": 0.8958, "step": 89105 }, { "epoch": 78.92825509300266, "grad_norm": 0.20474044978618622, "learning_rate": 1e-05, "loss": 0.944, "step": 89110 }, { "epoch": 78.93268379096546, "grad_norm": 0.25186586380004883, "learning_rate": 1e-05, "loss": 0.9782, "step": 89115 }, { "epoch": 78.93711248892825, "grad_norm": 0.19835858047008514, "learning_rate": 1e-05, "loss": 0.9645, "step": 89120 }, { "epoch": 78.94154118689106, "grad_norm": 0.22860778868198395, "learning_rate": 1e-05, "loss": 0.9866, "step": 89125 }, { "epoch": 78.94596988485385, "grad_norm": 0.26187822222709656, "learning_rate": 1e-05, "loss": 0.9856, "step": 89130 }, { "epoch": 78.95039858281665, "grad_norm": 0.2351790964603424, "learning_rate": 1e-05, "loss": 0.9237, "step": 89135 }, { "epoch": 78.95482728077945, "grad_norm": 0.27152693271636963, "learning_rate": 1e-05, "loss": 0.9537, "step": 89140 }, { "epoch": 78.95925597874225, "grad_norm": 0.24576568603515625, "learning_rate": 1e-05, "loss": 0.9982, "step": 89145 }, { "epoch": 78.96368467670504, "grad_norm": 0.21399107575416565, "learning_rate": 1e-05, "loss": 0.9155, "step": 89150 }, { "epoch": 78.96811337466785, "grad_norm": 0.24784275889396667, "learning_rate": 1e-05, "loss": 0.9176, "step": 89155 }, { "epoch": 78.97254207263065, "grad_norm": 0.26372650265693665, "learning_rate": 1e-05, "loss": 0.9697, "step": 89160 }, { "epoch": 78.97697077059344, "grad_norm": 0.2719539403915405, "learning_rate": 1e-05, "loss": 0.9792, "step": 89165 }, { "epoch": 78.98139946855625, "grad_norm": 0.247012197971344, "learning_rate": 1e-05, "loss": 0.9321, "step": 89170 }, { "epoch": 78.98582816651904, "grad_norm": 0.24623580276966095, "learning_rate": 1e-05, "loss": 0.9571, "step": 89175 }, { "epoch": 78.99025686448184, "grad_norm": 0.27150431275367737, "learning_rate": 1e-05, "loss": 0.9408, "step": 89180 }, { "epoch": 78.99468556244464, "grad_norm": 0.23377108573913574, "learning_rate": 1e-05, "loss": 0.9945, "step": 89185 }, { "epoch": 78.99911426040744, "grad_norm": 0.22719402611255646, "learning_rate": 1e-05, "loss": 0.9441, "step": 89190 }, { "epoch": 79.00354295837023, "grad_norm": 0.2191123515367508, "learning_rate": 1e-05, "loss": 1.0303, "step": 89195 }, { "epoch": 79.00797165633304, "grad_norm": 0.19615575671195984, "learning_rate": 1e-05, "loss": 0.9453, "step": 89200 }, { "epoch": 79.01240035429583, "grad_norm": 0.23900577425956726, "learning_rate": 1e-05, "loss": 0.975, "step": 89205 }, { "epoch": 79.01682905225863, "grad_norm": 0.24274350702762604, "learning_rate": 1e-05, "loss": 0.9781, "step": 89210 }, { "epoch": 79.02125775022144, "grad_norm": 0.23424094915390015, "learning_rate": 1e-05, "loss": 0.9713, "step": 89215 }, { "epoch": 79.02568644818423, "grad_norm": 0.29628610610961914, "learning_rate": 1e-05, "loss": 0.8842, "step": 89220 }, { "epoch": 79.03011514614704, "grad_norm": 0.26593416929244995, "learning_rate": 1e-05, "loss": 0.9944, "step": 89225 }, { "epoch": 79.03454384410983, "grad_norm": 0.2791079580783844, "learning_rate": 1e-05, "loss": 0.9498, "step": 89230 }, { "epoch": 79.03897254207263, "grad_norm": 0.2200455665588379, "learning_rate": 1e-05, "loss": 0.9963, "step": 89235 }, { "epoch": 79.04340124003544, "grad_norm": 0.25736767053604126, "learning_rate": 1e-05, "loss": 0.9485, "step": 89240 }, { "epoch": 79.04782993799823, "grad_norm": 0.24660871922969818, "learning_rate": 1e-05, "loss": 0.9748, "step": 89245 }, { "epoch": 79.05225863596102, "grad_norm": 0.23847870528697968, "learning_rate": 1e-05, "loss": 1.0079, "step": 89250 }, { "epoch": 79.05668733392383, "grad_norm": 0.23797206580638885, "learning_rate": 1e-05, "loss": 0.9612, "step": 89255 }, { "epoch": 79.06111603188663, "grad_norm": 0.24937275052070618, "learning_rate": 1e-05, "loss": 0.9908, "step": 89260 }, { "epoch": 79.06554472984942, "grad_norm": 0.24184469878673553, "learning_rate": 1e-05, "loss": 0.9779, "step": 89265 }, { "epoch": 79.06997342781223, "grad_norm": 0.27600204944610596, "learning_rate": 1e-05, "loss": 0.9651, "step": 89270 }, { "epoch": 79.07440212577502, "grad_norm": 0.24106377363204956, "learning_rate": 1e-05, "loss": 0.9548, "step": 89275 }, { "epoch": 79.07883082373782, "grad_norm": 0.27480489015579224, "learning_rate": 1e-05, "loss": 0.9821, "step": 89280 }, { "epoch": 79.08325952170063, "grad_norm": 0.22212105989456177, "learning_rate": 1e-05, "loss": 0.9508, "step": 89285 }, { "epoch": 79.08768821966342, "grad_norm": 0.26639440655708313, "learning_rate": 1e-05, "loss": 0.9693, "step": 89290 }, { "epoch": 79.09211691762621, "grad_norm": 0.2756419777870178, "learning_rate": 1e-05, "loss": 1.015, "step": 89295 }, { "epoch": 79.09654561558902, "grad_norm": 0.2521350085735321, "learning_rate": 1e-05, "loss": 1.0134, "step": 89300 }, { "epoch": 79.10097431355182, "grad_norm": 0.25528621673583984, "learning_rate": 1e-05, "loss": 0.9108, "step": 89305 }, { "epoch": 79.10540301151461, "grad_norm": 0.25457897782325745, "learning_rate": 1e-05, "loss": 0.948, "step": 89310 }, { "epoch": 79.10983170947742, "grad_norm": 0.24718253314495087, "learning_rate": 1e-05, "loss": 0.9705, "step": 89315 }, { "epoch": 79.11426040744021, "grad_norm": 0.22772423923015594, "learning_rate": 1e-05, "loss": 0.9807, "step": 89320 }, { "epoch": 79.118689105403, "grad_norm": 0.19612038135528564, "learning_rate": 1e-05, "loss": 0.988, "step": 89325 }, { "epoch": 79.12311780336582, "grad_norm": 0.2685089409351349, "learning_rate": 1e-05, "loss": 0.9437, "step": 89330 }, { "epoch": 79.12754650132861, "grad_norm": 0.2330874800682068, "learning_rate": 1e-05, "loss": 0.938, "step": 89335 }, { "epoch": 79.1319751992914, "grad_norm": 0.2725229859352112, "learning_rate": 1e-05, "loss": 0.9184, "step": 89340 }, { "epoch": 79.13640389725421, "grad_norm": 0.24011482298374176, "learning_rate": 1e-05, "loss": 0.9971, "step": 89345 }, { "epoch": 79.140832595217, "grad_norm": 0.24903951585292816, "learning_rate": 1e-05, "loss": 0.9593, "step": 89350 }, { "epoch": 79.1452612931798, "grad_norm": 0.24760718643665314, "learning_rate": 1e-05, "loss": 0.9386, "step": 89355 }, { "epoch": 79.14968999114261, "grad_norm": 0.27038514614105225, "learning_rate": 1e-05, "loss": 1.0136, "step": 89360 }, { "epoch": 79.1541186891054, "grad_norm": 0.19365015625953674, "learning_rate": 1e-05, "loss": 0.9902, "step": 89365 }, { "epoch": 79.1585473870682, "grad_norm": 0.24248088896274567, "learning_rate": 1e-05, "loss": 0.9846, "step": 89370 }, { "epoch": 79.162976085031, "grad_norm": 0.272960901260376, "learning_rate": 1e-05, "loss": 0.9363, "step": 89375 }, { "epoch": 79.1674047829938, "grad_norm": 0.22384007275104523, "learning_rate": 1e-05, "loss": 0.9834, "step": 89380 }, { "epoch": 79.1718334809566, "grad_norm": 0.19956332445144653, "learning_rate": 1e-05, "loss": 0.9416, "step": 89385 }, { "epoch": 79.1762621789194, "grad_norm": 0.2596125900745392, "learning_rate": 1e-05, "loss": 0.9777, "step": 89390 }, { "epoch": 79.1806908768822, "grad_norm": 0.2166665643453598, "learning_rate": 1e-05, "loss": 0.9611, "step": 89395 }, { "epoch": 79.18511957484499, "grad_norm": 0.22437381744384766, "learning_rate": 1e-05, "loss": 0.9603, "step": 89400 }, { "epoch": 79.1895482728078, "grad_norm": 0.25173407793045044, "learning_rate": 1e-05, "loss": 0.9807, "step": 89405 }, { "epoch": 79.19397697077059, "grad_norm": 0.21334417164325714, "learning_rate": 1e-05, "loss": 0.8795, "step": 89410 }, { "epoch": 79.19840566873339, "grad_norm": 0.2467496395111084, "learning_rate": 1e-05, "loss": 0.9284, "step": 89415 }, { "epoch": 79.2028343666962, "grad_norm": 0.23701106011867523, "learning_rate": 1e-05, "loss": 1.0608, "step": 89420 }, { "epoch": 79.20726306465899, "grad_norm": 0.2276698350906372, "learning_rate": 1e-05, "loss": 0.9613, "step": 89425 }, { "epoch": 79.21169176262178, "grad_norm": 0.2324695736169815, "learning_rate": 1e-05, "loss": 1.0041, "step": 89430 }, { "epoch": 79.21612046058459, "grad_norm": 0.27067631483078003, "learning_rate": 1e-05, "loss": 0.9839, "step": 89435 }, { "epoch": 79.22054915854739, "grad_norm": 0.25942954421043396, "learning_rate": 1e-05, "loss": 0.961, "step": 89440 }, { "epoch": 79.22497785651018, "grad_norm": 0.2337646782398224, "learning_rate": 1e-05, "loss": 0.9695, "step": 89445 }, { "epoch": 79.22940655447299, "grad_norm": 0.2337169498205185, "learning_rate": 1e-05, "loss": 0.9335, "step": 89450 }, { "epoch": 79.23383525243578, "grad_norm": 0.23848369717597961, "learning_rate": 1e-05, "loss": 0.9876, "step": 89455 }, { "epoch": 79.23826395039858, "grad_norm": 0.23774364590644836, "learning_rate": 1e-05, "loss": 0.9783, "step": 89460 }, { "epoch": 79.24269264836138, "grad_norm": 0.21404683589935303, "learning_rate": 1e-05, "loss": 0.9754, "step": 89465 }, { "epoch": 79.24712134632418, "grad_norm": 0.2269837111234665, "learning_rate": 1e-05, "loss": 0.9538, "step": 89470 }, { "epoch": 79.25155004428699, "grad_norm": 0.2135545313358307, "learning_rate": 1e-05, "loss": 0.9245, "step": 89475 }, { "epoch": 79.25597874224978, "grad_norm": 0.23130248486995697, "learning_rate": 1e-05, "loss": 0.975, "step": 89480 }, { "epoch": 79.26040744021257, "grad_norm": 0.29714348912239075, "learning_rate": 1e-05, "loss": 0.9987, "step": 89485 }, { "epoch": 79.26483613817538, "grad_norm": 0.2454877495765686, "learning_rate": 1e-05, "loss": 0.9802, "step": 89490 }, { "epoch": 79.26926483613818, "grad_norm": 0.21029037237167358, "learning_rate": 1e-05, "loss": 0.9441, "step": 89495 }, { "epoch": 79.27369353410097, "grad_norm": 0.265084445476532, "learning_rate": 1e-05, "loss": 0.9444, "step": 89500 }, { "epoch": 79.27812223206378, "grad_norm": 0.282025009393692, "learning_rate": 1e-05, "loss": 0.9966, "step": 89505 }, { "epoch": 79.28255093002657, "grad_norm": 0.24618028104305267, "learning_rate": 1e-05, "loss": 0.9553, "step": 89510 }, { "epoch": 79.28697962798937, "grad_norm": 0.21301627159118652, "learning_rate": 1e-05, "loss": 0.9618, "step": 89515 }, { "epoch": 79.29140832595218, "grad_norm": 0.22592532634735107, "learning_rate": 1e-05, "loss": 0.9138, "step": 89520 }, { "epoch": 79.29583702391497, "grad_norm": 0.21918508410453796, "learning_rate": 1e-05, "loss": 0.9491, "step": 89525 }, { "epoch": 79.30026572187776, "grad_norm": 0.24886052310466766, "learning_rate": 1e-05, "loss": 0.9596, "step": 89530 }, { "epoch": 79.30469441984057, "grad_norm": 0.26327425241470337, "learning_rate": 1e-05, "loss": 0.9506, "step": 89535 }, { "epoch": 79.30912311780337, "grad_norm": 0.23520930111408234, "learning_rate": 1e-05, "loss": 0.9842, "step": 89540 }, { "epoch": 79.31355181576616, "grad_norm": 0.2018265277147293, "learning_rate": 1e-05, "loss": 0.9395, "step": 89545 }, { "epoch": 79.31798051372897, "grad_norm": 0.2244083285331726, "learning_rate": 1e-05, "loss": 0.9579, "step": 89550 }, { "epoch": 79.32240921169176, "grad_norm": 0.2708916664123535, "learning_rate": 1e-05, "loss": 0.9346, "step": 89555 }, { "epoch": 79.32683790965456, "grad_norm": 0.23382501304149628, "learning_rate": 1e-05, "loss": 0.9437, "step": 89560 }, { "epoch": 79.33126660761737, "grad_norm": 0.22604914009571075, "learning_rate": 1e-05, "loss": 0.933, "step": 89565 }, { "epoch": 79.33569530558016, "grad_norm": 0.2737271785736084, "learning_rate": 1e-05, "loss": 0.9284, "step": 89570 }, { "epoch": 79.34012400354295, "grad_norm": 0.21776720881462097, "learning_rate": 1e-05, "loss": 0.9279, "step": 89575 }, { "epoch": 79.34455270150576, "grad_norm": 0.25181880593299866, "learning_rate": 1e-05, "loss": 0.9387, "step": 89580 }, { "epoch": 79.34898139946856, "grad_norm": 0.24273978173732758, "learning_rate": 1e-05, "loss": 0.9659, "step": 89585 }, { "epoch": 79.35341009743135, "grad_norm": 0.26046350598335266, "learning_rate": 1e-05, "loss": 0.9682, "step": 89590 }, { "epoch": 79.35783879539416, "grad_norm": 0.2510797083377838, "learning_rate": 1e-05, "loss": 0.9363, "step": 89595 }, { "epoch": 79.36226749335695, "grad_norm": 0.22702626883983612, "learning_rate": 1e-05, "loss": 0.942, "step": 89600 }, { "epoch": 79.36669619131975, "grad_norm": 0.28290918469429016, "learning_rate": 1e-05, "loss": 0.9646, "step": 89605 }, { "epoch": 79.37112488928256, "grad_norm": 0.23685164749622345, "learning_rate": 1e-05, "loss": 0.9624, "step": 89610 }, { "epoch": 79.37555358724535, "grad_norm": 0.25025680661201477, "learning_rate": 1e-05, "loss": 0.9588, "step": 89615 }, { "epoch": 79.37998228520814, "grad_norm": 0.2567918598651886, "learning_rate": 1e-05, "loss": 0.9435, "step": 89620 }, { "epoch": 79.38441098317095, "grad_norm": 0.2924160361289978, "learning_rate": 1e-05, "loss": 0.9351, "step": 89625 }, { "epoch": 79.38883968113375, "grad_norm": 0.30412012338638306, "learning_rate": 1e-05, "loss": 0.8983, "step": 89630 }, { "epoch": 79.39326837909654, "grad_norm": 0.20977526903152466, "learning_rate": 1e-05, "loss": 0.9627, "step": 89635 }, { "epoch": 79.39769707705935, "grad_norm": 0.2971060872077942, "learning_rate": 1e-05, "loss": 0.9303, "step": 89640 }, { "epoch": 79.40212577502214, "grad_norm": 0.22963666915893555, "learning_rate": 1e-05, "loss": 0.9754, "step": 89645 }, { "epoch": 79.40655447298494, "grad_norm": 0.2330242544412613, "learning_rate": 1e-05, "loss": 0.9614, "step": 89650 }, { "epoch": 79.41098317094774, "grad_norm": 0.21970553696155548, "learning_rate": 1e-05, "loss": 0.9832, "step": 89655 }, { "epoch": 79.41541186891054, "grad_norm": 0.22707703709602356, "learning_rate": 1e-05, "loss": 0.9433, "step": 89660 }, { "epoch": 79.41984056687333, "grad_norm": 0.21161366999149323, "learning_rate": 1e-05, "loss": 0.9544, "step": 89665 }, { "epoch": 79.42426926483614, "grad_norm": 0.2178703248500824, "learning_rate": 1e-05, "loss": 0.9681, "step": 89670 }, { "epoch": 79.42869796279894, "grad_norm": 0.23282155394554138, "learning_rate": 1e-05, "loss": 0.9196, "step": 89675 }, { "epoch": 79.43312666076173, "grad_norm": 0.2014344334602356, "learning_rate": 1e-05, "loss": 0.9444, "step": 89680 }, { "epoch": 79.43755535872454, "grad_norm": 0.2859559953212738, "learning_rate": 1e-05, "loss": 0.9345, "step": 89685 }, { "epoch": 79.44198405668733, "grad_norm": 0.23912128806114197, "learning_rate": 1e-05, "loss": 0.9854, "step": 89690 }, { "epoch": 79.44641275465013, "grad_norm": 0.23863229155540466, "learning_rate": 1e-05, "loss": 0.9551, "step": 89695 }, { "epoch": 79.45084145261293, "grad_norm": 0.23816151916980743, "learning_rate": 1e-05, "loss": 0.9801, "step": 89700 }, { "epoch": 79.45527015057573, "grad_norm": 0.2225727140903473, "learning_rate": 1e-05, "loss": 0.9447, "step": 89705 }, { "epoch": 79.45969884853854, "grad_norm": 0.24674266576766968, "learning_rate": 1e-05, "loss": 0.9509, "step": 89710 }, { "epoch": 79.46412754650133, "grad_norm": 0.23209114372730255, "learning_rate": 1e-05, "loss": 0.9776, "step": 89715 }, { "epoch": 79.46855624446412, "grad_norm": 0.24304057657718658, "learning_rate": 1e-05, "loss": 0.9438, "step": 89720 }, { "epoch": 79.47298494242693, "grad_norm": 0.2184872329235077, "learning_rate": 1e-05, "loss": 0.9256, "step": 89725 }, { "epoch": 79.47741364038973, "grad_norm": 0.23874977231025696, "learning_rate": 1e-05, "loss": 0.9177, "step": 89730 }, { "epoch": 79.48184233835252, "grad_norm": 0.24227285385131836, "learning_rate": 1e-05, "loss": 0.991, "step": 89735 }, { "epoch": 79.48627103631533, "grad_norm": 0.23455865681171417, "learning_rate": 1e-05, "loss": 0.9615, "step": 89740 }, { "epoch": 79.49069973427812, "grad_norm": 0.197706401348114, "learning_rate": 1e-05, "loss": 0.957, "step": 89745 }, { "epoch": 79.49512843224092, "grad_norm": 0.2293943166732788, "learning_rate": 1e-05, "loss": 0.9367, "step": 89750 }, { "epoch": 79.49955713020373, "grad_norm": 0.24075208604335785, "learning_rate": 1e-05, "loss": 0.9954, "step": 89755 }, { "epoch": 79.50398582816652, "grad_norm": 0.28364071249961853, "learning_rate": 1e-05, "loss": 0.9529, "step": 89760 }, { "epoch": 79.50841452612931, "grad_norm": 0.24988830089569092, "learning_rate": 1e-05, "loss": 0.9554, "step": 89765 }, { "epoch": 79.51284322409212, "grad_norm": 0.23506776988506317, "learning_rate": 1e-05, "loss": 0.9618, "step": 89770 }, { "epoch": 79.51727192205492, "grad_norm": 0.2698654234409332, "learning_rate": 1e-05, "loss": 0.9353, "step": 89775 }, { "epoch": 79.52170062001771, "grad_norm": 0.24132831394672394, "learning_rate": 1e-05, "loss": 0.9928, "step": 89780 }, { "epoch": 79.52612931798052, "grad_norm": 0.25501906871795654, "learning_rate": 1e-05, "loss": 0.9596, "step": 89785 }, { "epoch": 79.53055801594331, "grad_norm": 0.250794917345047, "learning_rate": 1e-05, "loss": 0.9942, "step": 89790 }, { "epoch": 79.53498671390611, "grad_norm": 0.20638127624988556, "learning_rate": 1e-05, "loss": 1.0012, "step": 89795 }, { "epoch": 79.53941541186892, "grad_norm": 0.20616641640663147, "learning_rate": 1e-05, "loss": 0.9404, "step": 89800 }, { "epoch": 79.54384410983171, "grad_norm": 0.25351670384407043, "learning_rate": 1e-05, "loss": 0.922, "step": 89805 }, { "epoch": 79.5482728077945, "grad_norm": 0.22439146041870117, "learning_rate": 1e-05, "loss": 0.9509, "step": 89810 }, { "epoch": 79.55270150575731, "grad_norm": 0.22937269508838654, "learning_rate": 1e-05, "loss": 0.9755, "step": 89815 }, { "epoch": 79.5571302037201, "grad_norm": 0.22090381383895874, "learning_rate": 1e-05, "loss": 0.9691, "step": 89820 }, { "epoch": 79.5615589016829, "grad_norm": 0.25759562849998474, "learning_rate": 1e-05, "loss": 0.9745, "step": 89825 }, { "epoch": 79.56598759964571, "grad_norm": 0.2240026593208313, "learning_rate": 1e-05, "loss": 0.955, "step": 89830 }, { "epoch": 79.5704162976085, "grad_norm": 0.27752652764320374, "learning_rate": 1e-05, "loss": 0.9541, "step": 89835 }, { "epoch": 79.5748449955713, "grad_norm": 0.2386729121208191, "learning_rate": 1e-05, "loss": 0.9407, "step": 89840 }, { "epoch": 79.5792736935341, "grad_norm": 0.22522301971912384, "learning_rate": 1e-05, "loss": 1.0158, "step": 89845 }, { "epoch": 79.5837023914969, "grad_norm": 0.23391591012477875, "learning_rate": 1e-05, "loss": 0.9507, "step": 89850 }, { "epoch": 79.5881310894597, "grad_norm": 0.22013649344444275, "learning_rate": 1e-05, "loss": 0.9876, "step": 89855 }, { "epoch": 79.5925597874225, "grad_norm": 0.2454390972852707, "learning_rate": 1e-05, "loss": 0.9363, "step": 89860 }, { "epoch": 79.5969884853853, "grad_norm": 0.21828554570674896, "learning_rate": 1e-05, "loss": 0.9488, "step": 89865 }, { "epoch": 79.60141718334809, "grad_norm": 0.24717754125595093, "learning_rate": 1e-05, "loss": 0.9475, "step": 89870 }, { "epoch": 79.6058458813109, "grad_norm": 0.23066185414791107, "learning_rate": 1e-05, "loss": 0.9805, "step": 89875 }, { "epoch": 79.61027457927369, "grad_norm": 0.24032637476921082, "learning_rate": 1e-05, "loss": 0.9903, "step": 89880 }, { "epoch": 79.61470327723649, "grad_norm": 0.23051051795482635, "learning_rate": 1e-05, "loss": 0.9741, "step": 89885 }, { "epoch": 79.6191319751993, "grad_norm": 0.23454685509204865, "learning_rate": 1e-05, "loss": 0.9685, "step": 89890 }, { "epoch": 79.62356067316209, "grad_norm": 0.2410600483417511, "learning_rate": 1e-05, "loss": 1.0078, "step": 89895 }, { "epoch": 79.62798937112488, "grad_norm": 0.23189964890480042, "learning_rate": 1e-05, "loss": 0.9952, "step": 89900 }, { "epoch": 79.63241806908769, "grad_norm": 0.26159584522247314, "learning_rate": 1e-05, "loss": 0.9634, "step": 89905 }, { "epoch": 79.63684676705049, "grad_norm": 0.23677869141101837, "learning_rate": 1e-05, "loss": 0.9497, "step": 89910 }, { "epoch": 79.64127546501328, "grad_norm": 0.21713300049304962, "learning_rate": 1e-05, "loss": 1.0329, "step": 89915 }, { "epoch": 79.64570416297609, "grad_norm": 0.20646561682224274, "learning_rate": 1e-05, "loss": 0.9233, "step": 89920 }, { "epoch": 79.65013286093888, "grad_norm": 0.23404309153556824, "learning_rate": 1e-05, "loss": 0.9384, "step": 89925 }, { "epoch": 79.65456155890168, "grad_norm": 0.20942646265029907, "learning_rate": 1e-05, "loss": 0.9286, "step": 89930 }, { "epoch": 79.65899025686448, "grad_norm": 0.2503487169742584, "learning_rate": 1e-05, "loss": 1.0082, "step": 89935 }, { "epoch": 79.66341895482728, "grad_norm": 0.2490573674440384, "learning_rate": 1e-05, "loss": 0.9575, "step": 89940 }, { "epoch": 79.66784765279007, "grad_norm": 0.2281889170408249, "learning_rate": 1e-05, "loss": 1.0167, "step": 89945 }, { "epoch": 79.67227635075288, "grad_norm": 0.23662573099136353, "learning_rate": 1e-05, "loss": 0.9414, "step": 89950 }, { "epoch": 79.67670504871568, "grad_norm": 0.217342808842659, "learning_rate": 1e-05, "loss": 0.9537, "step": 89955 }, { "epoch": 79.68113374667848, "grad_norm": 0.2023911029100418, "learning_rate": 1e-05, "loss": 0.9591, "step": 89960 }, { "epoch": 79.68556244464128, "grad_norm": 0.23905080556869507, "learning_rate": 1e-05, "loss": 0.9915, "step": 89965 }, { "epoch": 79.68999114260407, "grad_norm": 0.2180573046207428, "learning_rate": 1e-05, "loss": 0.9715, "step": 89970 }, { "epoch": 79.69441984056688, "grad_norm": 0.21884189546108246, "learning_rate": 1e-05, "loss": 0.9474, "step": 89975 }, { "epoch": 79.69884853852967, "grad_norm": 0.23229235410690308, "learning_rate": 1e-05, "loss": 0.932, "step": 89980 }, { "epoch": 79.70327723649247, "grad_norm": 0.25508442521095276, "learning_rate": 1e-05, "loss": 0.9748, "step": 89985 }, { "epoch": 79.70770593445528, "grad_norm": 0.25490033626556396, "learning_rate": 1e-05, "loss": 0.9795, "step": 89990 }, { "epoch": 79.71213463241807, "grad_norm": 0.25662222504615784, "learning_rate": 1e-05, "loss": 0.9498, "step": 89995 }, { "epoch": 79.71656333038086, "grad_norm": 0.20283883810043335, "learning_rate": 1e-05, "loss": 0.954, "step": 90000 }, { "epoch": 79.72099202834367, "grad_norm": 0.2433825433254242, "learning_rate": 1e-05, "loss": 0.9378, "step": 90005 }, { "epoch": 79.72542072630647, "grad_norm": 0.23120273649692535, "learning_rate": 1e-05, "loss": 0.9479, "step": 90010 }, { "epoch": 79.72984942426926, "grad_norm": 0.20766118168830872, "learning_rate": 1e-05, "loss": 0.939, "step": 90015 }, { "epoch": 79.73427812223207, "grad_norm": 0.21366922557353973, "learning_rate": 1e-05, "loss": 0.9344, "step": 90020 }, { "epoch": 79.73870682019486, "grad_norm": 0.2469257116317749, "learning_rate": 1e-05, "loss": 1.0226, "step": 90025 }, { "epoch": 79.74313551815766, "grad_norm": 0.23615744709968567, "learning_rate": 1e-05, "loss": 0.9573, "step": 90030 }, { "epoch": 79.74756421612047, "grad_norm": 0.23682944476604462, "learning_rate": 1e-05, "loss": 0.9371, "step": 90035 }, { "epoch": 79.75199291408326, "grad_norm": 0.27958759665489197, "learning_rate": 1e-05, "loss": 0.9916, "step": 90040 }, { "epoch": 79.75642161204605, "grad_norm": 0.23270191252231598, "learning_rate": 1e-05, "loss": 0.9835, "step": 90045 }, { "epoch": 79.76085031000886, "grad_norm": 0.237318754196167, "learning_rate": 1e-05, "loss": 0.969, "step": 90050 }, { "epoch": 79.76527900797166, "grad_norm": 0.25557467341423035, "learning_rate": 1e-05, "loss": 0.9718, "step": 90055 }, { "epoch": 79.76970770593445, "grad_norm": 0.30670565366744995, "learning_rate": 1e-05, "loss": 0.9677, "step": 90060 }, { "epoch": 79.77413640389726, "grad_norm": 0.2577807903289795, "learning_rate": 1e-05, "loss": 0.891, "step": 90065 }, { "epoch": 79.77856510186005, "grad_norm": 0.2242809236049652, "learning_rate": 1e-05, "loss": 0.9247, "step": 90070 }, { "epoch": 79.78299379982285, "grad_norm": 0.21872539818286896, "learning_rate": 1e-05, "loss": 0.9392, "step": 90075 }, { "epoch": 79.78742249778566, "grad_norm": 0.21904049813747406, "learning_rate": 1e-05, "loss": 0.9638, "step": 90080 }, { "epoch": 79.79185119574845, "grad_norm": 0.28020399808883667, "learning_rate": 1e-05, "loss": 0.9826, "step": 90085 }, { "epoch": 79.79627989371124, "grad_norm": 0.228740856051445, "learning_rate": 1e-05, "loss": 0.9828, "step": 90090 }, { "epoch": 79.80070859167405, "grad_norm": 0.23158478736877441, "learning_rate": 1e-05, "loss": 0.937, "step": 90095 }, { "epoch": 79.80513728963685, "grad_norm": 0.24134615063667297, "learning_rate": 1e-05, "loss": 0.9971, "step": 90100 }, { "epoch": 79.80956598759964, "grad_norm": 0.2543979287147522, "learning_rate": 1e-05, "loss": 0.9247, "step": 90105 }, { "epoch": 79.81399468556245, "grad_norm": 0.23991595208644867, "learning_rate": 1e-05, "loss": 0.9196, "step": 90110 }, { "epoch": 79.81842338352524, "grad_norm": 0.2548162341117859, "learning_rate": 1e-05, "loss": 0.9596, "step": 90115 }, { "epoch": 79.82285208148804, "grad_norm": 0.2417423576116562, "learning_rate": 1e-05, "loss": 0.9245, "step": 90120 }, { "epoch": 79.82728077945085, "grad_norm": 0.2563302218914032, "learning_rate": 1e-05, "loss": 1.0072, "step": 90125 }, { "epoch": 79.83170947741364, "grad_norm": 0.2482224851846695, "learning_rate": 1e-05, "loss": 1.0028, "step": 90130 }, { "epoch": 79.83613817537643, "grad_norm": 0.2457963079214096, "learning_rate": 1e-05, "loss": 0.9725, "step": 90135 }, { "epoch": 79.84056687333924, "grad_norm": 0.21073241531848907, "learning_rate": 1e-05, "loss": 0.9612, "step": 90140 }, { "epoch": 79.84499557130204, "grad_norm": 0.29018402099609375, "learning_rate": 1e-05, "loss": 0.9596, "step": 90145 }, { "epoch": 79.84942426926483, "grad_norm": 0.24830599129199982, "learning_rate": 1e-05, "loss": 0.9567, "step": 90150 }, { "epoch": 79.85385296722764, "grad_norm": 0.2795458137989044, "learning_rate": 1e-05, "loss": 0.9176, "step": 90155 }, { "epoch": 79.85828166519043, "grad_norm": 0.23765401542186737, "learning_rate": 1e-05, "loss": 0.9392, "step": 90160 }, { "epoch": 79.86271036315323, "grad_norm": 0.22332780063152313, "learning_rate": 1e-05, "loss": 0.9001, "step": 90165 }, { "epoch": 79.86713906111603, "grad_norm": 0.2536269426345825, "learning_rate": 1e-05, "loss": 0.9353, "step": 90170 }, { "epoch": 79.87156775907883, "grad_norm": 0.27211257815361023, "learning_rate": 1e-05, "loss": 0.9406, "step": 90175 }, { "epoch": 79.87599645704162, "grad_norm": 0.2315283715724945, "learning_rate": 1e-05, "loss": 0.9633, "step": 90180 }, { "epoch": 79.88042515500443, "grad_norm": 0.2842007279396057, "learning_rate": 1e-05, "loss": 0.9699, "step": 90185 }, { "epoch": 79.88485385296723, "grad_norm": 0.27521586418151855, "learning_rate": 1e-05, "loss": 0.9388, "step": 90190 }, { "epoch": 79.88928255093003, "grad_norm": 0.25162190198898315, "learning_rate": 1e-05, "loss": 0.9819, "step": 90195 }, { "epoch": 79.89371124889283, "grad_norm": 0.2608662545681, "learning_rate": 1e-05, "loss": 1.0089, "step": 90200 }, { "epoch": 79.89813994685562, "grad_norm": 0.2480461448431015, "learning_rate": 1e-05, "loss": 0.9236, "step": 90205 }, { "epoch": 79.90256864481843, "grad_norm": 0.1931961476802826, "learning_rate": 1e-05, "loss": 0.9397, "step": 90210 }, { "epoch": 79.90699734278122, "grad_norm": 0.22209890186786652, "learning_rate": 1e-05, "loss": 0.9708, "step": 90215 }, { "epoch": 79.91142604074402, "grad_norm": 0.2419925332069397, "learning_rate": 1e-05, "loss": 0.9584, "step": 90220 }, { "epoch": 79.91585473870683, "grad_norm": 0.2507255971431732, "learning_rate": 1e-05, "loss": 0.9865, "step": 90225 }, { "epoch": 79.92028343666962, "grad_norm": 0.2477208822965622, "learning_rate": 1e-05, "loss": 1.015, "step": 90230 }, { "epoch": 79.92471213463241, "grad_norm": 0.24340468645095825, "learning_rate": 1e-05, "loss": 0.9542, "step": 90235 }, { "epoch": 79.92914083259522, "grad_norm": 0.21622225642204285, "learning_rate": 1e-05, "loss": 0.9629, "step": 90240 }, { "epoch": 79.93356953055802, "grad_norm": 0.25533327460289, "learning_rate": 1e-05, "loss": 0.9512, "step": 90245 }, { "epoch": 79.93799822852081, "grad_norm": 0.22512692213058472, "learning_rate": 1e-05, "loss": 0.9746, "step": 90250 }, { "epoch": 79.94242692648362, "grad_norm": 0.19293341040611267, "learning_rate": 1e-05, "loss": 0.9556, "step": 90255 }, { "epoch": 79.94685562444641, "grad_norm": 0.24189257621765137, "learning_rate": 1e-05, "loss": 0.9747, "step": 90260 }, { "epoch": 79.95128432240921, "grad_norm": 0.2521813213825226, "learning_rate": 1e-05, "loss": 0.9845, "step": 90265 }, { "epoch": 79.95571302037202, "grad_norm": 0.2447579801082611, "learning_rate": 1e-05, "loss": 0.9855, "step": 90270 }, { "epoch": 79.96014171833481, "grad_norm": 0.23347577452659607, "learning_rate": 1e-05, "loss": 0.9907, "step": 90275 }, { "epoch": 79.9645704162976, "grad_norm": 0.22960013151168823, "learning_rate": 1e-05, "loss": 0.9668, "step": 90280 }, { "epoch": 79.96899911426041, "grad_norm": 0.30896326899528503, "learning_rate": 1e-05, "loss": 0.9058, "step": 90285 }, { "epoch": 79.9734278122232, "grad_norm": 0.2722979485988617, "learning_rate": 1e-05, "loss": 0.9927, "step": 90290 }, { "epoch": 79.977856510186, "grad_norm": 0.2933483421802521, "learning_rate": 1e-05, "loss": 0.9631, "step": 90295 }, { "epoch": 79.98228520814881, "grad_norm": 0.2473868429660797, "learning_rate": 1e-05, "loss": 1.0145, "step": 90300 }, { "epoch": 79.9867139061116, "grad_norm": 0.22241902351379395, "learning_rate": 1e-05, "loss": 0.9628, "step": 90305 }, { "epoch": 79.9911426040744, "grad_norm": 0.21211975812911987, "learning_rate": 1e-05, "loss": 0.9339, "step": 90310 }, { "epoch": 79.9955713020372, "grad_norm": 0.21840862929821014, "learning_rate": 1e-05, "loss": 0.9742, "step": 90315 }, { "epoch": 80.0, "grad_norm": 0.2095823436975479, "learning_rate": 1e-05, "loss": 0.9974, "step": 90320 }, { "epoch": 80.0044286979628, "grad_norm": 0.26246175169944763, "learning_rate": 1e-05, "loss": 0.9457, "step": 90325 }, { "epoch": 80.0088573959256, "grad_norm": 0.2380438596010208, "learning_rate": 1e-05, "loss": 0.952, "step": 90330 }, { "epoch": 80.0132860938884, "grad_norm": 0.21253980696201324, "learning_rate": 1e-05, "loss": 0.9407, "step": 90335 }, { "epoch": 80.01771479185119, "grad_norm": 0.24193133413791656, "learning_rate": 1e-05, "loss": 0.9532, "step": 90340 }, { "epoch": 80.022143489814, "grad_norm": 0.2500888407230377, "learning_rate": 1e-05, "loss": 0.9889, "step": 90345 }, { "epoch": 80.0265721877768, "grad_norm": 0.24459004402160645, "learning_rate": 1e-05, "loss": 0.9833, "step": 90350 }, { "epoch": 80.03100088573959, "grad_norm": 0.22864751517772675, "learning_rate": 1e-05, "loss": 0.9572, "step": 90355 }, { "epoch": 80.0354295837024, "grad_norm": 0.26301053166389465, "learning_rate": 1e-05, "loss": 0.9843, "step": 90360 }, { "epoch": 80.03985828166519, "grad_norm": 0.21500445902347565, "learning_rate": 1e-05, "loss": 0.9693, "step": 90365 }, { "epoch": 80.04428697962798, "grad_norm": 0.223270446062088, "learning_rate": 1e-05, "loss": 0.9554, "step": 90370 }, { "epoch": 80.04871567759079, "grad_norm": 0.24247729778289795, "learning_rate": 1e-05, "loss": 0.9821, "step": 90375 }, { "epoch": 80.05314437555359, "grad_norm": 0.20041173696517944, "learning_rate": 1e-05, "loss": 0.9667, "step": 90380 }, { "epoch": 80.05757307351638, "grad_norm": 0.23628394305706024, "learning_rate": 1e-05, "loss": 0.943, "step": 90385 }, { "epoch": 80.06200177147919, "grad_norm": 0.2290562391281128, "learning_rate": 1e-05, "loss": 1.0344, "step": 90390 }, { "epoch": 80.06643046944198, "grad_norm": 0.2636483311653137, "learning_rate": 1e-05, "loss": 0.9433, "step": 90395 }, { "epoch": 80.07085916740478, "grad_norm": 0.23545511066913605, "learning_rate": 1e-05, "loss": 0.9231, "step": 90400 }, { "epoch": 80.07528786536759, "grad_norm": 0.23498377203941345, "learning_rate": 1e-05, "loss": 0.9105, "step": 90405 }, { "epoch": 80.07971656333038, "grad_norm": 0.23322664201259613, "learning_rate": 1e-05, "loss": 1.0555, "step": 90410 }, { "epoch": 80.08414526129317, "grad_norm": 0.2805613875389099, "learning_rate": 1e-05, "loss": 0.9499, "step": 90415 }, { "epoch": 80.08857395925598, "grad_norm": 0.25593459606170654, "learning_rate": 1e-05, "loss": 0.9843, "step": 90420 }, { "epoch": 80.09300265721878, "grad_norm": 0.28242868185043335, "learning_rate": 1e-05, "loss": 0.9045, "step": 90425 }, { "epoch": 80.09743135518157, "grad_norm": 0.2630789279937744, "learning_rate": 1e-05, "loss": 0.8962, "step": 90430 }, { "epoch": 80.10186005314438, "grad_norm": 0.23096276819705963, "learning_rate": 1e-05, "loss": 0.9922, "step": 90435 }, { "epoch": 80.10628875110717, "grad_norm": 0.24531184136867523, "learning_rate": 1e-05, "loss": 0.9918, "step": 90440 }, { "epoch": 80.11071744906998, "grad_norm": 0.2260182648897171, "learning_rate": 1e-05, "loss": 0.9238, "step": 90445 }, { "epoch": 80.11514614703277, "grad_norm": 0.24155649542808533, "learning_rate": 1e-05, "loss": 0.9818, "step": 90450 }, { "epoch": 80.11957484499557, "grad_norm": 0.2517586648464203, "learning_rate": 1e-05, "loss": 0.8965, "step": 90455 }, { "epoch": 80.12400354295838, "grad_norm": 0.22883270680904388, "learning_rate": 1e-05, "loss": 0.985, "step": 90460 }, { "epoch": 80.12843224092117, "grad_norm": 0.26872262358665466, "learning_rate": 1e-05, "loss": 0.9902, "step": 90465 }, { "epoch": 80.13286093888397, "grad_norm": 0.2595265805721283, "learning_rate": 1e-05, "loss": 0.9688, "step": 90470 }, { "epoch": 80.13728963684677, "grad_norm": 0.25981834530830383, "learning_rate": 1e-05, "loss": 0.947, "step": 90475 }, { "epoch": 80.14171833480957, "grad_norm": 0.276508092880249, "learning_rate": 1e-05, "loss": 0.9529, "step": 90480 }, { "epoch": 80.14614703277236, "grad_norm": 0.2871341109275818, "learning_rate": 1e-05, "loss": 0.9617, "step": 90485 }, { "epoch": 80.15057573073517, "grad_norm": 0.2640523314476013, "learning_rate": 1e-05, "loss": 1.0249, "step": 90490 }, { "epoch": 80.15500442869796, "grad_norm": 0.25753283500671387, "learning_rate": 1e-05, "loss": 0.9707, "step": 90495 }, { "epoch": 80.15943312666076, "grad_norm": 0.23141242563724518, "learning_rate": 1e-05, "loss": 0.9788, "step": 90500 }, { "epoch": 80.16386182462357, "grad_norm": 0.2090873420238495, "learning_rate": 1e-05, "loss": 1.0254, "step": 90505 }, { "epoch": 80.16829052258636, "grad_norm": 0.21610578894615173, "learning_rate": 1e-05, "loss": 1.0035, "step": 90510 }, { "epoch": 80.17271922054915, "grad_norm": 0.24537496268749237, "learning_rate": 1e-05, "loss": 0.9364, "step": 90515 }, { "epoch": 80.17714791851196, "grad_norm": 0.2676410675048828, "learning_rate": 1e-05, "loss": 0.9444, "step": 90520 }, { "epoch": 80.18157661647476, "grad_norm": 0.2525486946105957, "learning_rate": 1e-05, "loss": 0.9477, "step": 90525 }, { "epoch": 80.18600531443755, "grad_norm": 0.22568634152412415, "learning_rate": 1e-05, "loss": 0.9469, "step": 90530 }, { "epoch": 80.19043401240036, "grad_norm": 0.21275459229946136, "learning_rate": 1e-05, "loss": 0.994, "step": 90535 }, { "epoch": 80.19486271036315, "grad_norm": 0.22479788959026337, "learning_rate": 1e-05, "loss": 0.9764, "step": 90540 }, { "epoch": 80.19929140832595, "grad_norm": 0.23335857689380646, "learning_rate": 1e-05, "loss": 0.9597, "step": 90545 }, { "epoch": 80.20372010628876, "grad_norm": 0.24292509257793427, "learning_rate": 1e-05, "loss": 0.9094, "step": 90550 }, { "epoch": 80.20814880425155, "grad_norm": 0.26341915130615234, "learning_rate": 1e-05, "loss": 0.9735, "step": 90555 }, { "epoch": 80.21257750221434, "grad_norm": 0.22146227955818176, "learning_rate": 1e-05, "loss": 0.9557, "step": 90560 }, { "epoch": 80.21700620017715, "grad_norm": 0.2333306521177292, "learning_rate": 1e-05, "loss": 0.9393, "step": 90565 }, { "epoch": 80.22143489813995, "grad_norm": 0.25761678814888, "learning_rate": 1e-05, "loss": 0.9751, "step": 90570 }, { "epoch": 80.22586359610274, "grad_norm": 0.2671818733215332, "learning_rate": 1e-05, "loss": 0.9631, "step": 90575 }, { "epoch": 80.23029229406555, "grad_norm": 0.25359222292900085, "learning_rate": 1e-05, "loss": 0.9739, "step": 90580 }, { "epoch": 80.23472099202834, "grad_norm": 0.26131007075309753, "learning_rate": 1e-05, "loss": 1.0042, "step": 90585 }, { "epoch": 80.23914968999114, "grad_norm": 0.22572430968284607, "learning_rate": 1e-05, "loss": 0.9714, "step": 90590 }, { "epoch": 80.24357838795395, "grad_norm": 0.2223578542470932, "learning_rate": 1e-05, "loss": 0.9383, "step": 90595 }, { "epoch": 80.24800708591674, "grad_norm": 0.2399706095457077, "learning_rate": 1e-05, "loss": 0.929, "step": 90600 }, { "epoch": 80.25243578387953, "grad_norm": 0.2179998755455017, "learning_rate": 1e-05, "loss": 0.9409, "step": 90605 }, { "epoch": 80.25686448184234, "grad_norm": 0.222193643450737, "learning_rate": 1e-05, "loss": 0.9385, "step": 90610 }, { "epoch": 80.26129317980514, "grad_norm": 0.26568669080734253, "learning_rate": 1e-05, "loss": 0.9598, "step": 90615 }, { "epoch": 80.26572187776793, "grad_norm": 0.2616262137889862, "learning_rate": 1e-05, "loss": 0.9658, "step": 90620 }, { "epoch": 80.27015057573074, "grad_norm": 0.2509314715862274, "learning_rate": 1e-05, "loss": 0.9502, "step": 90625 }, { "epoch": 80.27457927369353, "grad_norm": 0.2570199966430664, "learning_rate": 1e-05, "loss": 0.9488, "step": 90630 }, { "epoch": 80.27900797165633, "grad_norm": 0.23955181241035461, "learning_rate": 1e-05, "loss": 0.9793, "step": 90635 }, { "epoch": 80.28343666961914, "grad_norm": 0.26595553755760193, "learning_rate": 1e-05, "loss": 0.9807, "step": 90640 }, { "epoch": 80.28786536758193, "grad_norm": 0.2420101910829544, "learning_rate": 1e-05, "loss": 0.9425, "step": 90645 }, { "epoch": 80.29229406554472, "grad_norm": 0.23126855492591858, "learning_rate": 1e-05, "loss": 0.947, "step": 90650 }, { "epoch": 80.29672276350753, "grad_norm": 0.2859993577003479, "learning_rate": 1e-05, "loss": 0.9634, "step": 90655 }, { "epoch": 80.30115146147033, "grad_norm": 0.21770113706588745, "learning_rate": 1e-05, "loss": 0.9594, "step": 90660 }, { "epoch": 80.30558015943312, "grad_norm": 0.2505124807357788, "learning_rate": 1e-05, "loss": 1.002, "step": 90665 }, { "epoch": 80.31000885739593, "grad_norm": 0.2358206808567047, "learning_rate": 1e-05, "loss": 0.9422, "step": 90670 }, { "epoch": 80.31443755535872, "grad_norm": 0.2855980396270752, "learning_rate": 1e-05, "loss": 0.9283, "step": 90675 }, { "epoch": 80.31886625332152, "grad_norm": 0.31953322887420654, "learning_rate": 1e-05, "loss": 0.9897, "step": 90680 }, { "epoch": 80.32329495128432, "grad_norm": 0.24071930348873138, "learning_rate": 1e-05, "loss": 0.9482, "step": 90685 }, { "epoch": 80.32772364924712, "grad_norm": 0.2715437710285187, "learning_rate": 1e-05, "loss": 0.9937, "step": 90690 }, { "epoch": 80.33215234720993, "grad_norm": 0.22301892936229706, "learning_rate": 1e-05, "loss": 0.9114, "step": 90695 }, { "epoch": 80.33658104517272, "grad_norm": 0.2870946526527405, "learning_rate": 1e-05, "loss": 0.934, "step": 90700 }, { "epoch": 80.34100974313552, "grad_norm": 0.2254539430141449, "learning_rate": 1e-05, "loss": 0.9707, "step": 90705 }, { "epoch": 80.34543844109832, "grad_norm": 0.22369804978370667, "learning_rate": 1e-05, "loss": 1.007, "step": 90710 }, { "epoch": 80.34986713906112, "grad_norm": 0.2669138014316559, "learning_rate": 1e-05, "loss": 0.9434, "step": 90715 }, { "epoch": 80.35429583702391, "grad_norm": 0.26017987728118896, "learning_rate": 1e-05, "loss": 0.9819, "step": 90720 }, { "epoch": 80.35872453498672, "grad_norm": 0.261029452085495, "learning_rate": 1e-05, "loss": 0.9709, "step": 90725 }, { "epoch": 80.36315323294951, "grad_norm": 0.2585115134716034, "learning_rate": 1e-05, "loss": 0.9213, "step": 90730 }, { "epoch": 80.36758193091231, "grad_norm": 0.22768008708953857, "learning_rate": 1e-05, "loss": 0.9939, "step": 90735 }, { "epoch": 80.37201062887512, "grad_norm": 0.2629387676715851, "learning_rate": 1e-05, "loss": 0.9919, "step": 90740 }, { "epoch": 80.37643932683791, "grad_norm": 0.24866102635860443, "learning_rate": 1e-05, "loss": 0.9678, "step": 90745 }, { "epoch": 80.3808680248007, "grad_norm": 0.27576547861099243, "learning_rate": 1e-05, "loss": 0.9455, "step": 90750 }, { "epoch": 80.38529672276351, "grad_norm": 0.35673296451568604, "learning_rate": 1e-05, "loss": 1.0133, "step": 90755 }, { "epoch": 80.38972542072631, "grad_norm": 0.2631607949733734, "learning_rate": 1e-05, "loss": 0.9533, "step": 90760 }, { "epoch": 80.3941541186891, "grad_norm": 0.24194203317165375, "learning_rate": 1e-05, "loss": 0.9619, "step": 90765 }, { "epoch": 80.39858281665191, "grad_norm": 0.2552587687969208, "learning_rate": 1e-05, "loss": 0.8925, "step": 90770 }, { "epoch": 80.4030115146147, "grad_norm": 0.2479856163263321, "learning_rate": 1e-05, "loss": 0.9635, "step": 90775 }, { "epoch": 80.4074402125775, "grad_norm": 0.2398465871810913, "learning_rate": 1e-05, "loss": 0.9913, "step": 90780 }, { "epoch": 80.4118689105403, "grad_norm": 0.21170029044151306, "learning_rate": 1e-05, "loss": 1.0068, "step": 90785 }, { "epoch": 80.4162976085031, "grad_norm": 0.2215103656053543, "learning_rate": 1e-05, "loss": 1.006, "step": 90790 }, { "epoch": 80.4207263064659, "grad_norm": 0.22934865951538086, "learning_rate": 1e-05, "loss": 0.9469, "step": 90795 }, { "epoch": 80.4251550044287, "grad_norm": 0.2242850959300995, "learning_rate": 1e-05, "loss": 0.8819, "step": 90800 }, { "epoch": 80.4295837023915, "grad_norm": 0.21365399658679962, "learning_rate": 1e-05, "loss": 0.9504, "step": 90805 }, { "epoch": 80.43401240035429, "grad_norm": 0.20895269513130188, "learning_rate": 1e-05, "loss": 0.9503, "step": 90810 }, { "epoch": 80.4384410983171, "grad_norm": 0.22954924404621124, "learning_rate": 1e-05, "loss": 0.93, "step": 90815 }, { "epoch": 80.4428697962799, "grad_norm": 0.2231500893831253, "learning_rate": 1e-05, "loss": 0.9391, "step": 90820 }, { "epoch": 80.44729849424269, "grad_norm": 0.24777980148792267, "learning_rate": 1e-05, "loss": 0.9525, "step": 90825 }, { "epoch": 80.4517271922055, "grad_norm": 0.27447614073753357, "learning_rate": 1e-05, "loss": 0.9653, "step": 90830 }, { "epoch": 80.45615589016829, "grad_norm": 0.23682816326618195, "learning_rate": 1e-05, "loss": 0.9833, "step": 90835 }, { "epoch": 80.46058458813108, "grad_norm": 0.2200881987810135, "learning_rate": 1e-05, "loss": 0.9813, "step": 90840 }, { "epoch": 80.46501328609389, "grad_norm": 0.26538461446762085, "learning_rate": 1e-05, "loss": 0.9198, "step": 90845 }, { "epoch": 80.46944198405669, "grad_norm": 0.2557761073112488, "learning_rate": 1e-05, "loss": 0.9743, "step": 90850 }, { "epoch": 80.47387068201948, "grad_norm": 0.23829787969589233, "learning_rate": 1e-05, "loss": 1.0003, "step": 90855 }, { "epoch": 80.47829937998229, "grad_norm": 0.20655357837677002, "learning_rate": 1e-05, "loss": 0.946, "step": 90860 }, { "epoch": 80.48272807794508, "grad_norm": 0.23293107748031616, "learning_rate": 1e-05, "loss": 0.9398, "step": 90865 }, { "epoch": 80.48715677590788, "grad_norm": 0.2538190484046936, "learning_rate": 1e-05, "loss": 0.9826, "step": 90870 }, { "epoch": 80.49158547387069, "grad_norm": 0.24415796995162964, "learning_rate": 1e-05, "loss": 0.9719, "step": 90875 }, { "epoch": 80.49601417183348, "grad_norm": 0.24092614650726318, "learning_rate": 1e-05, "loss": 1.0034, "step": 90880 }, { "epoch": 80.50044286979627, "grad_norm": 0.2633575201034546, "learning_rate": 1e-05, "loss": 0.8947, "step": 90885 }, { "epoch": 80.50487156775908, "grad_norm": 0.32758960127830505, "learning_rate": 1e-05, "loss": 0.9521, "step": 90890 }, { "epoch": 80.50930026572188, "grad_norm": 0.276058554649353, "learning_rate": 1e-05, "loss": 0.9559, "step": 90895 }, { "epoch": 80.51372896368467, "grad_norm": 0.2731495499610901, "learning_rate": 1e-05, "loss": 0.9553, "step": 90900 }, { "epoch": 80.51815766164748, "grad_norm": 0.270124226808548, "learning_rate": 1e-05, "loss": 0.9668, "step": 90905 }, { "epoch": 80.52258635961027, "grad_norm": 0.23477540910243988, "learning_rate": 1e-05, "loss": 0.9474, "step": 90910 }, { "epoch": 80.52701505757307, "grad_norm": 0.2700737714767456, "learning_rate": 1e-05, "loss": 0.9683, "step": 90915 }, { "epoch": 80.53144375553588, "grad_norm": 0.2259398251771927, "learning_rate": 1e-05, "loss": 0.947, "step": 90920 }, { "epoch": 80.53587245349867, "grad_norm": 0.2582458257675171, "learning_rate": 1e-05, "loss": 0.9997, "step": 90925 }, { "epoch": 80.54030115146146, "grad_norm": 0.31537526845932007, "learning_rate": 1e-05, "loss": 0.9122, "step": 90930 }, { "epoch": 80.54472984942427, "grad_norm": 0.2454194724559784, "learning_rate": 1e-05, "loss": 0.9893, "step": 90935 }, { "epoch": 80.54915854738707, "grad_norm": 0.26461490988731384, "learning_rate": 1e-05, "loss": 0.9934, "step": 90940 }, { "epoch": 80.55358724534987, "grad_norm": 0.2461141049861908, "learning_rate": 1e-05, "loss": 0.9429, "step": 90945 }, { "epoch": 80.55801594331267, "grad_norm": 0.24520158767700195, "learning_rate": 1e-05, "loss": 0.9485, "step": 90950 }, { "epoch": 80.56244464127546, "grad_norm": 0.33144229650497437, "learning_rate": 1e-05, "loss": 0.9629, "step": 90955 }, { "epoch": 80.56687333923827, "grad_norm": 0.2548491060733795, "learning_rate": 1e-05, "loss": 0.9896, "step": 90960 }, { "epoch": 80.57130203720106, "grad_norm": 0.26169726252555847, "learning_rate": 1e-05, "loss": 0.9537, "step": 90965 }, { "epoch": 80.57573073516386, "grad_norm": 0.21496829390525818, "learning_rate": 1e-05, "loss": 1.0027, "step": 90970 }, { "epoch": 80.58015943312667, "grad_norm": 0.258695125579834, "learning_rate": 1e-05, "loss": 0.9624, "step": 90975 }, { "epoch": 80.58458813108946, "grad_norm": 0.22384880483150482, "learning_rate": 1e-05, "loss": 0.9582, "step": 90980 }, { "epoch": 80.58901682905226, "grad_norm": 0.22364690899848938, "learning_rate": 1e-05, "loss": 0.9976, "step": 90985 }, { "epoch": 80.59344552701506, "grad_norm": 0.20511779189109802, "learning_rate": 1e-05, "loss": 0.9578, "step": 90990 }, { "epoch": 80.59787422497786, "grad_norm": 0.25335246324539185, "learning_rate": 1e-05, "loss": 0.9434, "step": 90995 }, { "epoch": 80.60230292294065, "grad_norm": 0.2724718749523163, "learning_rate": 1e-05, "loss": 0.9846, "step": 91000 }, { "epoch": 80.60673162090346, "grad_norm": 0.24197956919670105, "learning_rate": 1e-05, "loss": 0.9642, "step": 91005 }, { "epoch": 80.61116031886625, "grad_norm": 0.2098158895969391, "learning_rate": 1e-05, "loss": 0.953, "step": 91010 }, { "epoch": 80.61558901682905, "grad_norm": 0.27841877937316895, "learning_rate": 1e-05, "loss": 1.0012, "step": 91015 }, { "epoch": 80.62001771479186, "grad_norm": 0.23239122331142426, "learning_rate": 1e-05, "loss": 1.0328, "step": 91020 }, { "epoch": 80.62444641275465, "grad_norm": 0.20474544167518616, "learning_rate": 1e-05, "loss": 0.9955, "step": 91025 }, { "epoch": 80.62887511071744, "grad_norm": 0.21133720874786377, "learning_rate": 1e-05, "loss": 0.9419, "step": 91030 }, { "epoch": 80.63330380868025, "grad_norm": 0.2813125252723694, "learning_rate": 1e-05, "loss": 0.9904, "step": 91035 }, { "epoch": 80.63773250664305, "grad_norm": 0.2161625325679779, "learning_rate": 1e-05, "loss": 0.938, "step": 91040 }, { "epoch": 80.64216120460584, "grad_norm": 0.25862497091293335, "learning_rate": 1e-05, "loss": 0.9873, "step": 91045 }, { "epoch": 80.64658990256865, "grad_norm": 0.24200892448425293, "learning_rate": 1e-05, "loss": 0.9528, "step": 91050 }, { "epoch": 80.65101860053144, "grad_norm": 0.2496143877506256, "learning_rate": 1e-05, "loss": 0.9023, "step": 91055 }, { "epoch": 80.65544729849424, "grad_norm": 0.2194894254207611, "learning_rate": 1e-05, "loss": 0.9187, "step": 91060 }, { "epoch": 80.65987599645705, "grad_norm": 0.23861615359783173, "learning_rate": 1e-05, "loss": 0.9539, "step": 91065 }, { "epoch": 80.66430469441984, "grad_norm": 0.25838911533355713, "learning_rate": 1e-05, "loss": 0.9756, "step": 91070 }, { "epoch": 80.66873339238263, "grad_norm": 0.2183622568845749, "learning_rate": 1e-05, "loss": 0.9714, "step": 91075 }, { "epoch": 80.67316209034544, "grad_norm": 0.231674462556839, "learning_rate": 1e-05, "loss": 0.9355, "step": 91080 }, { "epoch": 80.67759078830824, "grad_norm": 0.22098885476589203, "learning_rate": 1e-05, "loss": 1.0258, "step": 91085 }, { "epoch": 80.68201948627103, "grad_norm": 0.23202075064182281, "learning_rate": 1e-05, "loss": 0.904, "step": 91090 }, { "epoch": 80.68644818423384, "grad_norm": 0.24226707220077515, "learning_rate": 1e-05, "loss": 0.9309, "step": 91095 }, { "epoch": 80.69087688219663, "grad_norm": 0.24776986241340637, "learning_rate": 1e-05, "loss": 0.9619, "step": 91100 }, { "epoch": 80.69530558015943, "grad_norm": 0.2096671462059021, "learning_rate": 1e-05, "loss": 0.9985, "step": 91105 }, { "epoch": 80.69973427812224, "grad_norm": 0.2444484382867813, "learning_rate": 1e-05, "loss": 0.9785, "step": 91110 }, { "epoch": 80.70416297608503, "grad_norm": 0.27736368775367737, "learning_rate": 1e-05, "loss": 0.9814, "step": 91115 }, { "epoch": 80.70859167404782, "grad_norm": 0.261273056268692, "learning_rate": 1e-05, "loss": 0.8861, "step": 91120 }, { "epoch": 80.71302037201063, "grad_norm": 0.2523120641708374, "learning_rate": 1e-05, "loss": 0.94, "step": 91125 }, { "epoch": 80.71744906997343, "grad_norm": 0.2804732024669647, "learning_rate": 1e-05, "loss": 0.9391, "step": 91130 }, { "epoch": 80.72187776793622, "grad_norm": 0.2127504199743271, "learning_rate": 1e-05, "loss": 0.9857, "step": 91135 }, { "epoch": 80.72630646589903, "grad_norm": 0.2552631199359894, "learning_rate": 1e-05, "loss": 1.0076, "step": 91140 }, { "epoch": 80.73073516386182, "grad_norm": 0.24905171990394592, "learning_rate": 1e-05, "loss": 0.9161, "step": 91145 }, { "epoch": 80.73516386182462, "grad_norm": 0.29543524980545044, "learning_rate": 1e-05, "loss": 0.9336, "step": 91150 }, { "epoch": 80.73959255978743, "grad_norm": 0.25018495321273804, "learning_rate": 1e-05, "loss": 0.975, "step": 91155 }, { "epoch": 80.74402125775022, "grad_norm": 0.22952282428741455, "learning_rate": 1e-05, "loss": 0.9277, "step": 91160 }, { "epoch": 80.74844995571301, "grad_norm": 0.21172106266021729, "learning_rate": 1e-05, "loss": 0.9241, "step": 91165 }, { "epoch": 80.75287865367582, "grad_norm": 0.2110438197851181, "learning_rate": 1e-05, "loss": 0.9977, "step": 91170 }, { "epoch": 80.75730735163862, "grad_norm": 0.21945476531982422, "learning_rate": 1e-05, "loss": 0.9615, "step": 91175 }, { "epoch": 80.76173604960141, "grad_norm": 0.23098935186862946, "learning_rate": 1e-05, "loss": 0.9632, "step": 91180 }, { "epoch": 80.76616474756422, "grad_norm": 0.25565969944000244, "learning_rate": 1e-05, "loss": 1.0025, "step": 91185 }, { "epoch": 80.77059344552701, "grad_norm": 0.24035532772541046, "learning_rate": 1e-05, "loss": 1.0108, "step": 91190 }, { "epoch": 80.77502214348982, "grad_norm": 0.24948714673519135, "learning_rate": 1e-05, "loss": 0.9646, "step": 91195 }, { "epoch": 80.77945084145261, "grad_norm": 0.2062489241361618, "learning_rate": 1e-05, "loss": 0.9334, "step": 91200 }, { "epoch": 80.78387953941541, "grad_norm": 0.22135037183761597, "learning_rate": 1e-05, "loss": 0.9857, "step": 91205 }, { "epoch": 80.78830823737822, "grad_norm": 0.1966964602470398, "learning_rate": 1e-05, "loss": 0.9824, "step": 91210 }, { "epoch": 80.79273693534101, "grad_norm": 0.2449837476015091, "learning_rate": 1e-05, "loss": 1.037, "step": 91215 }, { "epoch": 80.7971656333038, "grad_norm": 0.23168903589248657, "learning_rate": 1e-05, "loss": 1.0224, "step": 91220 }, { "epoch": 80.80159433126661, "grad_norm": 0.2194792479276657, "learning_rate": 1e-05, "loss": 0.9061, "step": 91225 }, { "epoch": 80.80602302922941, "grad_norm": 0.25971657037734985, "learning_rate": 1e-05, "loss": 0.9181, "step": 91230 }, { "epoch": 80.8104517271922, "grad_norm": 0.2402009814977646, "learning_rate": 1e-05, "loss": 0.9868, "step": 91235 }, { "epoch": 80.81488042515501, "grad_norm": 0.21228161454200745, "learning_rate": 1e-05, "loss": 0.9761, "step": 91240 }, { "epoch": 80.8193091231178, "grad_norm": 0.23742033541202545, "learning_rate": 1e-05, "loss": 0.9709, "step": 91245 }, { "epoch": 80.8237378210806, "grad_norm": 0.24568568170070648, "learning_rate": 1e-05, "loss": 1.0065, "step": 91250 }, { "epoch": 80.8281665190434, "grad_norm": 0.24253027141094208, "learning_rate": 1e-05, "loss": 0.9258, "step": 91255 }, { "epoch": 80.8325952170062, "grad_norm": 0.276459664106369, "learning_rate": 1e-05, "loss": 0.9799, "step": 91260 }, { "epoch": 80.837023914969, "grad_norm": 0.22146469354629517, "learning_rate": 1e-05, "loss": 0.9358, "step": 91265 }, { "epoch": 80.8414526129318, "grad_norm": 0.22102241218090057, "learning_rate": 1e-05, "loss": 1.0006, "step": 91270 }, { "epoch": 80.8458813108946, "grad_norm": 0.2480599731206894, "learning_rate": 1e-05, "loss": 0.982, "step": 91275 }, { "epoch": 80.85031000885739, "grad_norm": 0.24597778916358948, "learning_rate": 1e-05, "loss": 0.9446, "step": 91280 }, { "epoch": 80.8547387068202, "grad_norm": 0.24654646217823029, "learning_rate": 1e-05, "loss": 0.9446, "step": 91285 }, { "epoch": 80.859167404783, "grad_norm": 0.2694201171398163, "learning_rate": 1e-05, "loss": 0.9819, "step": 91290 }, { "epoch": 80.86359610274579, "grad_norm": 0.23365439474582672, "learning_rate": 1e-05, "loss": 0.9711, "step": 91295 }, { "epoch": 80.8680248007086, "grad_norm": 0.21915245056152344, "learning_rate": 1e-05, "loss": 0.9447, "step": 91300 }, { "epoch": 80.87245349867139, "grad_norm": 0.22658202052116394, "learning_rate": 1e-05, "loss": 0.976, "step": 91305 }, { "epoch": 80.87688219663418, "grad_norm": 0.23377378284931183, "learning_rate": 1e-05, "loss": 0.9822, "step": 91310 }, { "epoch": 80.881310894597, "grad_norm": 0.2150438129901886, "learning_rate": 1e-05, "loss": 0.9572, "step": 91315 }, { "epoch": 80.88573959255979, "grad_norm": 0.2552729547023773, "learning_rate": 1e-05, "loss": 1.0025, "step": 91320 }, { "epoch": 80.89016829052258, "grad_norm": 0.2692682445049286, "learning_rate": 1e-05, "loss": 1.0088, "step": 91325 }, { "epoch": 80.89459698848539, "grad_norm": 0.21927419304847717, "learning_rate": 1e-05, "loss": 0.9632, "step": 91330 }, { "epoch": 80.89902568644818, "grad_norm": 0.2481800615787506, "learning_rate": 1e-05, "loss": 0.9974, "step": 91335 }, { "epoch": 80.90345438441098, "grad_norm": 0.2540706992149353, "learning_rate": 1e-05, "loss": 0.9864, "step": 91340 }, { "epoch": 80.90788308237379, "grad_norm": 0.2587836980819702, "learning_rate": 1e-05, "loss": 0.9943, "step": 91345 }, { "epoch": 80.91231178033658, "grad_norm": 0.23256359994411469, "learning_rate": 1e-05, "loss": 0.9895, "step": 91350 }, { "epoch": 80.91674047829937, "grad_norm": 0.24111385643482208, "learning_rate": 1e-05, "loss": 0.9984, "step": 91355 }, { "epoch": 80.92116917626218, "grad_norm": 0.27664461731910706, "learning_rate": 1e-05, "loss": 1.0123, "step": 91360 }, { "epoch": 80.92559787422498, "grad_norm": 0.22027702629566193, "learning_rate": 1e-05, "loss": 1.0008, "step": 91365 }, { "epoch": 80.93002657218777, "grad_norm": 0.21982650458812714, "learning_rate": 1e-05, "loss": 0.9328, "step": 91370 }, { "epoch": 80.93445527015058, "grad_norm": 0.23025935888290405, "learning_rate": 1e-05, "loss": 0.9947, "step": 91375 }, { "epoch": 80.93888396811337, "grad_norm": 0.21099212765693665, "learning_rate": 1e-05, "loss": 0.9402, "step": 91380 }, { "epoch": 80.94331266607617, "grad_norm": 0.26262176036834717, "learning_rate": 1e-05, "loss": 0.9921, "step": 91385 }, { "epoch": 80.94774136403898, "grad_norm": 0.24932602047920227, "learning_rate": 1e-05, "loss": 0.961, "step": 91390 }, { "epoch": 80.95217006200177, "grad_norm": 0.22671513259410858, "learning_rate": 1e-05, "loss": 0.9447, "step": 91395 }, { "epoch": 80.95659875996456, "grad_norm": 0.27160924673080444, "learning_rate": 1e-05, "loss": 0.9276, "step": 91400 }, { "epoch": 80.96102745792737, "grad_norm": 0.2533956468105316, "learning_rate": 1e-05, "loss": 0.9386, "step": 91405 }, { "epoch": 80.96545615589017, "grad_norm": 0.2485702484846115, "learning_rate": 1e-05, "loss": 0.9161, "step": 91410 }, { "epoch": 80.96988485385296, "grad_norm": 0.258137047290802, "learning_rate": 1e-05, "loss": 0.9176, "step": 91415 }, { "epoch": 80.97431355181577, "grad_norm": 0.2436603605747223, "learning_rate": 1e-05, "loss": 0.943, "step": 91420 }, { "epoch": 80.97874224977856, "grad_norm": 0.29182374477386475, "learning_rate": 1e-05, "loss": 0.9284, "step": 91425 }, { "epoch": 80.98317094774137, "grad_norm": 0.26369819045066833, "learning_rate": 1e-05, "loss": 0.9703, "step": 91430 }, { "epoch": 80.98759964570417, "grad_norm": 0.322521835565567, "learning_rate": 1e-05, "loss": 0.9622, "step": 91435 }, { "epoch": 80.99202834366696, "grad_norm": 0.2459186315536499, "learning_rate": 1e-05, "loss": 0.9676, "step": 91440 }, { "epoch": 80.99645704162977, "grad_norm": 0.26624155044555664, "learning_rate": 1e-05, "loss": 0.965, "step": 91445 }, { "epoch": 81.00088573959256, "grad_norm": 0.2507934272289276, "learning_rate": 1e-05, "loss": 1.0154, "step": 91450 }, { "epoch": 81.00531443755536, "grad_norm": 0.227080300450325, "learning_rate": 1e-05, "loss": 0.9662, "step": 91455 }, { "epoch": 81.00974313551816, "grad_norm": 0.24502043426036835, "learning_rate": 1e-05, "loss": 0.9425, "step": 91460 }, { "epoch": 81.01417183348096, "grad_norm": 0.285030335187912, "learning_rate": 1e-05, "loss": 1.0125, "step": 91465 }, { "epoch": 81.01860053144375, "grad_norm": 0.3109884560108185, "learning_rate": 1e-05, "loss": 0.9483, "step": 91470 }, { "epoch": 81.02302922940656, "grad_norm": 0.24894285202026367, "learning_rate": 1e-05, "loss": 0.9845, "step": 91475 }, { "epoch": 81.02745792736935, "grad_norm": 0.24787092208862305, "learning_rate": 1e-05, "loss": 0.9661, "step": 91480 }, { "epoch": 81.03188662533215, "grad_norm": 0.3745787739753723, "learning_rate": 1e-05, "loss": 0.9772, "step": 91485 }, { "epoch": 81.03631532329496, "grad_norm": 0.30479496717453003, "learning_rate": 1e-05, "loss": 0.9509, "step": 91490 }, { "epoch": 81.04074402125775, "grad_norm": 0.24336491525173187, "learning_rate": 1e-05, "loss": 0.8926, "step": 91495 }, { "epoch": 81.04517271922055, "grad_norm": 0.26006007194519043, "learning_rate": 1e-05, "loss": 0.96, "step": 91500 }, { "epoch": 81.04960141718335, "grad_norm": 0.24037177860736847, "learning_rate": 1e-05, "loss": 0.9598, "step": 91505 }, { "epoch": 81.05403011514615, "grad_norm": 0.2424057424068451, "learning_rate": 1e-05, "loss": 1.0064, "step": 91510 }, { "epoch": 81.05845881310894, "grad_norm": 0.19677449762821198, "learning_rate": 1e-05, "loss": 1.0021, "step": 91515 }, { "epoch": 81.06288751107175, "grad_norm": 0.22905278205871582, "learning_rate": 1e-05, "loss": 0.9881, "step": 91520 }, { "epoch": 81.06731620903454, "grad_norm": 0.2423352301120758, "learning_rate": 1e-05, "loss": 0.928, "step": 91525 }, { "epoch": 81.07174490699734, "grad_norm": 0.20317932963371277, "learning_rate": 1e-05, "loss": 0.9688, "step": 91530 }, { "epoch": 81.07617360496015, "grad_norm": 0.23584222793579102, "learning_rate": 1e-05, "loss": 0.9507, "step": 91535 }, { "epoch": 81.08060230292294, "grad_norm": 0.24752803146839142, "learning_rate": 1e-05, "loss": 0.9612, "step": 91540 }, { "epoch": 81.08503100088573, "grad_norm": 0.2781928479671478, "learning_rate": 1e-05, "loss": 0.9779, "step": 91545 }, { "epoch": 81.08945969884854, "grad_norm": 0.27298372983932495, "learning_rate": 1e-05, "loss": 0.9546, "step": 91550 }, { "epoch": 81.09388839681134, "grad_norm": 0.25532519817352295, "learning_rate": 1e-05, "loss": 0.9257, "step": 91555 }, { "epoch": 81.09831709477413, "grad_norm": 0.25626635551452637, "learning_rate": 1e-05, "loss": 1.0006, "step": 91560 }, { "epoch": 81.10274579273694, "grad_norm": 0.3022654950618744, "learning_rate": 1e-05, "loss": 0.9707, "step": 91565 }, { "epoch": 81.10717449069973, "grad_norm": 0.24486123025417328, "learning_rate": 1e-05, "loss": 0.948, "step": 91570 }, { "epoch": 81.11160318866253, "grad_norm": 0.23280680179595947, "learning_rate": 1e-05, "loss": 0.9295, "step": 91575 }, { "epoch": 81.11603188662534, "grad_norm": 0.25300851464271545, "learning_rate": 1e-05, "loss": 0.9375, "step": 91580 }, { "epoch": 81.12046058458813, "grad_norm": 0.2673119008541107, "learning_rate": 1e-05, "loss": 0.9703, "step": 91585 }, { "epoch": 81.12488928255092, "grad_norm": 0.24592231214046478, "learning_rate": 1e-05, "loss": 0.9939, "step": 91590 }, { "epoch": 81.12931798051373, "grad_norm": 0.33494842052459717, "learning_rate": 1e-05, "loss": 0.9946, "step": 91595 }, { "epoch": 81.13374667847653, "grad_norm": 0.25529637932777405, "learning_rate": 1e-05, "loss": 0.9497, "step": 91600 }, { "epoch": 81.13817537643932, "grad_norm": 0.247398242354393, "learning_rate": 1e-05, "loss": 0.9342, "step": 91605 }, { "epoch": 81.14260407440213, "grad_norm": 0.28469541668891907, "learning_rate": 1e-05, "loss": 0.9214, "step": 91610 }, { "epoch": 81.14703277236492, "grad_norm": 0.2281649261713028, "learning_rate": 1e-05, "loss": 0.9537, "step": 91615 }, { "epoch": 81.15146147032772, "grad_norm": 0.2777380347251892, "learning_rate": 1e-05, "loss": 0.9665, "step": 91620 }, { "epoch": 81.15589016829053, "grad_norm": 0.24761246144771576, "learning_rate": 1e-05, "loss": 0.9678, "step": 91625 }, { "epoch": 81.16031886625332, "grad_norm": 0.2413511574268341, "learning_rate": 1e-05, "loss": 0.968, "step": 91630 }, { "epoch": 81.16474756421611, "grad_norm": 0.26133501529693604, "learning_rate": 1e-05, "loss": 0.9389, "step": 91635 }, { "epoch": 81.16917626217892, "grad_norm": 0.24164548516273499, "learning_rate": 1e-05, "loss": 0.9456, "step": 91640 }, { "epoch": 81.17360496014172, "grad_norm": 0.21848039329051971, "learning_rate": 1e-05, "loss": 0.9214, "step": 91645 }, { "epoch": 81.17803365810451, "grad_norm": 0.24051550030708313, "learning_rate": 1e-05, "loss": 0.9854, "step": 91650 }, { "epoch": 81.18246235606732, "grad_norm": 0.21918097138404846, "learning_rate": 1e-05, "loss": 0.9024, "step": 91655 }, { "epoch": 81.18689105403011, "grad_norm": 0.21730661392211914, "learning_rate": 1e-05, "loss": 0.9212, "step": 91660 }, { "epoch": 81.1913197519929, "grad_norm": 0.247291699051857, "learning_rate": 1e-05, "loss": 0.981, "step": 91665 }, { "epoch": 81.19574844995572, "grad_norm": 0.2425602674484253, "learning_rate": 1e-05, "loss": 0.9311, "step": 91670 }, { "epoch": 81.20017714791851, "grad_norm": 0.2484261393547058, "learning_rate": 1e-05, "loss": 0.9862, "step": 91675 }, { "epoch": 81.20460584588132, "grad_norm": 0.2652241289615631, "learning_rate": 1e-05, "loss": 0.9778, "step": 91680 }, { "epoch": 81.20903454384411, "grad_norm": 0.28946951031684875, "learning_rate": 1e-05, "loss": 0.9835, "step": 91685 }, { "epoch": 81.2134632418069, "grad_norm": 0.21521642804145813, "learning_rate": 1e-05, "loss": 0.9315, "step": 91690 }, { "epoch": 81.21789193976971, "grad_norm": 0.23349493741989136, "learning_rate": 1e-05, "loss": 0.9889, "step": 91695 }, { "epoch": 81.22232063773251, "grad_norm": 0.2843078374862671, "learning_rate": 1e-05, "loss": 0.9479, "step": 91700 }, { "epoch": 81.2267493356953, "grad_norm": 0.23122236132621765, "learning_rate": 1e-05, "loss": 0.9974, "step": 91705 }, { "epoch": 81.23117803365811, "grad_norm": 0.23589713871479034, "learning_rate": 1e-05, "loss": 0.982, "step": 91710 }, { "epoch": 81.2356067316209, "grad_norm": 0.23940306901931763, "learning_rate": 1e-05, "loss": 1.0633, "step": 91715 }, { "epoch": 81.2400354295837, "grad_norm": 0.2338026463985443, "learning_rate": 1e-05, "loss": 0.9512, "step": 91720 }, { "epoch": 81.24446412754651, "grad_norm": 0.25521382689476013, "learning_rate": 1e-05, "loss": 0.9557, "step": 91725 }, { "epoch": 81.2488928255093, "grad_norm": 0.20579414069652557, "learning_rate": 1e-05, "loss": 0.9798, "step": 91730 }, { "epoch": 81.2533215234721, "grad_norm": 0.203611359000206, "learning_rate": 1e-05, "loss": 0.9664, "step": 91735 }, { "epoch": 81.2577502214349, "grad_norm": 0.26565149426460266, "learning_rate": 1e-05, "loss": 0.9847, "step": 91740 }, { "epoch": 81.2621789193977, "grad_norm": 0.25646114349365234, "learning_rate": 1e-05, "loss": 0.9139, "step": 91745 }, { "epoch": 81.26660761736049, "grad_norm": 0.24975556135177612, "learning_rate": 1e-05, "loss": 1.0507, "step": 91750 }, { "epoch": 81.2710363153233, "grad_norm": 0.21961677074432373, "learning_rate": 1e-05, "loss": 0.9437, "step": 91755 }, { "epoch": 81.2754650132861, "grad_norm": 0.2566573917865753, "learning_rate": 1e-05, "loss": 0.9474, "step": 91760 }, { "epoch": 81.27989371124889, "grad_norm": 0.25011637806892395, "learning_rate": 1e-05, "loss": 0.9452, "step": 91765 }, { "epoch": 81.2843224092117, "grad_norm": 0.26534557342529297, "learning_rate": 1e-05, "loss": 1.0179, "step": 91770 }, { "epoch": 81.28875110717449, "grad_norm": 0.21518675982952118, "learning_rate": 1e-05, "loss": 0.9075, "step": 91775 }, { "epoch": 81.29317980513729, "grad_norm": 0.24369928240776062, "learning_rate": 1e-05, "loss": 1.0214, "step": 91780 }, { "epoch": 81.2976085031001, "grad_norm": 0.23437944054603577, "learning_rate": 1e-05, "loss": 0.9907, "step": 91785 }, { "epoch": 81.30203720106289, "grad_norm": 0.20320825278759003, "learning_rate": 1e-05, "loss": 0.8983, "step": 91790 }, { "epoch": 81.30646589902568, "grad_norm": 0.2202541083097458, "learning_rate": 1e-05, "loss": 0.9492, "step": 91795 }, { "epoch": 81.31089459698849, "grad_norm": 0.2626502513885498, "learning_rate": 1e-05, "loss": 0.9831, "step": 91800 }, { "epoch": 81.31532329495128, "grad_norm": 0.2656176686286926, "learning_rate": 1e-05, "loss": 0.8879, "step": 91805 }, { "epoch": 81.31975199291408, "grad_norm": 0.22513507306575775, "learning_rate": 1e-05, "loss": 0.949, "step": 91810 }, { "epoch": 81.32418069087689, "grad_norm": 0.2819122076034546, "learning_rate": 1e-05, "loss": 0.9667, "step": 91815 }, { "epoch": 81.32860938883968, "grad_norm": 0.21301336586475372, "learning_rate": 1e-05, "loss": 0.9466, "step": 91820 }, { "epoch": 81.33303808680247, "grad_norm": 0.2654283046722412, "learning_rate": 1e-05, "loss": 0.9005, "step": 91825 }, { "epoch": 81.33746678476528, "grad_norm": 0.22072476148605347, "learning_rate": 1e-05, "loss": 0.9771, "step": 91830 }, { "epoch": 81.34189548272808, "grad_norm": 0.3169809579849243, "learning_rate": 1e-05, "loss": 0.9591, "step": 91835 }, { "epoch": 81.34632418069087, "grad_norm": 0.22610099613666534, "learning_rate": 1e-05, "loss": 0.9388, "step": 91840 }, { "epoch": 81.35075287865368, "grad_norm": 0.24698369204998016, "learning_rate": 1e-05, "loss": 0.9819, "step": 91845 }, { "epoch": 81.35518157661647, "grad_norm": 0.21200230717658997, "learning_rate": 1e-05, "loss": 0.9308, "step": 91850 }, { "epoch": 81.35961027457927, "grad_norm": 0.2316744178533554, "learning_rate": 1e-05, "loss": 1.0299, "step": 91855 }, { "epoch": 81.36403897254208, "grad_norm": 0.2111215889453888, "learning_rate": 1e-05, "loss": 0.9647, "step": 91860 }, { "epoch": 81.36846767050487, "grad_norm": 0.2498284876346588, "learning_rate": 1e-05, "loss": 0.9221, "step": 91865 }, { "epoch": 81.37289636846766, "grad_norm": 0.2121538519859314, "learning_rate": 1e-05, "loss": 0.9638, "step": 91870 }, { "epoch": 81.37732506643047, "grad_norm": 0.2108762115240097, "learning_rate": 1e-05, "loss": 0.9372, "step": 91875 }, { "epoch": 81.38175376439327, "grad_norm": 0.2326587438583374, "learning_rate": 1e-05, "loss": 0.9709, "step": 91880 }, { "epoch": 81.38618246235606, "grad_norm": 0.24290013313293457, "learning_rate": 1e-05, "loss": 0.9784, "step": 91885 }, { "epoch": 81.39061116031887, "grad_norm": 0.2500825524330139, "learning_rate": 1e-05, "loss": 0.9644, "step": 91890 }, { "epoch": 81.39503985828166, "grad_norm": 0.23301725089550018, "learning_rate": 1e-05, "loss": 0.9405, "step": 91895 }, { "epoch": 81.39946855624446, "grad_norm": 0.22314879298210144, "learning_rate": 1e-05, "loss": 0.9941, "step": 91900 }, { "epoch": 81.40389725420727, "grad_norm": 0.24765098094940186, "learning_rate": 1e-05, "loss": 0.9475, "step": 91905 }, { "epoch": 81.40832595217006, "grad_norm": 0.23086926341056824, "learning_rate": 1e-05, "loss": 0.9354, "step": 91910 }, { "epoch": 81.41275465013285, "grad_norm": 0.2578636109828949, "learning_rate": 1e-05, "loss": 0.9262, "step": 91915 }, { "epoch": 81.41718334809566, "grad_norm": 0.24778832495212555, "learning_rate": 1e-05, "loss": 0.989, "step": 91920 }, { "epoch": 81.42161204605846, "grad_norm": 0.27392515540122986, "learning_rate": 1e-05, "loss": 0.9703, "step": 91925 }, { "epoch": 81.42604074402126, "grad_norm": 0.23272523283958435, "learning_rate": 1e-05, "loss": 1.0123, "step": 91930 }, { "epoch": 81.43046944198406, "grad_norm": 0.2507994472980499, "learning_rate": 1e-05, "loss": 0.9849, "step": 91935 }, { "epoch": 81.43489813994685, "grad_norm": 0.25610029697418213, "learning_rate": 1e-05, "loss": 0.9827, "step": 91940 }, { "epoch": 81.43932683790966, "grad_norm": 0.27585524320602417, "learning_rate": 1e-05, "loss": 0.9393, "step": 91945 }, { "epoch": 81.44375553587246, "grad_norm": 0.2214675396680832, "learning_rate": 1e-05, "loss": 0.9834, "step": 91950 }, { "epoch": 81.44818423383525, "grad_norm": 0.22733256220817566, "learning_rate": 1e-05, "loss": 0.9901, "step": 91955 }, { "epoch": 81.45261293179806, "grad_norm": 0.2225317507982254, "learning_rate": 1e-05, "loss": 0.9795, "step": 91960 }, { "epoch": 81.45704162976085, "grad_norm": 0.2192356288433075, "learning_rate": 1e-05, "loss": 0.9974, "step": 91965 }, { "epoch": 81.46147032772365, "grad_norm": 0.19329208135604858, "learning_rate": 1e-05, "loss": 0.9558, "step": 91970 }, { "epoch": 81.46589902568645, "grad_norm": 0.23062781989574432, "learning_rate": 1e-05, "loss": 0.9834, "step": 91975 }, { "epoch": 81.47032772364925, "grad_norm": 0.24189935624599457, "learning_rate": 1e-05, "loss": 0.92, "step": 91980 }, { "epoch": 81.47475642161204, "grad_norm": 0.22949936985969543, "learning_rate": 1e-05, "loss": 0.9321, "step": 91985 }, { "epoch": 81.47918511957485, "grad_norm": 0.2188076674938202, "learning_rate": 1e-05, "loss": 0.9653, "step": 91990 }, { "epoch": 81.48361381753764, "grad_norm": 0.21806061267852783, "learning_rate": 1e-05, "loss": 0.9488, "step": 91995 }, { "epoch": 81.48804251550044, "grad_norm": 0.2329736351966858, "learning_rate": 1e-05, "loss": 0.9784, "step": 92000 }, { "epoch": 81.49247121346325, "grad_norm": 0.21016301214694977, "learning_rate": 1e-05, "loss": 0.939, "step": 92005 }, { "epoch": 81.49689991142604, "grad_norm": 0.20761898159980774, "learning_rate": 1e-05, "loss": 0.9321, "step": 92010 }, { "epoch": 81.50132860938884, "grad_norm": 0.2447303831577301, "learning_rate": 1e-05, "loss": 0.969, "step": 92015 }, { "epoch": 81.50575730735164, "grad_norm": 0.23071740567684174, "learning_rate": 1e-05, "loss": 0.9546, "step": 92020 }, { "epoch": 81.51018600531444, "grad_norm": 0.23061630129814148, "learning_rate": 1e-05, "loss": 0.9685, "step": 92025 }, { "epoch": 81.51461470327723, "grad_norm": 0.26496872305870056, "learning_rate": 1e-05, "loss": 0.9563, "step": 92030 }, { "epoch": 81.51904340124004, "grad_norm": 0.23935557901859283, "learning_rate": 1e-05, "loss": 0.9599, "step": 92035 }, { "epoch": 81.52347209920283, "grad_norm": 0.24850115180015564, "learning_rate": 1e-05, "loss": 0.9688, "step": 92040 }, { "epoch": 81.52790079716563, "grad_norm": 0.24320080876350403, "learning_rate": 1e-05, "loss": 0.963, "step": 92045 }, { "epoch": 81.53232949512844, "grad_norm": 0.2559666633605957, "learning_rate": 1e-05, "loss": 0.998, "step": 92050 }, { "epoch": 81.53675819309123, "grad_norm": 0.19619694352149963, "learning_rate": 1e-05, "loss": 0.9673, "step": 92055 }, { "epoch": 81.54118689105402, "grad_norm": 0.23495841026306152, "learning_rate": 1e-05, "loss": 0.9829, "step": 92060 }, { "epoch": 81.54561558901683, "grad_norm": 0.24964475631713867, "learning_rate": 1e-05, "loss": 0.9677, "step": 92065 }, { "epoch": 81.55004428697963, "grad_norm": 0.24080830812454224, "learning_rate": 1e-05, "loss": 0.9642, "step": 92070 }, { "epoch": 81.55447298494242, "grad_norm": 0.24716900289058685, "learning_rate": 1e-05, "loss": 0.9282, "step": 92075 }, { "epoch": 81.55890168290523, "grad_norm": 0.32225799560546875, "learning_rate": 1e-05, "loss": 0.9483, "step": 92080 }, { "epoch": 81.56333038086802, "grad_norm": 0.23737460374832153, "learning_rate": 1e-05, "loss": 0.9754, "step": 92085 }, { "epoch": 81.56775907883082, "grad_norm": 0.2512807250022888, "learning_rate": 1e-05, "loss": 0.9598, "step": 92090 }, { "epoch": 81.57218777679363, "grad_norm": 0.22749358415603638, "learning_rate": 1e-05, "loss": 0.9436, "step": 92095 }, { "epoch": 81.57661647475642, "grad_norm": 0.26936012506484985, "learning_rate": 1e-05, "loss": 0.9301, "step": 92100 }, { "epoch": 81.58104517271921, "grad_norm": 0.22237396240234375, "learning_rate": 1e-05, "loss": 0.9601, "step": 92105 }, { "epoch": 81.58547387068202, "grad_norm": 0.22897861897945404, "learning_rate": 1e-05, "loss": 0.9758, "step": 92110 }, { "epoch": 81.58990256864482, "grad_norm": 0.24843649566173553, "learning_rate": 1e-05, "loss": 1.0007, "step": 92115 }, { "epoch": 81.59433126660761, "grad_norm": 0.23942862451076508, "learning_rate": 1e-05, "loss": 0.958, "step": 92120 }, { "epoch": 81.59875996457042, "grad_norm": 0.24777622520923615, "learning_rate": 1e-05, "loss": 0.9454, "step": 92125 }, { "epoch": 81.60318866253321, "grad_norm": 0.25789037346839905, "learning_rate": 1e-05, "loss": 0.9856, "step": 92130 }, { "epoch": 81.60761736049601, "grad_norm": 0.25624892115592957, "learning_rate": 1e-05, "loss": 0.9398, "step": 92135 }, { "epoch": 81.61204605845882, "grad_norm": 0.23858697712421417, "learning_rate": 1e-05, "loss": 0.9651, "step": 92140 }, { "epoch": 81.61647475642161, "grad_norm": 0.26931270956993103, "learning_rate": 1e-05, "loss": 0.9856, "step": 92145 }, { "epoch": 81.6209034543844, "grad_norm": 0.26362141966819763, "learning_rate": 1e-05, "loss": 0.9521, "step": 92150 }, { "epoch": 81.62533215234721, "grad_norm": 0.23770059645175934, "learning_rate": 1e-05, "loss": 0.971, "step": 92155 }, { "epoch": 81.62976085031, "grad_norm": 0.22525376081466675, "learning_rate": 1e-05, "loss": 0.9788, "step": 92160 }, { "epoch": 81.63418954827281, "grad_norm": 0.21832843124866486, "learning_rate": 1e-05, "loss": 0.9498, "step": 92165 }, { "epoch": 81.63861824623561, "grad_norm": 0.2812003493309021, "learning_rate": 1e-05, "loss": 0.9505, "step": 92170 }, { "epoch": 81.6430469441984, "grad_norm": 0.21997977793216705, "learning_rate": 1e-05, "loss": 0.9349, "step": 92175 }, { "epoch": 81.64747564216121, "grad_norm": 0.23646268248558044, "learning_rate": 1e-05, "loss": 0.9826, "step": 92180 }, { "epoch": 81.651904340124, "grad_norm": 0.23823480308055878, "learning_rate": 1e-05, "loss": 0.9586, "step": 92185 }, { "epoch": 81.6563330380868, "grad_norm": 0.25054866075515747, "learning_rate": 1e-05, "loss": 0.9717, "step": 92190 }, { "epoch": 81.66076173604961, "grad_norm": 0.2461187094449997, "learning_rate": 1e-05, "loss": 0.9663, "step": 92195 }, { "epoch": 81.6651904340124, "grad_norm": 0.24883274734020233, "learning_rate": 1e-05, "loss": 0.8884, "step": 92200 }, { "epoch": 81.6696191319752, "grad_norm": 0.238087460398674, "learning_rate": 1e-05, "loss": 1.045, "step": 92205 }, { "epoch": 81.674047829938, "grad_norm": 0.2064228355884552, "learning_rate": 1e-05, "loss": 0.9205, "step": 92210 }, { "epoch": 81.6784765279008, "grad_norm": 0.2297319769859314, "learning_rate": 1e-05, "loss": 0.9387, "step": 92215 }, { "epoch": 81.68290522586359, "grad_norm": 0.2751140892505646, "learning_rate": 1e-05, "loss": 0.9188, "step": 92220 }, { "epoch": 81.6873339238264, "grad_norm": 0.23701654374599457, "learning_rate": 1e-05, "loss": 0.9515, "step": 92225 }, { "epoch": 81.6917626217892, "grad_norm": 0.2428114414215088, "learning_rate": 1e-05, "loss": 0.921, "step": 92230 }, { "epoch": 81.69619131975199, "grad_norm": 0.24080325663089752, "learning_rate": 1e-05, "loss": 0.9408, "step": 92235 }, { "epoch": 81.7006200177148, "grad_norm": 0.2512313425540924, "learning_rate": 1e-05, "loss": 1.0007, "step": 92240 }, { "epoch": 81.70504871567759, "grad_norm": 0.2967431843280792, "learning_rate": 1e-05, "loss": 0.9913, "step": 92245 }, { "epoch": 81.70947741364039, "grad_norm": 0.20880401134490967, "learning_rate": 1e-05, "loss": 0.9962, "step": 92250 }, { "epoch": 81.7139061116032, "grad_norm": 0.22423481941223145, "learning_rate": 1e-05, "loss": 0.9619, "step": 92255 }, { "epoch": 81.71833480956599, "grad_norm": 0.20792345702648163, "learning_rate": 1e-05, "loss": 0.9583, "step": 92260 }, { "epoch": 81.72276350752878, "grad_norm": 0.2642989754676819, "learning_rate": 1e-05, "loss": 0.9436, "step": 92265 }, { "epoch": 81.72719220549159, "grad_norm": 0.2577972114086151, "learning_rate": 1e-05, "loss": 0.9526, "step": 92270 }, { "epoch": 81.73162090345438, "grad_norm": 0.22807720303535461, "learning_rate": 1e-05, "loss": 0.9197, "step": 92275 }, { "epoch": 81.73604960141718, "grad_norm": 0.30161944031715393, "learning_rate": 1e-05, "loss": 0.9933, "step": 92280 }, { "epoch": 81.74047829937999, "grad_norm": 0.2140309363603592, "learning_rate": 1e-05, "loss": 0.9782, "step": 92285 }, { "epoch": 81.74490699734278, "grad_norm": 0.24424193799495697, "learning_rate": 1e-05, "loss": 0.9534, "step": 92290 }, { "epoch": 81.74933569530558, "grad_norm": 0.23796679079532623, "learning_rate": 1e-05, "loss": 0.9356, "step": 92295 }, { "epoch": 81.75376439326838, "grad_norm": 0.2624052166938782, "learning_rate": 1e-05, "loss": 0.9378, "step": 92300 }, { "epoch": 81.75819309123118, "grad_norm": 0.24641534686088562, "learning_rate": 1e-05, "loss": 0.9177, "step": 92305 }, { "epoch": 81.76262178919397, "grad_norm": 0.22633585333824158, "learning_rate": 1e-05, "loss": 0.9945, "step": 92310 }, { "epoch": 81.76705048715678, "grad_norm": 0.2430175095796585, "learning_rate": 1e-05, "loss": 0.9825, "step": 92315 }, { "epoch": 81.77147918511957, "grad_norm": 0.2352379709482193, "learning_rate": 1e-05, "loss": 1.0082, "step": 92320 }, { "epoch": 81.77590788308237, "grad_norm": 0.27295732498168945, "learning_rate": 1e-05, "loss": 0.9332, "step": 92325 }, { "epoch": 81.78033658104518, "grad_norm": 0.2678602635860443, "learning_rate": 1e-05, "loss": 0.9256, "step": 92330 }, { "epoch": 81.78476527900797, "grad_norm": 0.26329970359802246, "learning_rate": 1e-05, "loss": 0.9332, "step": 92335 }, { "epoch": 81.78919397697076, "grad_norm": 0.21536113321781158, "learning_rate": 1e-05, "loss": 0.9707, "step": 92340 }, { "epoch": 81.79362267493357, "grad_norm": 0.21278207004070282, "learning_rate": 1e-05, "loss": 1.016, "step": 92345 }, { "epoch": 81.79805137289637, "grad_norm": 0.2980216443538666, "learning_rate": 1e-05, "loss": 0.9683, "step": 92350 }, { "epoch": 81.80248007085916, "grad_norm": 0.23702271282672882, "learning_rate": 1e-05, "loss": 1.005, "step": 92355 }, { "epoch": 81.80690876882197, "grad_norm": 0.2431996762752533, "learning_rate": 1e-05, "loss": 0.9416, "step": 92360 }, { "epoch": 81.81133746678476, "grad_norm": 0.2558686137199402, "learning_rate": 1e-05, "loss": 0.8983, "step": 92365 }, { "epoch": 81.81576616474756, "grad_norm": 0.26055800914764404, "learning_rate": 1e-05, "loss": 0.978, "step": 92370 }, { "epoch": 81.82019486271037, "grad_norm": 0.2423030436038971, "learning_rate": 1e-05, "loss": 0.9398, "step": 92375 }, { "epoch": 81.82462356067316, "grad_norm": 0.2459389865398407, "learning_rate": 1e-05, "loss": 0.9933, "step": 92380 }, { "epoch": 81.82905225863595, "grad_norm": 0.21564634144306183, "learning_rate": 1e-05, "loss": 1.0072, "step": 92385 }, { "epoch": 81.83348095659876, "grad_norm": 0.2186041921377182, "learning_rate": 1e-05, "loss": 0.9346, "step": 92390 }, { "epoch": 81.83790965456156, "grad_norm": 0.21956850588321686, "learning_rate": 1e-05, "loss": 0.9471, "step": 92395 }, { "epoch": 81.84233835252435, "grad_norm": 0.2123612016439438, "learning_rate": 1e-05, "loss": 0.9472, "step": 92400 }, { "epoch": 81.84676705048716, "grad_norm": 0.21563979983329773, "learning_rate": 1e-05, "loss": 0.9651, "step": 92405 }, { "epoch": 81.85119574844995, "grad_norm": 0.2123672068119049, "learning_rate": 1e-05, "loss": 0.9117, "step": 92410 }, { "epoch": 81.85562444641276, "grad_norm": 0.240903839468956, "learning_rate": 1e-05, "loss": 0.9463, "step": 92415 }, { "epoch": 81.86005314437556, "grad_norm": 0.2425391674041748, "learning_rate": 1e-05, "loss": 0.9833, "step": 92420 }, { "epoch": 81.86448184233835, "grad_norm": 0.23530447483062744, "learning_rate": 1e-05, "loss": 0.9978, "step": 92425 }, { "epoch": 81.86891054030116, "grad_norm": 0.24704478681087494, "learning_rate": 1e-05, "loss": 0.9685, "step": 92430 }, { "epoch": 81.87333923826395, "grad_norm": 0.272141695022583, "learning_rate": 1e-05, "loss": 0.9309, "step": 92435 }, { "epoch": 81.87776793622675, "grad_norm": 0.2595696747303009, "learning_rate": 1e-05, "loss": 0.9418, "step": 92440 }, { "epoch": 81.88219663418955, "grad_norm": 0.22840823233127594, "learning_rate": 1e-05, "loss": 0.9644, "step": 92445 }, { "epoch": 81.88662533215235, "grad_norm": 0.24744726717472076, "learning_rate": 1e-05, "loss": 1.0123, "step": 92450 }, { "epoch": 81.89105403011514, "grad_norm": 0.2935553789138794, "learning_rate": 1e-05, "loss": 0.9488, "step": 92455 }, { "epoch": 81.89548272807795, "grad_norm": 0.2323647290468216, "learning_rate": 1e-05, "loss": 1.0062, "step": 92460 }, { "epoch": 81.89991142604075, "grad_norm": 0.2983042299747467, "learning_rate": 1e-05, "loss": 0.99, "step": 92465 }, { "epoch": 81.90434012400354, "grad_norm": 0.2761507034301758, "learning_rate": 1e-05, "loss": 0.9873, "step": 92470 }, { "epoch": 81.90876882196635, "grad_norm": 0.25779053568840027, "learning_rate": 1e-05, "loss": 0.9611, "step": 92475 }, { "epoch": 81.91319751992914, "grad_norm": 0.28125467896461487, "learning_rate": 1e-05, "loss": 0.9576, "step": 92480 }, { "epoch": 81.91762621789194, "grad_norm": 0.23626072704792023, "learning_rate": 1e-05, "loss": 1.025, "step": 92485 }, { "epoch": 81.92205491585474, "grad_norm": 0.2601568102836609, "learning_rate": 1e-05, "loss": 0.9426, "step": 92490 }, { "epoch": 81.92648361381754, "grad_norm": 0.25193536281585693, "learning_rate": 1e-05, "loss": 0.9012, "step": 92495 }, { "epoch": 81.93091231178033, "grad_norm": 0.20401793718338013, "learning_rate": 1e-05, "loss": 0.9979, "step": 92500 }, { "epoch": 81.93534100974314, "grad_norm": 0.22088439762592316, "learning_rate": 1e-05, "loss": 0.9377, "step": 92505 }, { "epoch": 81.93976970770593, "grad_norm": 0.21642133593559265, "learning_rate": 1e-05, "loss": 0.9639, "step": 92510 }, { "epoch": 81.94419840566873, "grad_norm": 0.23719359934329987, "learning_rate": 1e-05, "loss": 0.909, "step": 92515 }, { "epoch": 81.94862710363154, "grad_norm": 0.22414977848529816, "learning_rate": 1e-05, "loss": 0.9215, "step": 92520 }, { "epoch": 81.95305580159433, "grad_norm": 0.23038838803768158, "learning_rate": 1e-05, "loss": 0.9348, "step": 92525 }, { "epoch": 81.95748449955713, "grad_norm": 0.26599398255348206, "learning_rate": 1e-05, "loss": 0.911, "step": 92530 }, { "epoch": 81.96191319751993, "grad_norm": 0.25525933504104614, "learning_rate": 1e-05, "loss": 0.9149, "step": 92535 }, { "epoch": 81.96634189548273, "grad_norm": 0.26162856817245483, "learning_rate": 1e-05, "loss": 0.9321, "step": 92540 }, { "epoch": 81.97077059344552, "grad_norm": 0.2528301477432251, "learning_rate": 1e-05, "loss": 0.9754, "step": 92545 }, { "epoch": 81.97519929140833, "grad_norm": 0.25905710458755493, "learning_rate": 1e-05, "loss": 0.9333, "step": 92550 }, { "epoch": 81.97962798937112, "grad_norm": 0.2095557302236557, "learning_rate": 1e-05, "loss": 0.9531, "step": 92555 }, { "epoch": 81.98405668733392, "grad_norm": 0.24877482652664185, "learning_rate": 1e-05, "loss": 0.9498, "step": 92560 }, { "epoch": 81.98848538529673, "grad_norm": 0.23846156895160675, "learning_rate": 1e-05, "loss": 0.9354, "step": 92565 }, { "epoch": 81.99291408325952, "grad_norm": 0.24456891417503357, "learning_rate": 1e-05, "loss": 0.9555, "step": 92570 }, { "epoch": 81.99734278122232, "grad_norm": 0.24258312582969666, "learning_rate": 1e-05, "loss": 0.9619, "step": 92575 }, { "epoch": 82.00177147918512, "grad_norm": 0.22928327322006226, "learning_rate": 1e-05, "loss": 0.9258, "step": 92580 }, { "epoch": 82.00620017714792, "grad_norm": 0.2241503894329071, "learning_rate": 1e-05, "loss": 0.9998, "step": 92585 }, { "epoch": 82.01062887511071, "grad_norm": 0.2845040559768677, "learning_rate": 1e-05, "loss": 0.9613, "step": 92590 }, { "epoch": 82.01505757307352, "grad_norm": 0.2193719744682312, "learning_rate": 1e-05, "loss": 0.9794, "step": 92595 }, { "epoch": 82.01948627103631, "grad_norm": 0.2309652864933014, "learning_rate": 1e-05, "loss": 0.9165, "step": 92600 }, { "epoch": 82.02391496899911, "grad_norm": 0.2453942745923996, "learning_rate": 1e-05, "loss": 0.9657, "step": 92605 }, { "epoch": 82.02834366696192, "grad_norm": 0.21675848960876465, "learning_rate": 1e-05, "loss": 0.9219, "step": 92610 }, { "epoch": 82.03277236492471, "grad_norm": 0.2201230376958847, "learning_rate": 1e-05, "loss": 0.9588, "step": 92615 }, { "epoch": 82.0372010628875, "grad_norm": 0.2498418390750885, "learning_rate": 1e-05, "loss": 1.0335, "step": 92620 }, { "epoch": 82.04162976085031, "grad_norm": 0.245519757270813, "learning_rate": 1e-05, "loss": 0.9973, "step": 92625 }, { "epoch": 82.0460584588131, "grad_norm": 0.2237943708896637, "learning_rate": 1e-05, "loss": 0.9307, "step": 92630 }, { "epoch": 82.0504871567759, "grad_norm": 0.21586014330387115, "learning_rate": 1e-05, "loss": 0.9114, "step": 92635 }, { "epoch": 82.05491585473871, "grad_norm": 0.271984726190567, "learning_rate": 1e-05, "loss": 0.9922, "step": 92640 }, { "epoch": 82.0593445527015, "grad_norm": 0.2107381373643875, "learning_rate": 1e-05, "loss": 0.9479, "step": 92645 }, { "epoch": 82.0637732506643, "grad_norm": 0.2336064577102661, "learning_rate": 1e-05, "loss": 0.9464, "step": 92650 }, { "epoch": 82.0682019486271, "grad_norm": 0.24420031905174255, "learning_rate": 1e-05, "loss": 0.9473, "step": 92655 }, { "epoch": 82.0726306465899, "grad_norm": 0.2063104510307312, "learning_rate": 1e-05, "loss": 0.9557, "step": 92660 }, { "epoch": 82.07705934455271, "grad_norm": 0.27653226256370544, "learning_rate": 1e-05, "loss": 0.9519, "step": 92665 }, { "epoch": 82.0814880425155, "grad_norm": 0.3089624345302582, "learning_rate": 1e-05, "loss": 0.9655, "step": 92670 }, { "epoch": 82.0859167404783, "grad_norm": 0.20853734016418457, "learning_rate": 1e-05, "loss": 0.9527, "step": 92675 }, { "epoch": 82.0903454384411, "grad_norm": 0.2841624617576599, "learning_rate": 1e-05, "loss": 1.0271, "step": 92680 }, { "epoch": 82.0947741364039, "grad_norm": 0.26382946968078613, "learning_rate": 1e-05, "loss": 0.9412, "step": 92685 }, { "epoch": 82.0992028343667, "grad_norm": 0.29224419593811035, "learning_rate": 1e-05, "loss": 0.9284, "step": 92690 }, { "epoch": 82.1036315323295, "grad_norm": 0.30479520559310913, "learning_rate": 1e-05, "loss": 0.9491, "step": 92695 }, { "epoch": 82.1080602302923, "grad_norm": 0.2462264448404312, "learning_rate": 1e-05, "loss": 0.9571, "step": 92700 }, { "epoch": 82.11248892825509, "grad_norm": 0.23337511718273163, "learning_rate": 1e-05, "loss": 1.0242, "step": 92705 }, { "epoch": 82.1169176262179, "grad_norm": 0.2221749722957611, "learning_rate": 1e-05, "loss": 0.9077, "step": 92710 }, { "epoch": 82.12134632418069, "grad_norm": 0.23320898413658142, "learning_rate": 1e-05, "loss": 0.9975, "step": 92715 }, { "epoch": 82.12577502214349, "grad_norm": 0.21928885579109192, "learning_rate": 1e-05, "loss": 0.9752, "step": 92720 }, { "epoch": 82.1302037201063, "grad_norm": 0.23455867171287537, "learning_rate": 1e-05, "loss": 0.914, "step": 92725 }, { "epoch": 82.13463241806909, "grad_norm": 0.2031598538160324, "learning_rate": 1e-05, "loss": 0.9223, "step": 92730 }, { "epoch": 82.13906111603188, "grad_norm": 0.19815897941589355, "learning_rate": 1e-05, "loss": 1.0116, "step": 92735 }, { "epoch": 82.14348981399469, "grad_norm": 0.23133572936058044, "learning_rate": 1e-05, "loss": 0.9696, "step": 92740 }, { "epoch": 82.14791851195749, "grad_norm": 0.2101556956768036, "learning_rate": 1e-05, "loss": 0.9629, "step": 92745 }, { "epoch": 82.15234720992028, "grad_norm": 0.2910807728767395, "learning_rate": 1e-05, "loss": 0.9803, "step": 92750 }, { "epoch": 82.15677590788309, "grad_norm": 0.3049667179584503, "learning_rate": 1e-05, "loss": 0.9044, "step": 92755 }, { "epoch": 82.16120460584588, "grad_norm": 0.2579787075519562, "learning_rate": 1e-05, "loss": 0.9812, "step": 92760 }, { "epoch": 82.16563330380868, "grad_norm": 0.23370757699012756, "learning_rate": 1e-05, "loss": 1.0367, "step": 92765 }, { "epoch": 82.17006200177148, "grad_norm": 0.28080523014068604, "learning_rate": 1e-05, "loss": 0.9463, "step": 92770 }, { "epoch": 82.17449069973428, "grad_norm": 0.20041103661060333, "learning_rate": 1e-05, "loss": 0.9888, "step": 92775 }, { "epoch": 82.17891939769707, "grad_norm": 0.2114713042974472, "learning_rate": 1e-05, "loss": 1.0092, "step": 92780 }, { "epoch": 82.18334809565988, "grad_norm": 0.2356327325105667, "learning_rate": 1e-05, "loss": 0.9635, "step": 92785 }, { "epoch": 82.18777679362267, "grad_norm": 0.21957969665527344, "learning_rate": 1e-05, "loss": 0.9249, "step": 92790 }, { "epoch": 82.19220549158547, "grad_norm": 0.24526815116405487, "learning_rate": 1e-05, "loss": 0.9692, "step": 92795 }, { "epoch": 82.19663418954828, "grad_norm": 0.2780296504497528, "learning_rate": 1e-05, "loss": 0.945, "step": 92800 }, { "epoch": 82.20106288751107, "grad_norm": 0.22559769451618195, "learning_rate": 1e-05, "loss": 0.9091, "step": 92805 }, { "epoch": 82.20549158547387, "grad_norm": 0.19436782598495483, "learning_rate": 1e-05, "loss": 0.923, "step": 92810 }, { "epoch": 82.20992028343667, "grad_norm": 0.2156790792942047, "learning_rate": 1e-05, "loss": 0.9603, "step": 92815 }, { "epoch": 82.21434898139947, "grad_norm": 0.22508729994297028, "learning_rate": 1e-05, "loss": 0.9288, "step": 92820 }, { "epoch": 82.21877767936226, "grad_norm": 0.26046520471572876, "learning_rate": 1e-05, "loss": 0.9675, "step": 92825 }, { "epoch": 82.22320637732507, "grad_norm": 0.21837759017944336, "learning_rate": 1e-05, "loss": 0.9537, "step": 92830 }, { "epoch": 82.22763507528786, "grad_norm": 0.22372931241989136, "learning_rate": 1e-05, "loss": 0.9438, "step": 92835 }, { "epoch": 82.23206377325066, "grad_norm": 0.2550649642944336, "learning_rate": 1e-05, "loss": 0.9324, "step": 92840 }, { "epoch": 82.23649247121347, "grad_norm": 0.28876209259033203, "learning_rate": 1e-05, "loss": 0.9862, "step": 92845 }, { "epoch": 82.24092116917626, "grad_norm": 0.24127979576587677, "learning_rate": 1e-05, "loss": 0.9675, "step": 92850 }, { "epoch": 82.24534986713905, "grad_norm": 0.2675110399723053, "learning_rate": 1e-05, "loss": 0.9723, "step": 92855 }, { "epoch": 82.24977856510186, "grad_norm": 0.2507060766220093, "learning_rate": 1e-05, "loss": 0.9671, "step": 92860 }, { "epoch": 82.25420726306466, "grad_norm": 0.2154734879732132, "learning_rate": 1e-05, "loss": 0.9834, "step": 92865 }, { "epoch": 82.25863596102745, "grad_norm": 0.2733849585056305, "learning_rate": 1e-05, "loss": 0.929, "step": 92870 }, { "epoch": 82.26306465899026, "grad_norm": 0.22636304795742035, "learning_rate": 1e-05, "loss": 0.9829, "step": 92875 }, { "epoch": 82.26749335695305, "grad_norm": 0.22808566689491272, "learning_rate": 1e-05, "loss": 0.9401, "step": 92880 }, { "epoch": 82.27192205491585, "grad_norm": 0.2521180212497711, "learning_rate": 1e-05, "loss": 0.9513, "step": 92885 }, { "epoch": 82.27635075287866, "grad_norm": 0.23331858217716217, "learning_rate": 1e-05, "loss": 0.991, "step": 92890 }, { "epoch": 82.28077945084145, "grad_norm": 0.23508606851100922, "learning_rate": 1e-05, "loss": 0.9813, "step": 92895 }, { "epoch": 82.28520814880426, "grad_norm": 0.26364055275917053, "learning_rate": 1e-05, "loss": 0.9513, "step": 92900 }, { "epoch": 82.28963684676705, "grad_norm": 0.2509511709213257, "learning_rate": 1e-05, "loss": 0.9445, "step": 92905 }, { "epoch": 82.29406554472985, "grad_norm": 0.2176615446805954, "learning_rate": 1e-05, "loss": 0.9494, "step": 92910 }, { "epoch": 82.29849424269266, "grad_norm": 0.23476454615592957, "learning_rate": 1e-05, "loss": 0.9444, "step": 92915 }, { "epoch": 82.30292294065545, "grad_norm": 0.24876607954502106, "learning_rate": 1e-05, "loss": 1.0001, "step": 92920 }, { "epoch": 82.30735163861824, "grad_norm": 0.22676807641983032, "learning_rate": 1e-05, "loss": 0.9927, "step": 92925 }, { "epoch": 82.31178033658105, "grad_norm": 0.24459494650363922, "learning_rate": 1e-05, "loss": 0.9506, "step": 92930 }, { "epoch": 82.31620903454385, "grad_norm": 0.25257933139801025, "learning_rate": 1e-05, "loss": 0.9776, "step": 92935 }, { "epoch": 82.32063773250664, "grad_norm": 0.2784808874130249, "learning_rate": 1e-05, "loss": 0.9699, "step": 92940 }, { "epoch": 82.32506643046945, "grad_norm": 0.25922703742980957, "learning_rate": 1e-05, "loss": 0.9584, "step": 92945 }, { "epoch": 82.32949512843224, "grad_norm": 0.2753784954547882, "learning_rate": 1e-05, "loss": 0.9525, "step": 92950 }, { "epoch": 82.33392382639504, "grad_norm": 0.24441808462142944, "learning_rate": 1e-05, "loss": 0.9194, "step": 92955 }, { "epoch": 82.33835252435784, "grad_norm": 0.2293798178434372, "learning_rate": 1e-05, "loss": 0.9408, "step": 92960 }, { "epoch": 82.34278122232064, "grad_norm": 0.22352881729602814, "learning_rate": 1e-05, "loss": 0.9937, "step": 92965 }, { "epoch": 82.34720992028343, "grad_norm": 0.1953272670507431, "learning_rate": 1e-05, "loss": 0.9234, "step": 92970 }, { "epoch": 82.35163861824624, "grad_norm": 0.2378494292497635, "learning_rate": 1e-05, "loss": 1.0192, "step": 92975 }, { "epoch": 82.35606731620904, "grad_norm": 0.2331896424293518, "learning_rate": 1e-05, "loss": 0.9701, "step": 92980 }, { "epoch": 82.36049601417183, "grad_norm": 0.2640392482280731, "learning_rate": 1e-05, "loss": 0.9652, "step": 92985 }, { "epoch": 82.36492471213464, "grad_norm": 0.2554363012313843, "learning_rate": 1e-05, "loss": 0.9307, "step": 92990 }, { "epoch": 82.36935341009743, "grad_norm": 0.2527042031288147, "learning_rate": 1e-05, "loss": 0.9137, "step": 92995 }, { "epoch": 82.37378210806023, "grad_norm": 0.21290934085845947, "learning_rate": 1e-05, "loss": 0.9142, "step": 93000 }, { "epoch": 82.37821080602303, "grad_norm": 0.23322202265262604, "learning_rate": 1e-05, "loss": 0.937, "step": 93005 }, { "epoch": 82.38263950398583, "grad_norm": 0.20906901359558105, "learning_rate": 1e-05, "loss": 0.9314, "step": 93010 }, { "epoch": 82.38706820194862, "grad_norm": 0.2555759847164154, "learning_rate": 1e-05, "loss": 0.9333, "step": 93015 }, { "epoch": 82.39149689991143, "grad_norm": 0.2137509137392044, "learning_rate": 1e-05, "loss": 0.9475, "step": 93020 }, { "epoch": 82.39592559787422, "grad_norm": 0.2556888163089752, "learning_rate": 1e-05, "loss": 0.9411, "step": 93025 }, { "epoch": 82.40035429583702, "grad_norm": 0.23329058289527893, "learning_rate": 1e-05, "loss": 0.992, "step": 93030 }, { "epoch": 82.40478299379983, "grad_norm": 0.2291184961795807, "learning_rate": 1e-05, "loss": 0.9804, "step": 93035 }, { "epoch": 82.40921169176262, "grad_norm": 0.24417750537395477, "learning_rate": 1e-05, "loss": 0.9088, "step": 93040 }, { "epoch": 82.41364038972542, "grad_norm": 0.2221381962299347, "learning_rate": 1e-05, "loss": 0.9832, "step": 93045 }, { "epoch": 82.41806908768822, "grad_norm": 0.213862344622612, "learning_rate": 1e-05, "loss": 1.0133, "step": 93050 }, { "epoch": 82.42249778565102, "grad_norm": 0.20365607738494873, "learning_rate": 1e-05, "loss": 0.939, "step": 93055 }, { "epoch": 82.42692648361381, "grad_norm": 0.21590612828731537, "learning_rate": 1e-05, "loss": 0.9743, "step": 93060 }, { "epoch": 82.43135518157662, "grad_norm": 0.23220698535442352, "learning_rate": 1e-05, "loss": 0.9838, "step": 93065 }, { "epoch": 82.43578387953941, "grad_norm": 0.20819173753261566, "learning_rate": 1e-05, "loss": 0.9414, "step": 93070 }, { "epoch": 82.44021257750221, "grad_norm": 0.21610568463802338, "learning_rate": 1e-05, "loss": 1.0014, "step": 93075 }, { "epoch": 82.44464127546502, "grad_norm": 0.2327726185321808, "learning_rate": 1e-05, "loss": 0.9509, "step": 93080 }, { "epoch": 82.44906997342781, "grad_norm": 0.2299305647611618, "learning_rate": 1e-05, "loss": 0.9702, "step": 93085 }, { "epoch": 82.4534986713906, "grad_norm": 0.2253926545381546, "learning_rate": 1e-05, "loss": 0.9686, "step": 93090 }, { "epoch": 82.45792736935341, "grad_norm": 0.21596622467041016, "learning_rate": 1e-05, "loss": 0.9716, "step": 93095 }, { "epoch": 82.46235606731621, "grad_norm": 0.25354430079460144, "learning_rate": 1e-05, "loss": 0.9749, "step": 93100 }, { "epoch": 82.466784765279, "grad_norm": 0.2254207283258438, "learning_rate": 1e-05, "loss": 1.0501, "step": 93105 }, { "epoch": 82.47121346324181, "grad_norm": 0.2142503261566162, "learning_rate": 1e-05, "loss": 0.9582, "step": 93110 }, { "epoch": 82.4756421612046, "grad_norm": 0.2229224294424057, "learning_rate": 1e-05, "loss": 0.9915, "step": 93115 }, { "epoch": 82.4800708591674, "grad_norm": 0.22385510802268982, "learning_rate": 1e-05, "loss": 0.9817, "step": 93120 }, { "epoch": 82.4844995571302, "grad_norm": 0.25829800963401794, "learning_rate": 1e-05, "loss": 1.006, "step": 93125 }, { "epoch": 82.488928255093, "grad_norm": 0.32452359795570374, "learning_rate": 1e-05, "loss": 0.959, "step": 93130 }, { "epoch": 82.4933569530558, "grad_norm": 0.28407981991767883, "learning_rate": 1e-05, "loss": 0.9668, "step": 93135 }, { "epoch": 82.4977856510186, "grad_norm": 0.23401610553264618, "learning_rate": 1e-05, "loss": 1.0103, "step": 93140 }, { "epoch": 82.5022143489814, "grad_norm": 0.22621333599090576, "learning_rate": 1e-05, "loss": 0.9498, "step": 93145 }, { "epoch": 82.5066430469442, "grad_norm": 0.2164662480354309, "learning_rate": 1e-05, "loss": 0.9791, "step": 93150 }, { "epoch": 82.511071744907, "grad_norm": 0.2804012894630432, "learning_rate": 1e-05, "loss": 0.9033, "step": 93155 }, { "epoch": 82.5155004428698, "grad_norm": 0.2612646222114563, "learning_rate": 1e-05, "loss": 0.9498, "step": 93160 }, { "epoch": 82.5199291408326, "grad_norm": 0.24112051725387573, "learning_rate": 1e-05, "loss": 0.9298, "step": 93165 }, { "epoch": 82.5243578387954, "grad_norm": 0.24598760902881622, "learning_rate": 1e-05, "loss": 0.9122, "step": 93170 }, { "epoch": 82.52878653675819, "grad_norm": 0.25218650698661804, "learning_rate": 1e-05, "loss": 0.9949, "step": 93175 }, { "epoch": 82.533215234721, "grad_norm": 0.2694810926914215, "learning_rate": 1e-05, "loss": 0.9456, "step": 93180 }, { "epoch": 82.53764393268379, "grad_norm": 0.2419082224369049, "learning_rate": 1e-05, "loss": 0.9532, "step": 93185 }, { "epoch": 82.54207263064659, "grad_norm": 0.2429601550102234, "learning_rate": 1e-05, "loss": 0.9605, "step": 93190 }, { "epoch": 82.5465013286094, "grad_norm": 0.26009324193000793, "learning_rate": 1e-05, "loss": 0.9127, "step": 93195 }, { "epoch": 82.55093002657219, "grad_norm": 0.2199859917163849, "learning_rate": 1e-05, "loss": 0.9576, "step": 93200 }, { "epoch": 82.55535872453498, "grad_norm": 0.2532244622707367, "learning_rate": 1e-05, "loss": 0.9356, "step": 93205 }, { "epoch": 82.55978742249779, "grad_norm": 0.2014007419347763, "learning_rate": 1e-05, "loss": 0.9214, "step": 93210 }, { "epoch": 82.56421612046059, "grad_norm": 0.2239326685667038, "learning_rate": 1e-05, "loss": 0.9221, "step": 93215 }, { "epoch": 82.56864481842338, "grad_norm": 0.23541061580181122, "learning_rate": 1e-05, "loss": 0.959, "step": 93220 }, { "epoch": 82.57307351638619, "grad_norm": 0.22436967492103577, "learning_rate": 1e-05, "loss": 0.9764, "step": 93225 }, { "epoch": 82.57750221434898, "grad_norm": 0.24005074799060822, "learning_rate": 1e-05, "loss": 0.9412, "step": 93230 }, { "epoch": 82.58193091231178, "grad_norm": 0.2823718786239624, "learning_rate": 1e-05, "loss": 0.9402, "step": 93235 }, { "epoch": 82.58635961027458, "grad_norm": 0.25429126620292664, "learning_rate": 1e-05, "loss": 0.9521, "step": 93240 }, { "epoch": 82.59078830823738, "grad_norm": 0.28728052973747253, "learning_rate": 1e-05, "loss": 0.9471, "step": 93245 }, { "epoch": 82.59521700620017, "grad_norm": 0.30380699038505554, "learning_rate": 1e-05, "loss": 0.9468, "step": 93250 }, { "epoch": 82.59964570416298, "grad_norm": 0.23408547043800354, "learning_rate": 1e-05, "loss": 0.9782, "step": 93255 }, { "epoch": 82.60407440212578, "grad_norm": 0.27851173281669617, "learning_rate": 1e-05, "loss": 0.9334, "step": 93260 }, { "epoch": 82.60850310008857, "grad_norm": 0.2508356273174286, "learning_rate": 1e-05, "loss": 0.9166, "step": 93265 }, { "epoch": 82.61293179805138, "grad_norm": 0.22674061357975006, "learning_rate": 1e-05, "loss": 0.9588, "step": 93270 }, { "epoch": 82.61736049601417, "grad_norm": 0.25120002031326294, "learning_rate": 1e-05, "loss": 0.966, "step": 93275 }, { "epoch": 82.62178919397697, "grad_norm": 0.1947123408317566, "learning_rate": 1e-05, "loss": 0.9637, "step": 93280 }, { "epoch": 82.62621789193977, "grad_norm": 0.20119597017765045, "learning_rate": 1e-05, "loss": 0.9332, "step": 93285 }, { "epoch": 82.63064658990257, "grad_norm": 0.26322153210639954, "learning_rate": 1e-05, "loss": 0.9351, "step": 93290 }, { "epoch": 82.63507528786536, "grad_norm": 0.2715175747871399, "learning_rate": 1e-05, "loss": 0.9647, "step": 93295 }, { "epoch": 82.63950398582817, "grad_norm": 0.2481713742017746, "learning_rate": 1e-05, "loss": 0.9588, "step": 93300 }, { "epoch": 82.64393268379096, "grad_norm": 0.2643562853336334, "learning_rate": 1e-05, "loss": 0.9844, "step": 93305 }, { "epoch": 82.64836138175376, "grad_norm": 0.2414681613445282, "learning_rate": 1e-05, "loss": 0.9631, "step": 93310 }, { "epoch": 82.65279007971657, "grad_norm": 0.2412794977426529, "learning_rate": 1e-05, "loss": 0.9093, "step": 93315 }, { "epoch": 82.65721877767936, "grad_norm": 0.24661731719970703, "learning_rate": 1e-05, "loss": 0.9578, "step": 93320 }, { "epoch": 82.66164747564216, "grad_norm": 0.23826812207698822, "learning_rate": 1e-05, "loss": 0.9786, "step": 93325 }, { "epoch": 82.66607617360496, "grad_norm": 0.25095582008361816, "learning_rate": 1e-05, "loss": 1.01, "step": 93330 }, { "epoch": 82.67050487156776, "grad_norm": 0.2329203337430954, "learning_rate": 1e-05, "loss": 0.9829, "step": 93335 }, { "epoch": 82.67493356953055, "grad_norm": 0.24053002893924713, "learning_rate": 1e-05, "loss": 0.9423, "step": 93340 }, { "epoch": 82.67936226749336, "grad_norm": 0.2403680831193924, "learning_rate": 1e-05, "loss": 0.9899, "step": 93345 }, { "epoch": 82.68379096545615, "grad_norm": 0.2531846761703491, "learning_rate": 1e-05, "loss": 0.9255, "step": 93350 }, { "epoch": 82.68821966341895, "grad_norm": 0.24978692829608917, "learning_rate": 1e-05, "loss": 0.982, "step": 93355 }, { "epoch": 82.69264836138176, "grad_norm": 0.20021769404411316, "learning_rate": 1e-05, "loss": 0.9315, "step": 93360 }, { "epoch": 82.69707705934455, "grad_norm": 0.2459782212972641, "learning_rate": 1e-05, "loss": 0.9287, "step": 93365 }, { "epoch": 82.70150575730734, "grad_norm": 0.2232067734003067, "learning_rate": 1e-05, "loss": 0.9587, "step": 93370 }, { "epoch": 82.70593445527015, "grad_norm": 0.19907306134700775, "learning_rate": 1e-05, "loss": 0.9604, "step": 93375 }, { "epoch": 82.71036315323295, "grad_norm": 0.2596239447593689, "learning_rate": 1e-05, "loss": 0.9674, "step": 93380 }, { "epoch": 82.71479185119574, "grad_norm": 0.22871831059455872, "learning_rate": 1e-05, "loss": 0.9891, "step": 93385 }, { "epoch": 82.71922054915855, "grad_norm": 0.2362879067659378, "learning_rate": 1e-05, "loss": 0.9983, "step": 93390 }, { "epoch": 82.72364924712134, "grad_norm": 0.24570442736148834, "learning_rate": 1e-05, "loss": 0.9535, "step": 93395 }, { "epoch": 82.72807794508415, "grad_norm": 0.2295689433813095, "learning_rate": 1e-05, "loss": 0.9624, "step": 93400 }, { "epoch": 82.73250664304695, "grad_norm": 0.2715066075325012, "learning_rate": 1e-05, "loss": 0.9914, "step": 93405 }, { "epoch": 82.73693534100974, "grad_norm": 0.2211691439151764, "learning_rate": 1e-05, "loss": 1.0296, "step": 93410 }, { "epoch": 82.74136403897255, "grad_norm": 0.22630882263183594, "learning_rate": 1e-05, "loss": 0.9538, "step": 93415 }, { "epoch": 82.74579273693534, "grad_norm": 0.20364220440387726, "learning_rate": 1e-05, "loss": 0.9625, "step": 93420 }, { "epoch": 82.75022143489814, "grad_norm": 0.23906847834587097, "learning_rate": 1e-05, "loss": 0.9306, "step": 93425 }, { "epoch": 82.75465013286095, "grad_norm": 0.22127152979373932, "learning_rate": 1e-05, "loss": 0.9094, "step": 93430 }, { "epoch": 82.75907883082374, "grad_norm": 0.24635757505893707, "learning_rate": 1e-05, "loss": 0.9977, "step": 93435 }, { "epoch": 82.76350752878653, "grad_norm": 0.23902717232704163, "learning_rate": 1e-05, "loss": 1.0056, "step": 93440 }, { "epoch": 82.76793622674934, "grad_norm": 0.23260647058486938, "learning_rate": 1e-05, "loss": 0.9775, "step": 93445 }, { "epoch": 82.77236492471214, "grad_norm": 0.22645121812820435, "learning_rate": 1e-05, "loss": 0.9846, "step": 93450 }, { "epoch": 82.77679362267493, "grad_norm": 0.25291764736175537, "learning_rate": 1e-05, "loss": 0.9866, "step": 93455 }, { "epoch": 82.78122232063774, "grad_norm": 0.23050011694431305, "learning_rate": 1e-05, "loss": 0.9735, "step": 93460 }, { "epoch": 82.78565101860053, "grad_norm": 0.24984940886497498, "learning_rate": 1e-05, "loss": 0.939, "step": 93465 }, { "epoch": 82.79007971656333, "grad_norm": 0.22395296394824982, "learning_rate": 1e-05, "loss": 0.9822, "step": 93470 }, { "epoch": 82.79450841452613, "grad_norm": 0.2991333603858948, "learning_rate": 1e-05, "loss": 0.9871, "step": 93475 }, { "epoch": 82.79893711248893, "grad_norm": 0.20330600440502167, "learning_rate": 1e-05, "loss": 0.9811, "step": 93480 }, { "epoch": 82.80336581045172, "grad_norm": 0.26736918091773987, "learning_rate": 1e-05, "loss": 0.982, "step": 93485 }, { "epoch": 82.80779450841453, "grad_norm": 0.2058844417333603, "learning_rate": 1e-05, "loss": 0.9572, "step": 93490 }, { "epoch": 82.81222320637733, "grad_norm": 0.27888891100883484, "learning_rate": 1e-05, "loss": 0.9617, "step": 93495 }, { "epoch": 82.81665190434012, "grad_norm": 0.2519075274467468, "learning_rate": 1e-05, "loss": 0.9374, "step": 93500 }, { "epoch": 82.82108060230293, "grad_norm": 0.25055989623069763, "learning_rate": 1e-05, "loss": 0.9121, "step": 93505 }, { "epoch": 82.82550930026572, "grad_norm": 0.23881596326828003, "learning_rate": 1e-05, "loss": 0.9784, "step": 93510 }, { "epoch": 82.82993799822852, "grad_norm": 0.2660433053970337, "learning_rate": 1e-05, "loss": 0.9406, "step": 93515 }, { "epoch": 82.83436669619132, "grad_norm": 0.2356128990650177, "learning_rate": 1e-05, "loss": 0.9004, "step": 93520 }, { "epoch": 82.83879539415412, "grad_norm": 0.27228793501853943, "learning_rate": 1e-05, "loss": 1.0454, "step": 93525 }, { "epoch": 82.84322409211691, "grad_norm": 0.27038222551345825, "learning_rate": 1e-05, "loss": 0.9682, "step": 93530 }, { "epoch": 82.84765279007972, "grad_norm": 0.23006685078144073, "learning_rate": 1e-05, "loss": 0.9283, "step": 93535 }, { "epoch": 82.85208148804251, "grad_norm": 0.22513146698474884, "learning_rate": 1e-05, "loss": 0.8931, "step": 93540 }, { "epoch": 82.85651018600531, "grad_norm": 0.25537872314453125, "learning_rate": 1e-05, "loss": 0.9568, "step": 93545 }, { "epoch": 82.86093888396812, "grad_norm": 0.216476172208786, "learning_rate": 1e-05, "loss": 0.9628, "step": 93550 }, { "epoch": 82.86536758193091, "grad_norm": 0.29048407077789307, "learning_rate": 1e-05, "loss": 1.0043, "step": 93555 }, { "epoch": 82.8697962798937, "grad_norm": 0.24756154417991638, "learning_rate": 1e-05, "loss": 0.9942, "step": 93560 }, { "epoch": 82.87422497785651, "grad_norm": 0.26578348875045776, "learning_rate": 1e-05, "loss": 0.9311, "step": 93565 }, { "epoch": 82.87865367581931, "grad_norm": 0.20526890456676483, "learning_rate": 1e-05, "loss": 0.9893, "step": 93570 }, { "epoch": 82.8830823737821, "grad_norm": 0.24398256838321686, "learning_rate": 1e-05, "loss": 1.0052, "step": 93575 }, { "epoch": 82.88751107174491, "grad_norm": 0.240494504570961, "learning_rate": 1e-05, "loss": 1.0197, "step": 93580 }, { "epoch": 82.8919397697077, "grad_norm": 0.2334831953048706, "learning_rate": 1e-05, "loss": 0.9274, "step": 93585 }, { "epoch": 82.8963684676705, "grad_norm": 0.2473529726266861, "learning_rate": 1e-05, "loss": 0.9383, "step": 93590 }, { "epoch": 82.9007971656333, "grad_norm": 0.2314014732837677, "learning_rate": 1e-05, "loss": 0.9532, "step": 93595 }, { "epoch": 82.9052258635961, "grad_norm": 0.2326183021068573, "learning_rate": 1e-05, "loss": 0.9951, "step": 93600 }, { "epoch": 82.9096545615589, "grad_norm": 0.23426249623298645, "learning_rate": 1e-05, "loss": 0.9687, "step": 93605 }, { "epoch": 82.9140832595217, "grad_norm": 0.2544894516468048, "learning_rate": 1e-05, "loss": 0.9483, "step": 93610 }, { "epoch": 82.9185119574845, "grad_norm": 0.23566783964633942, "learning_rate": 1e-05, "loss": 0.9285, "step": 93615 }, { "epoch": 82.92294065544729, "grad_norm": 0.2213720679283142, "learning_rate": 1e-05, "loss": 0.9549, "step": 93620 }, { "epoch": 82.9273693534101, "grad_norm": 0.22739677131175995, "learning_rate": 1e-05, "loss": 0.953, "step": 93625 }, { "epoch": 82.9317980513729, "grad_norm": 0.23768272995948792, "learning_rate": 1e-05, "loss": 1.0153, "step": 93630 }, { "epoch": 82.9362267493357, "grad_norm": 0.274625301361084, "learning_rate": 1e-05, "loss": 0.9561, "step": 93635 }, { "epoch": 82.9406554472985, "grad_norm": 0.21489015221595764, "learning_rate": 1e-05, "loss": 0.9581, "step": 93640 }, { "epoch": 82.94508414526129, "grad_norm": 0.22081969678401947, "learning_rate": 1e-05, "loss": 0.9891, "step": 93645 }, { "epoch": 82.9495128432241, "grad_norm": 0.2381548285484314, "learning_rate": 1e-05, "loss": 0.9147, "step": 93650 }, { "epoch": 82.9539415411869, "grad_norm": 0.25227224826812744, "learning_rate": 1e-05, "loss": 0.9287, "step": 93655 }, { "epoch": 82.95837023914969, "grad_norm": 0.23421943187713623, "learning_rate": 1e-05, "loss": 0.8847, "step": 93660 }, { "epoch": 82.9627989371125, "grad_norm": 0.23901128768920898, "learning_rate": 1e-05, "loss": 0.9837, "step": 93665 }, { "epoch": 82.96722763507529, "grad_norm": 0.2223140150308609, "learning_rate": 1e-05, "loss": 0.9138, "step": 93670 }, { "epoch": 82.97165633303808, "grad_norm": 0.23102019727230072, "learning_rate": 1e-05, "loss": 0.9948, "step": 93675 }, { "epoch": 82.97608503100089, "grad_norm": 0.23231561481952667, "learning_rate": 1e-05, "loss": 0.9638, "step": 93680 }, { "epoch": 82.98051372896369, "grad_norm": 0.265850692987442, "learning_rate": 1e-05, "loss": 0.9827, "step": 93685 }, { "epoch": 82.98494242692648, "grad_norm": 0.24320149421691895, "learning_rate": 1e-05, "loss": 0.9245, "step": 93690 }, { "epoch": 82.98937112488929, "grad_norm": 0.21519111096858978, "learning_rate": 1e-05, "loss": 1.0019, "step": 93695 }, { "epoch": 82.99379982285208, "grad_norm": 0.25483617186546326, "learning_rate": 1e-05, "loss": 0.9373, "step": 93700 }, { "epoch": 82.99822852081488, "grad_norm": 0.2806563973426819, "learning_rate": 1e-05, "loss": 0.9762, "step": 93705 }, { "epoch": 83.00265721877768, "grad_norm": 0.2656966745853424, "learning_rate": 1e-05, "loss": 0.9243, "step": 93710 }, { "epoch": 83.00708591674048, "grad_norm": 0.20566770434379578, "learning_rate": 1e-05, "loss": 0.947, "step": 93715 }, { "epoch": 83.01151461470327, "grad_norm": 0.2320093810558319, "learning_rate": 1e-05, "loss": 0.9625, "step": 93720 }, { "epoch": 83.01594331266608, "grad_norm": 0.24047455191612244, "learning_rate": 1e-05, "loss": 0.9815, "step": 93725 }, { "epoch": 83.02037201062888, "grad_norm": 0.28820961713790894, "learning_rate": 1e-05, "loss": 0.9929, "step": 93730 }, { "epoch": 83.02480070859167, "grad_norm": 0.2528837025165558, "learning_rate": 1e-05, "loss": 0.9623, "step": 93735 }, { "epoch": 83.02922940655448, "grad_norm": 0.21906864643096924, "learning_rate": 1e-05, "loss": 0.9894, "step": 93740 }, { "epoch": 83.03365810451727, "grad_norm": 0.24371162056922913, "learning_rate": 1e-05, "loss": 0.9505, "step": 93745 }, { "epoch": 83.03808680248007, "grad_norm": 0.25481653213500977, "learning_rate": 1e-05, "loss": 1.0109, "step": 93750 }, { "epoch": 83.04251550044287, "grad_norm": 0.25346407294273376, "learning_rate": 1e-05, "loss": 1.0049, "step": 93755 }, { "epoch": 83.04694419840567, "grad_norm": 0.22388868033885956, "learning_rate": 1e-05, "loss": 0.9563, "step": 93760 }, { "epoch": 83.05137289636846, "grad_norm": 0.2326165735721588, "learning_rate": 1e-05, "loss": 0.9582, "step": 93765 }, { "epoch": 83.05580159433127, "grad_norm": 0.23829403519630432, "learning_rate": 1e-05, "loss": 0.9954, "step": 93770 }, { "epoch": 83.06023029229407, "grad_norm": 0.254862904548645, "learning_rate": 1e-05, "loss": 0.9827, "step": 93775 }, { "epoch": 83.06465899025686, "grad_norm": 0.261135071516037, "learning_rate": 1e-05, "loss": 0.9782, "step": 93780 }, { "epoch": 83.06908768821967, "grad_norm": 0.2482091784477234, "learning_rate": 1e-05, "loss": 0.9649, "step": 93785 }, { "epoch": 83.07351638618246, "grad_norm": 0.24871626496315002, "learning_rate": 1e-05, "loss": 0.967, "step": 93790 }, { "epoch": 83.07794508414526, "grad_norm": 0.22859255969524384, "learning_rate": 1e-05, "loss": 0.979, "step": 93795 }, { "epoch": 83.08237378210806, "grad_norm": 0.2156241238117218, "learning_rate": 1e-05, "loss": 0.996, "step": 93800 }, { "epoch": 83.08680248007086, "grad_norm": 0.24705450236797333, "learning_rate": 1e-05, "loss": 0.9502, "step": 93805 }, { "epoch": 83.09123117803365, "grad_norm": 0.253366619348526, "learning_rate": 1e-05, "loss": 1.0136, "step": 93810 }, { "epoch": 83.09565987599646, "grad_norm": 0.26688405871391296, "learning_rate": 1e-05, "loss": 0.9434, "step": 93815 }, { "epoch": 83.10008857395925, "grad_norm": 0.24295754730701447, "learning_rate": 1e-05, "loss": 0.9494, "step": 93820 }, { "epoch": 83.10451727192205, "grad_norm": 0.2648477852344513, "learning_rate": 1e-05, "loss": 0.917, "step": 93825 }, { "epoch": 83.10894596988486, "grad_norm": 0.24447417259216309, "learning_rate": 1e-05, "loss": 0.9163, "step": 93830 }, { "epoch": 83.11337466784765, "grad_norm": 0.23296914994716644, "learning_rate": 1e-05, "loss": 0.928, "step": 93835 }, { "epoch": 83.11780336581045, "grad_norm": 0.24273428320884705, "learning_rate": 1e-05, "loss": 0.9528, "step": 93840 }, { "epoch": 83.12223206377325, "grad_norm": 0.24386471509933472, "learning_rate": 1e-05, "loss": 0.9255, "step": 93845 }, { "epoch": 83.12666076173605, "grad_norm": 0.2202557623386383, "learning_rate": 1e-05, "loss": 0.9744, "step": 93850 }, { "epoch": 83.13108945969884, "grad_norm": 0.21652165055274963, "learning_rate": 1e-05, "loss": 0.9819, "step": 93855 }, { "epoch": 83.13551815766165, "grad_norm": 0.2537614405155182, "learning_rate": 1e-05, "loss": 0.9244, "step": 93860 }, { "epoch": 83.13994685562444, "grad_norm": 0.21981799602508545, "learning_rate": 1e-05, "loss": 0.9743, "step": 93865 }, { "epoch": 83.14437555358724, "grad_norm": 0.2640995383262634, "learning_rate": 1e-05, "loss": 0.9378, "step": 93870 }, { "epoch": 83.14880425155005, "grad_norm": 0.2844601571559906, "learning_rate": 1e-05, "loss": 1.002, "step": 93875 }, { "epoch": 83.15323294951284, "grad_norm": 0.2336452305316925, "learning_rate": 1e-05, "loss": 0.9849, "step": 93880 }, { "epoch": 83.15766164747565, "grad_norm": 0.23424705862998962, "learning_rate": 1e-05, "loss": 0.9459, "step": 93885 }, { "epoch": 83.16209034543844, "grad_norm": 0.22237670421600342, "learning_rate": 1e-05, "loss": 0.9559, "step": 93890 }, { "epoch": 83.16651904340124, "grad_norm": 0.256944477558136, "learning_rate": 1e-05, "loss": 0.9718, "step": 93895 }, { "epoch": 83.17094774136405, "grad_norm": 0.27956265211105347, "learning_rate": 1e-05, "loss": 0.9321, "step": 93900 }, { "epoch": 83.17537643932684, "grad_norm": 0.2760850787162781, "learning_rate": 1e-05, "loss": 0.9646, "step": 93905 }, { "epoch": 83.17980513728963, "grad_norm": 0.24100790917873383, "learning_rate": 1e-05, "loss": 0.9661, "step": 93910 }, { "epoch": 83.18423383525244, "grad_norm": 0.2767733335494995, "learning_rate": 1e-05, "loss": 0.9605, "step": 93915 }, { "epoch": 83.18866253321524, "grad_norm": 0.2566892206668854, "learning_rate": 1e-05, "loss": 1.0081, "step": 93920 }, { "epoch": 83.19309123117803, "grad_norm": 0.26374760270118713, "learning_rate": 1e-05, "loss": 0.9895, "step": 93925 }, { "epoch": 83.19751992914084, "grad_norm": 0.25351467728614807, "learning_rate": 1e-05, "loss": 0.9793, "step": 93930 }, { "epoch": 83.20194862710363, "grad_norm": 0.2797525227069855, "learning_rate": 1e-05, "loss": 0.9338, "step": 93935 }, { "epoch": 83.20637732506643, "grad_norm": 0.2216639667749405, "learning_rate": 1e-05, "loss": 0.954, "step": 93940 }, { "epoch": 83.21080602302924, "grad_norm": 0.2510758936405182, "learning_rate": 1e-05, "loss": 0.9177, "step": 93945 }, { "epoch": 83.21523472099203, "grad_norm": 0.2510535717010498, "learning_rate": 1e-05, "loss": 0.9437, "step": 93950 }, { "epoch": 83.21966341895482, "grad_norm": 0.20791025459766388, "learning_rate": 1e-05, "loss": 0.9723, "step": 93955 }, { "epoch": 83.22409211691763, "grad_norm": 0.2299337387084961, "learning_rate": 1e-05, "loss": 0.9697, "step": 93960 }, { "epoch": 83.22852081488043, "grad_norm": 0.24523498117923737, "learning_rate": 1e-05, "loss": 0.9288, "step": 93965 }, { "epoch": 83.23294951284322, "grad_norm": 0.2189374417066574, "learning_rate": 1e-05, "loss": 0.9566, "step": 93970 }, { "epoch": 83.23737821080603, "grad_norm": 0.2211942970752716, "learning_rate": 1e-05, "loss": 0.9416, "step": 93975 }, { "epoch": 83.24180690876882, "grad_norm": 0.27817586064338684, "learning_rate": 1e-05, "loss": 1.001, "step": 93980 }, { "epoch": 83.24623560673162, "grad_norm": 0.24003823101520538, "learning_rate": 1e-05, "loss": 0.9511, "step": 93985 }, { "epoch": 83.25066430469442, "grad_norm": 0.2670159637928009, "learning_rate": 1e-05, "loss": 0.9194, "step": 93990 }, { "epoch": 83.25509300265722, "grad_norm": 0.24465975165367126, "learning_rate": 1e-05, "loss": 0.9963, "step": 93995 }, { "epoch": 83.25952170062001, "grad_norm": 0.23428300023078918, "learning_rate": 1e-05, "loss": 0.9712, "step": 94000 }, { "epoch": 83.26395039858282, "grad_norm": 0.26530906558036804, "learning_rate": 1e-05, "loss": 0.9394, "step": 94005 }, { "epoch": 83.26837909654562, "grad_norm": 0.23970292508602142, "learning_rate": 1e-05, "loss": 0.9998, "step": 94010 }, { "epoch": 83.27280779450841, "grad_norm": 0.23912519216537476, "learning_rate": 1e-05, "loss": 0.937, "step": 94015 }, { "epoch": 83.27723649247122, "grad_norm": 0.22111675143241882, "learning_rate": 1e-05, "loss": 0.9752, "step": 94020 }, { "epoch": 83.28166519043401, "grad_norm": 0.2533017694950104, "learning_rate": 1e-05, "loss": 0.928, "step": 94025 }, { "epoch": 83.2860938883968, "grad_norm": 0.2356625348329544, "learning_rate": 1e-05, "loss": 1.0055, "step": 94030 }, { "epoch": 83.29052258635961, "grad_norm": 0.2418094277381897, "learning_rate": 1e-05, "loss": 0.9309, "step": 94035 }, { "epoch": 83.29495128432241, "grad_norm": 0.22982174158096313, "learning_rate": 1e-05, "loss": 1.0282, "step": 94040 }, { "epoch": 83.2993799822852, "grad_norm": 0.23485109210014343, "learning_rate": 1e-05, "loss": 0.9082, "step": 94045 }, { "epoch": 83.30380868024801, "grad_norm": 0.24822627007961273, "learning_rate": 1e-05, "loss": 0.9743, "step": 94050 }, { "epoch": 83.3082373782108, "grad_norm": 0.22795900702476501, "learning_rate": 1e-05, "loss": 0.9361, "step": 94055 }, { "epoch": 83.3126660761736, "grad_norm": 0.24357326328754425, "learning_rate": 1e-05, "loss": 0.9561, "step": 94060 }, { "epoch": 83.31709477413641, "grad_norm": 0.20118311047554016, "learning_rate": 1e-05, "loss": 0.9789, "step": 94065 }, { "epoch": 83.3215234720992, "grad_norm": 0.23784692585468292, "learning_rate": 1e-05, "loss": 0.9659, "step": 94070 }, { "epoch": 83.325952170062, "grad_norm": 0.2454223781824112, "learning_rate": 1e-05, "loss": 0.9563, "step": 94075 }, { "epoch": 83.3303808680248, "grad_norm": 0.21749012172222137, "learning_rate": 1e-05, "loss": 0.896, "step": 94080 }, { "epoch": 83.3348095659876, "grad_norm": 0.2591759264469147, "learning_rate": 1e-05, "loss": 1.003, "step": 94085 }, { "epoch": 83.33923826395039, "grad_norm": 0.22938373684883118, "learning_rate": 1e-05, "loss": 0.9905, "step": 94090 }, { "epoch": 83.3436669619132, "grad_norm": 0.23867255449295044, "learning_rate": 1e-05, "loss": 0.9295, "step": 94095 }, { "epoch": 83.348095659876, "grad_norm": 0.26197946071624756, "learning_rate": 1e-05, "loss": 0.9188, "step": 94100 }, { "epoch": 83.35252435783879, "grad_norm": 0.19717928767204285, "learning_rate": 1e-05, "loss": 0.98, "step": 94105 }, { "epoch": 83.3569530558016, "grad_norm": 0.243445485830307, "learning_rate": 1e-05, "loss": 0.9966, "step": 94110 }, { "epoch": 83.36138175376439, "grad_norm": 0.2510448694229126, "learning_rate": 1e-05, "loss": 1.0064, "step": 94115 }, { "epoch": 83.36581045172719, "grad_norm": 0.24246296286582947, "learning_rate": 1e-05, "loss": 0.9116, "step": 94120 }, { "epoch": 83.37023914969, "grad_norm": 0.23425616323947906, "learning_rate": 1e-05, "loss": 0.9252, "step": 94125 }, { "epoch": 83.37466784765279, "grad_norm": 0.27216097712516785, "learning_rate": 1e-05, "loss": 0.9675, "step": 94130 }, { "epoch": 83.3790965456156, "grad_norm": 0.22336341440677643, "learning_rate": 1e-05, "loss": 0.9339, "step": 94135 }, { "epoch": 83.38352524357839, "grad_norm": 0.23352289199829102, "learning_rate": 1e-05, "loss": 0.9757, "step": 94140 }, { "epoch": 83.38795394154118, "grad_norm": 0.2487468123435974, "learning_rate": 1e-05, "loss": 0.9449, "step": 94145 }, { "epoch": 83.39238263950399, "grad_norm": 0.23621197044849396, "learning_rate": 1e-05, "loss": 0.9453, "step": 94150 }, { "epoch": 83.39681133746679, "grad_norm": 0.2535061240196228, "learning_rate": 1e-05, "loss": 0.9549, "step": 94155 }, { "epoch": 83.40124003542958, "grad_norm": 0.21085460484027863, "learning_rate": 1e-05, "loss": 0.955, "step": 94160 }, { "epoch": 83.40566873339239, "grad_norm": 0.22095757722854614, "learning_rate": 1e-05, "loss": 0.9444, "step": 94165 }, { "epoch": 83.41009743135518, "grad_norm": 0.23923417925834656, "learning_rate": 1e-05, "loss": 0.9605, "step": 94170 }, { "epoch": 83.41452612931798, "grad_norm": 0.20255383849143982, "learning_rate": 1e-05, "loss": 0.9608, "step": 94175 }, { "epoch": 83.41895482728079, "grad_norm": 0.2439180463552475, "learning_rate": 1e-05, "loss": 0.9317, "step": 94180 }, { "epoch": 83.42338352524358, "grad_norm": 0.21478743851184845, "learning_rate": 1e-05, "loss": 0.9645, "step": 94185 }, { "epoch": 83.42781222320637, "grad_norm": 0.2719917297363281, "learning_rate": 1e-05, "loss": 1.0214, "step": 94190 }, { "epoch": 83.43224092116918, "grad_norm": 0.24778911471366882, "learning_rate": 1e-05, "loss": 0.9526, "step": 94195 }, { "epoch": 83.43666961913198, "grad_norm": 0.23322463035583496, "learning_rate": 1e-05, "loss": 0.909, "step": 94200 }, { "epoch": 83.44109831709477, "grad_norm": 0.2065475881099701, "learning_rate": 1e-05, "loss": 1.0028, "step": 94205 }, { "epoch": 83.44552701505758, "grad_norm": 0.31307050585746765, "learning_rate": 1e-05, "loss": 0.9903, "step": 94210 }, { "epoch": 83.44995571302037, "grad_norm": 0.2713836431503296, "learning_rate": 1e-05, "loss": 0.9735, "step": 94215 }, { "epoch": 83.45438441098317, "grad_norm": 0.2739722430706024, "learning_rate": 1e-05, "loss": 0.9119, "step": 94220 }, { "epoch": 83.45881310894598, "grad_norm": 0.2169104665517807, "learning_rate": 1e-05, "loss": 0.9441, "step": 94225 }, { "epoch": 83.46324180690877, "grad_norm": 0.2187582552433014, "learning_rate": 1e-05, "loss": 0.9513, "step": 94230 }, { "epoch": 83.46767050487156, "grad_norm": 0.2652219235897064, "learning_rate": 1e-05, "loss": 0.959, "step": 94235 }, { "epoch": 83.47209920283437, "grad_norm": 0.2385750412940979, "learning_rate": 1e-05, "loss": 0.9393, "step": 94240 }, { "epoch": 83.47652790079717, "grad_norm": 0.23065853118896484, "learning_rate": 1e-05, "loss": 0.9355, "step": 94245 }, { "epoch": 83.48095659875996, "grad_norm": 0.23643366992473602, "learning_rate": 1e-05, "loss": 0.9636, "step": 94250 }, { "epoch": 83.48538529672277, "grad_norm": 0.21341638267040253, "learning_rate": 1e-05, "loss": 0.9669, "step": 94255 }, { "epoch": 83.48981399468556, "grad_norm": 0.23498867452144623, "learning_rate": 1e-05, "loss": 0.9627, "step": 94260 }, { "epoch": 83.49424269264836, "grad_norm": 0.2745988965034485, "learning_rate": 1e-05, "loss": 0.9552, "step": 94265 }, { "epoch": 83.49867139061116, "grad_norm": 0.23934786021709442, "learning_rate": 1e-05, "loss": 0.9565, "step": 94270 }, { "epoch": 83.50310008857396, "grad_norm": 0.2456146478652954, "learning_rate": 1e-05, "loss": 0.9533, "step": 94275 }, { "epoch": 83.50752878653675, "grad_norm": 0.2496112436056137, "learning_rate": 1e-05, "loss": 0.9893, "step": 94280 }, { "epoch": 83.51195748449956, "grad_norm": 0.23185831308364868, "learning_rate": 1e-05, "loss": 0.9491, "step": 94285 }, { "epoch": 83.51638618246236, "grad_norm": 0.22603899240493774, "learning_rate": 1e-05, "loss": 0.928, "step": 94290 }, { "epoch": 83.52081488042515, "grad_norm": 0.2061179131269455, "learning_rate": 1e-05, "loss": 0.9108, "step": 94295 }, { "epoch": 83.52524357838796, "grad_norm": 0.22852736711502075, "learning_rate": 1e-05, "loss": 0.9457, "step": 94300 }, { "epoch": 83.52967227635075, "grad_norm": 0.25372546911239624, "learning_rate": 1e-05, "loss": 1.0114, "step": 94305 }, { "epoch": 83.53410097431355, "grad_norm": 0.25917112827301025, "learning_rate": 1e-05, "loss": 0.9653, "step": 94310 }, { "epoch": 83.53852967227635, "grad_norm": 0.2512149512767792, "learning_rate": 1e-05, "loss": 1.0308, "step": 94315 }, { "epoch": 83.54295837023915, "grad_norm": 0.256344199180603, "learning_rate": 1e-05, "loss": 0.8993, "step": 94320 }, { "epoch": 83.54738706820194, "grad_norm": 0.23940466344356537, "learning_rate": 1e-05, "loss": 0.9586, "step": 94325 }, { "epoch": 83.55181576616475, "grad_norm": 0.2747702896595001, "learning_rate": 1e-05, "loss": 0.9663, "step": 94330 }, { "epoch": 83.55624446412754, "grad_norm": 0.2620646059513092, "learning_rate": 1e-05, "loss": 1.0089, "step": 94335 }, { "epoch": 83.56067316209034, "grad_norm": 0.24633489549160004, "learning_rate": 1e-05, "loss": 0.9359, "step": 94340 }, { "epoch": 83.56510186005315, "grad_norm": 0.2337767630815506, "learning_rate": 1e-05, "loss": 0.9316, "step": 94345 }, { "epoch": 83.56953055801594, "grad_norm": 0.2599075734615326, "learning_rate": 1e-05, "loss": 0.9184, "step": 94350 }, { "epoch": 83.57395925597874, "grad_norm": 0.2704962491989136, "learning_rate": 1e-05, "loss": 0.9372, "step": 94355 }, { "epoch": 83.57838795394154, "grad_norm": 0.25836795568466187, "learning_rate": 1e-05, "loss": 0.9332, "step": 94360 }, { "epoch": 83.58281665190434, "grad_norm": 0.25076961517333984, "learning_rate": 1e-05, "loss": 1.0115, "step": 94365 }, { "epoch": 83.58724534986715, "grad_norm": 0.2208089828491211, "learning_rate": 1e-05, "loss": 0.9698, "step": 94370 }, { "epoch": 83.59167404782994, "grad_norm": 0.23884716629981995, "learning_rate": 1e-05, "loss": 0.9655, "step": 94375 }, { "epoch": 83.59610274579273, "grad_norm": 0.22609737515449524, "learning_rate": 1e-05, "loss": 0.9924, "step": 94380 }, { "epoch": 83.60053144375554, "grad_norm": 0.2637893557548523, "learning_rate": 1e-05, "loss": 0.9885, "step": 94385 }, { "epoch": 83.60496014171834, "grad_norm": 0.24326424300670624, "learning_rate": 1e-05, "loss": 0.9435, "step": 94390 }, { "epoch": 83.60938883968113, "grad_norm": 0.23057204484939575, "learning_rate": 1e-05, "loss": 1.0339, "step": 94395 }, { "epoch": 83.61381753764394, "grad_norm": 0.23118382692337036, "learning_rate": 1e-05, "loss": 0.973, "step": 94400 }, { "epoch": 83.61824623560673, "grad_norm": 0.2289682924747467, "learning_rate": 1e-05, "loss": 1.02, "step": 94405 }, { "epoch": 83.62267493356953, "grad_norm": 0.21582165360450745, "learning_rate": 1e-05, "loss": 0.9359, "step": 94410 }, { "epoch": 83.62710363153234, "grad_norm": 0.27156198024749756, "learning_rate": 1e-05, "loss": 0.9057, "step": 94415 }, { "epoch": 83.63153232949513, "grad_norm": 0.23715059459209442, "learning_rate": 1e-05, "loss": 0.9754, "step": 94420 }, { "epoch": 83.63596102745792, "grad_norm": 0.24554996192455292, "learning_rate": 1e-05, "loss": 0.9719, "step": 94425 }, { "epoch": 83.64038972542073, "grad_norm": 0.19621340930461884, "learning_rate": 1e-05, "loss": 0.9038, "step": 94430 }, { "epoch": 83.64481842338353, "grad_norm": 0.22292792797088623, "learning_rate": 1e-05, "loss": 0.9693, "step": 94435 }, { "epoch": 83.64924712134632, "grad_norm": 0.21607856452465057, "learning_rate": 1e-05, "loss": 1.0021, "step": 94440 }, { "epoch": 83.65367581930913, "grad_norm": 0.24264638125896454, "learning_rate": 1e-05, "loss": 0.8949, "step": 94445 }, { "epoch": 83.65810451727192, "grad_norm": 0.2808072865009308, "learning_rate": 1e-05, "loss": 0.9903, "step": 94450 }, { "epoch": 83.66253321523472, "grad_norm": 0.2280391901731491, "learning_rate": 1e-05, "loss": 0.9436, "step": 94455 }, { "epoch": 83.66696191319753, "grad_norm": 0.2014925479888916, "learning_rate": 1e-05, "loss": 0.9296, "step": 94460 }, { "epoch": 83.67139061116032, "grad_norm": 0.26185154914855957, "learning_rate": 1e-05, "loss": 0.9527, "step": 94465 }, { "epoch": 83.67581930912311, "grad_norm": 0.29537293314933777, "learning_rate": 1e-05, "loss": 0.9701, "step": 94470 }, { "epoch": 83.68024800708592, "grad_norm": 0.2430686205625534, "learning_rate": 1e-05, "loss": 0.9215, "step": 94475 }, { "epoch": 83.68467670504872, "grad_norm": 0.25594595074653625, "learning_rate": 1e-05, "loss": 0.9331, "step": 94480 }, { "epoch": 83.68910540301151, "grad_norm": 0.28057706356048584, "learning_rate": 1e-05, "loss": 0.9761, "step": 94485 }, { "epoch": 83.69353410097432, "grad_norm": 0.26596999168395996, "learning_rate": 1e-05, "loss": 0.9961, "step": 94490 }, { "epoch": 83.69796279893711, "grad_norm": 0.21645207703113556, "learning_rate": 1e-05, "loss": 0.9324, "step": 94495 }, { "epoch": 83.7023914968999, "grad_norm": 0.2167506068944931, "learning_rate": 1e-05, "loss": 1.0098, "step": 94500 }, { "epoch": 83.70682019486271, "grad_norm": 0.26335132122039795, "learning_rate": 1e-05, "loss": 0.9415, "step": 94505 }, { "epoch": 83.71124889282551, "grad_norm": 0.24410569667816162, "learning_rate": 1e-05, "loss": 0.9318, "step": 94510 }, { "epoch": 83.7156775907883, "grad_norm": 0.27268674969673157, "learning_rate": 1e-05, "loss": 0.9953, "step": 94515 }, { "epoch": 83.72010628875111, "grad_norm": 0.26459160447120667, "learning_rate": 1e-05, "loss": 0.9528, "step": 94520 }, { "epoch": 83.7245349867139, "grad_norm": 0.26562801003456116, "learning_rate": 1e-05, "loss": 0.9374, "step": 94525 }, { "epoch": 83.7289636846767, "grad_norm": 0.22361956536769867, "learning_rate": 1e-05, "loss": 0.948, "step": 94530 }, { "epoch": 83.73339238263951, "grad_norm": 0.2498777210712433, "learning_rate": 1e-05, "loss": 1.0177, "step": 94535 }, { "epoch": 83.7378210806023, "grad_norm": 0.23832964897155762, "learning_rate": 1e-05, "loss": 0.9412, "step": 94540 }, { "epoch": 83.7422497785651, "grad_norm": 0.22808273136615753, "learning_rate": 1e-05, "loss": 0.9376, "step": 94545 }, { "epoch": 83.7466784765279, "grad_norm": 0.22089675068855286, "learning_rate": 1e-05, "loss": 0.9192, "step": 94550 }, { "epoch": 83.7511071744907, "grad_norm": 0.21950238943099976, "learning_rate": 1e-05, "loss": 0.9257, "step": 94555 }, { "epoch": 83.75553587245349, "grad_norm": 0.23859283328056335, "learning_rate": 1e-05, "loss": 0.9569, "step": 94560 }, { "epoch": 83.7599645704163, "grad_norm": 0.26677677035331726, "learning_rate": 1e-05, "loss": 0.9795, "step": 94565 }, { "epoch": 83.7643932683791, "grad_norm": 0.2582817077636719, "learning_rate": 1e-05, "loss": 1.0173, "step": 94570 }, { "epoch": 83.76882196634189, "grad_norm": 0.2499431073665619, "learning_rate": 1e-05, "loss": 0.9707, "step": 94575 }, { "epoch": 83.7732506643047, "grad_norm": 0.26266103982925415, "learning_rate": 1e-05, "loss": 0.9458, "step": 94580 }, { "epoch": 83.77767936226749, "grad_norm": 0.2051333785057068, "learning_rate": 1e-05, "loss": 0.9545, "step": 94585 }, { "epoch": 83.78210806023029, "grad_norm": 0.25427547097206116, "learning_rate": 1e-05, "loss": 0.9983, "step": 94590 }, { "epoch": 83.7865367581931, "grad_norm": 0.2582823634147644, "learning_rate": 1e-05, "loss": 0.9499, "step": 94595 }, { "epoch": 83.79096545615589, "grad_norm": 0.2203664630651474, "learning_rate": 1e-05, "loss": 0.9563, "step": 94600 }, { "epoch": 83.79539415411868, "grad_norm": 0.25477251410484314, "learning_rate": 1e-05, "loss": 0.9456, "step": 94605 }, { "epoch": 83.79982285208149, "grad_norm": 0.23443010449409485, "learning_rate": 1e-05, "loss": 0.9871, "step": 94610 }, { "epoch": 83.80425155004428, "grad_norm": 0.2462361454963684, "learning_rate": 1e-05, "loss": 0.9906, "step": 94615 }, { "epoch": 83.8086802480071, "grad_norm": 0.2674030065536499, "learning_rate": 1e-05, "loss": 0.9446, "step": 94620 }, { "epoch": 83.81310894596989, "grad_norm": 0.28179067373275757, "learning_rate": 1e-05, "loss": 0.8903, "step": 94625 }, { "epoch": 83.81753764393268, "grad_norm": 0.2698670029640198, "learning_rate": 1e-05, "loss": 0.9713, "step": 94630 }, { "epoch": 83.82196634189549, "grad_norm": 0.2474033683538437, "learning_rate": 1e-05, "loss": 0.9814, "step": 94635 }, { "epoch": 83.82639503985828, "grad_norm": 0.2378651648759842, "learning_rate": 1e-05, "loss": 1.0021, "step": 94640 }, { "epoch": 83.83082373782108, "grad_norm": 0.23646245896816254, "learning_rate": 1e-05, "loss": 0.8945, "step": 94645 }, { "epoch": 83.83525243578389, "grad_norm": 0.2506885230541229, "learning_rate": 1e-05, "loss": 0.9129, "step": 94650 }, { "epoch": 83.83968113374668, "grad_norm": 0.21488212049007416, "learning_rate": 1e-05, "loss": 0.9575, "step": 94655 }, { "epoch": 83.84410983170947, "grad_norm": 0.25113147497177124, "learning_rate": 1e-05, "loss": 0.9671, "step": 94660 }, { "epoch": 83.84853852967228, "grad_norm": 0.24635741114616394, "learning_rate": 1e-05, "loss": 0.9819, "step": 94665 }, { "epoch": 83.85296722763508, "grad_norm": 0.21752731502056122, "learning_rate": 1e-05, "loss": 0.9149, "step": 94670 }, { "epoch": 83.85739592559787, "grad_norm": 0.24392277002334595, "learning_rate": 1e-05, "loss": 0.9282, "step": 94675 }, { "epoch": 83.86182462356068, "grad_norm": 0.22871124744415283, "learning_rate": 1e-05, "loss": 0.9968, "step": 94680 }, { "epoch": 83.86625332152347, "grad_norm": 0.23201842606067657, "learning_rate": 1e-05, "loss": 0.9356, "step": 94685 }, { "epoch": 83.87068201948627, "grad_norm": 0.21385346353054047, "learning_rate": 1e-05, "loss": 0.9459, "step": 94690 }, { "epoch": 83.87511071744908, "grad_norm": 0.22885695099830627, "learning_rate": 1e-05, "loss": 0.9859, "step": 94695 }, { "epoch": 83.87953941541187, "grad_norm": 0.25592973828315735, "learning_rate": 1e-05, "loss": 0.9357, "step": 94700 }, { "epoch": 83.88396811337466, "grad_norm": 0.22758303582668304, "learning_rate": 1e-05, "loss": 0.9466, "step": 94705 }, { "epoch": 83.88839681133747, "grad_norm": 0.23584698140621185, "learning_rate": 1e-05, "loss": 0.9867, "step": 94710 }, { "epoch": 83.89282550930027, "grad_norm": 0.24014510214328766, "learning_rate": 1e-05, "loss": 0.9604, "step": 94715 }, { "epoch": 83.89725420726306, "grad_norm": 0.2617188096046448, "learning_rate": 1e-05, "loss": 0.9979, "step": 94720 }, { "epoch": 83.90168290522587, "grad_norm": 0.27377843856811523, "learning_rate": 1e-05, "loss": 0.9372, "step": 94725 }, { "epoch": 83.90611160318866, "grad_norm": 0.2842259407043457, "learning_rate": 1e-05, "loss": 0.9074, "step": 94730 }, { "epoch": 83.91054030115146, "grad_norm": 0.23000213503837585, "learning_rate": 1e-05, "loss": 0.9647, "step": 94735 }, { "epoch": 83.91496899911427, "grad_norm": 0.2568821609020233, "learning_rate": 1e-05, "loss": 0.9707, "step": 94740 }, { "epoch": 83.91939769707706, "grad_norm": 0.2215203195810318, "learning_rate": 1e-05, "loss": 0.9633, "step": 94745 }, { "epoch": 83.92382639503985, "grad_norm": 0.2594284415245056, "learning_rate": 1e-05, "loss": 0.9153, "step": 94750 }, { "epoch": 83.92825509300266, "grad_norm": 0.23628057539463043, "learning_rate": 1e-05, "loss": 0.9701, "step": 94755 }, { "epoch": 83.93268379096546, "grad_norm": 0.24657033383846283, "learning_rate": 1e-05, "loss": 0.9786, "step": 94760 }, { "epoch": 83.93711248892825, "grad_norm": 0.21617963910102844, "learning_rate": 1e-05, "loss": 0.9157, "step": 94765 }, { "epoch": 83.94154118689106, "grad_norm": 0.23649059236049652, "learning_rate": 1e-05, "loss": 0.9187, "step": 94770 }, { "epoch": 83.94596988485385, "grad_norm": 0.2079278528690338, "learning_rate": 1e-05, "loss": 0.9972, "step": 94775 }, { "epoch": 83.95039858281665, "grad_norm": 0.2660117745399475, "learning_rate": 1e-05, "loss": 0.8715, "step": 94780 }, { "epoch": 83.95482728077945, "grad_norm": 0.23003382980823517, "learning_rate": 1e-05, "loss": 0.9752, "step": 94785 }, { "epoch": 83.95925597874225, "grad_norm": 0.24406704306602478, "learning_rate": 1e-05, "loss": 1.0089, "step": 94790 }, { "epoch": 83.96368467670504, "grad_norm": 0.2565998435020447, "learning_rate": 1e-05, "loss": 0.9286, "step": 94795 }, { "epoch": 83.96811337466785, "grad_norm": 0.2575805187225342, "learning_rate": 1e-05, "loss": 1.011, "step": 94800 }, { "epoch": 83.97254207263065, "grad_norm": 0.23764467239379883, "learning_rate": 1e-05, "loss": 1.0227, "step": 94805 }, { "epoch": 83.97697077059344, "grad_norm": 0.23078009486198425, "learning_rate": 1e-05, "loss": 0.9388, "step": 94810 }, { "epoch": 83.98139946855625, "grad_norm": 0.21822692453861237, "learning_rate": 1e-05, "loss": 0.9772, "step": 94815 }, { "epoch": 83.98582816651904, "grad_norm": 0.2532300651073456, "learning_rate": 1e-05, "loss": 0.9957, "step": 94820 }, { "epoch": 83.99025686448184, "grad_norm": 0.30720821022987366, "learning_rate": 1e-05, "loss": 0.9816, "step": 94825 }, { "epoch": 83.99468556244464, "grad_norm": 0.22327812016010284, "learning_rate": 1e-05, "loss": 0.9555, "step": 94830 }, { "epoch": 83.99911426040744, "grad_norm": 0.23577070236206055, "learning_rate": 1e-05, "loss": 0.9792, "step": 94835 }, { "epoch": 84.00354295837023, "grad_norm": 0.2950497567653656, "learning_rate": 1e-05, "loss": 0.9699, "step": 94840 }, { "epoch": 84.00797165633304, "grad_norm": 0.2588575780391693, "learning_rate": 1e-05, "loss": 0.9354, "step": 94845 }, { "epoch": 84.01240035429583, "grad_norm": 0.28334105014801025, "learning_rate": 1e-05, "loss": 0.9749, "step": 94850 }, { "epoch": 84.01682905225863, "grad_norm": 0.24211524426937103, "learning_rate": 1e-05, "loss": 1.0016, "step": 94855 }, { "epoch": 84.02125775022144, "grad_norm": 0.24798712134361267, "learning_rate": 1e-05, "loss": 0.9654, "step": 94860 }, { "epoch": 84.02568644818423, "grad_norm": 0.24365146458148956, "learning_rate": 1e-05, "loss": 0.9968, "step": 94865 }, { "epoch": 84.03011514614704, "grad_norm": 0.2494594305753708, "learning_rate": 1e-05, "loss": 0.9842, "step": 94870 }, { "epoch": 84.03454384410983, "grad_norm": 0.2192710041999817, "learning_rate": 1e-05, "loss": 0.9408, "step": 94875 }, { "epoch": 84.03897254207263, "grad_norm": 0.2324598878622055, "learning_rate": 1e-05, "loss": 0.9257, "step": 94880 }, { "epoch": 84.04340124003544, "grad_norm": 0.27968695759773254, "learning_rate": 1e-05, "loss": 0.945, "step": 94885 }, { "epoch": 84.04782993799823, "grad_norm": 0.22181417047977448, "learning_rate": 1e-05, "loss": 0.9689, "step": 94890 }, { "epoch": 84.05225863596102, "grad_norm": 0.24373731017112732, "learning_rate": 1e-05, "loss": 0.9991, "step": 94895 }, { "epoch": 84.05668733392383, "grad_norm": 0.2466449737548828, "learning_rate": 1e-05, "loss": 1.0116, "step": 94900 }, { "epoch": 84.06111603188663, "grad_norm": 0.2175285816192627, "learning_rate": 1e-05, "loss": 0.9974, "step": 94905 }, { "epoch": 84.06554472984942, "grad_norm": 0.2768596410751343, "learning_rate": 1e-05, "loss": 0.956, "step": 94910 }, { "epoch": 84.06997342781223, "grad_norm": 0.25873762369155884, "learning_rate": 1e-05, "loss": 0.9565, "step": 94915 }, { "epoch": 84.07440212577502, "grad_norm": 0.2660638391971588, "learning_rate": 1e-05, "loss": 0.9648, "step": 94920 }, { "epoch": 84.07883082373782, "grad_norm": 0.22319315373897552, "learning_rate": 1e-05, "loss": 0.9054, "step": 94925 }, { "epoch": 84.08325952170063, "grad_norm": 0.21766650676727295, "learning_rate": 1e-05, "loss": 0.9298, "step": 94930 }, { "epoch": 84.08768821966342, "grad_norm": 0.2293868064880371, "learning_rate": 1e-05, "loss": 0.9356, "step": 94935 }, { "epoch": 84.09211691762621, "grad_norm": 0.27747952938079834, "learning_rate": 1e-05, "loss": 0.9053, "step": 94940 }, { "epoch": 84.09654561558902, "grad_norm": 0.23462530970573425, "learning_rate": 1e-05, "loss": 0.9712, "step": 94945 }, { "epoch": 84.10097431355182, "grad_norm": 0.2313680350780487, "learning_rate": 1e-05, "loss": 0.9985, "step": 94950 }, { "epoch": 84.10540301151461, "grad_norm": 0.21589159965515137, "learning_rate": 1e-05, "loss": 0.9544, "step": 94955 }, { "epoch": 84.10983170947742, "grad_norm": 0.2361680269241333, "learning_rate": 1e-05, "loss": 0.9753, "step": 94960 }, { "epoch": 84.11426040744021, "grad_norm": 0.26228246092796326, "learning_rate": 1e-05, "loss": 0.9442, "step": 94965 }, { "epoch": 84.118689105403, "grad_norm": 0.21194227039813995, "learning_rate": 1e-05, "loss": 0.968, "step": 94970 }, { "epoch": 84.12311780336582, "grad_norm": 0.241623193025589, "learning_rate": 1e-05, "loss": 0.9535, "step": 94975 }, { "epoch": 84.12754650132861, "grad_norm": 0.2385462522506714, "learning_rate": 1e-05, "loss": 0.907, "step": 94980 }, { "epoch": 84.1319751992914, "grad_norm": 0.22319334745407104, "learning_rate": 1e-05, "loss": 0.9687, "step": 94985 }, { "epoch": 84.13640389725421, "grad_norm": 0.21094316244125366, "learning_rate": 1e-05, "loss": 0.9784, "step": 94990 }, { "epoch": 84.140832595217, "grad_norm": 0.21485401690006256, "learning_rate": 1e-05, "loss": 0.9352, "step": 94995 }, { "epoch": 84.1452612931798, "grad_norm": 0.22340507805347443, "learning_rate": 1e-05, "loss": 0.9683, "step": 95000 }, { "epoch": 84.14968999114261, "grad_norm": 0.22193661332130432, "learning_rate": 1e-05, "loss": 0.9467, "step": 95005 }, { "epoch": 84.1541186891054, "grad_norm": 0.191301167011261, "learning_rate": 1e-05, "loss": 1.0176, "step": 95010 }, { "epoch": 84.1585473870682, "grad_norm": 0.25105011463165283, "learning_rate": 1e-05, "loss": 0.9846, "step": 95015 }, { "epoch": 84.162976085031, "grad_norm": 0.2406686395406723, "learning_rate": 1e-05, "loss": 0.9763, "step": 95020 }, { "epoch": 84.1674047829938, "grad_norm": 0.245299831032753, "learning_rate": 1e-05, "loss": 0.8933, "step": 95025 }, { "epoch": 84.1718334809566, "grad_norm": 0.22139288485050201, "learning_rate": 1e-05, "loss": 0.9896, "step": 95030 }, { "epoch": 84.1762621789194, "grad_norm": 0.2491191178560257, "learning_rate": 1e-05, "loss": 0.986, "step": 95035 }, { "epoch": 84.1806908768822, "grad_norm": 0.2570236623287201, "learning_rate": 1e-05, "loss": 0.9506, "step": 95040 }, { "epoch": 84.18511957484499, "grad_norm": 0.22704187035560608, "learning_rate": 1e-05, "loss": 0.9924, "step": 95045 }, { "epoch": 84.1895482728078, "grad_norm": 0.2254067063331604, "learning_rate": 1e-05, "loss": 0.9356, "step": 95050 }, { "epoch": 84.19397697077059, "grad_norm": 0.22757205367088318, "learning_rate": 1e-05, "loss": 0.9534, "step": 95055 }, { "epoch": 84.19840566873339, "grad_norm": 0.22878535091876984, "learning_rate": 1e-05, "loss": 0.9237, "step": 95060 }, { "epoch": 84.2028343666962, "grad_norm": 0.24961993098258972, "learning_rate": 1e-05, "loss": 0.9876, "step": 95065 }, { "epoch": 84.20726306465899, "grad_norm": 0.2246078997850418, "learning_rate": 1e-05, "loss": 0.9265, "step": 95070 }, { "epoch": 84.21169176262178, "grad_norm": 0.2078251987695694, "learning_rate": 1e-05, "loss": 0.9611, "step": 95075 }, { "epoch": 84.21612046058459, "grad_norm": 0.2782115936279297, "learning_rate": 1e-05, "loss": 0.9857, "step": 95080 }, { "epoch": 84.22054915854739, "grad_norm": 0.24469760060310364, "learning_rate": 1e-05, "loss": 0.9434, "step": 95085 }, { "epoch": 84.22497785651018, "grad_norm": 0.26080989837646484, "learning_rate": 1e-05, "loss": 0.9833, "step": 95090 }, { "epoch": 84.22940655447299, "grad_norm": 0.2590102553367615, "learning_rate": 1e-05, "loss": 0.9934, "step": 95095 }, { "epoch": 84.23383525243578, "grad_norm": 0.18541674315929413, "learning_rate": 1e-05, "loss": 0.9878, "step": 95100 }, { "epoch": 84.23826395039858, "grad_norm": 0.20707251131534576, "learning_rate": 1e-05, "loss": 0.9452, "step": 95105 }, { "epoch": 84.24269264836138, "grad_norm": 0.24196644127368927, "learning_rate": 1e-05, "loss": 1.0068, "step": 95110 }, { "epoch": 84.24712134632418, "grad_norm": 0.2021612972021103, "learning_rate": 1e-05, "loss": 0.9926, "step": 95115 }, { "epoch": 84.25155004428699, "grad_norm": 0.2330324798822403, "learning_rate": 1e-05, "loss": 0.9907, "step": 95120 }, { "epoch": 84.25597874224978, "grad_norm": 0.2529261112213135, "learning_rate": 1e-05, "loss": 0.925, "step": 95125 }, { "epoch": 84.26040744021257, "grad_norm": 0.24427860975265503, "learning_rate": 1e-05, "loss": 0.9309, "step": 95130 }, { "epoch": 84.26483613817538, "grad_norm": 0.21987900137901306, "learning_rate": 1e-05, "loss": 0.9468, "step": 95135 }, { "epoch": 84.26926483613818, "grad_norm": 0.25686851143836975, "learning_rate": 1e-05, "loss": 0.8946, "step": 95140 }, { "epoch": 84.27369353410097, "grad_norm": 0.22783900797367096, "learning_rate": 1e-05, "loss": 0.9842, "step": 95145 }, { "epoch": 84.27812223206378, "grad_norm": 0.2344811111688614, "learning_rate": 1e-05, "loss": 0.9742, "step": 95150 }, { "epoch": 84.28255093002657, "grad_norm": 0.22524075210094452, "learning_rate": 1e-05, "loss": 0.9416, "step": 95155 }, { "epoch": 84.28697962798937, "grad_norm": 0.24430370330810547, "learning_rate": 1e-05, "loss": 0.9555, "step": 95160 }, { "epoch": 84.29140832595218, "grad_norm": 0.22766785323619843, "learning_rate": 1e-05, "loss": 0.9928, "step": 95165 }, { "epoch": 84.29583702391497, "grad_norm": 0.20383374392986298, "learning_rate": 1e-05, "loss": 0.9826, "step": 95170 }, { "epoch": 84.30026572187776, "grad_norm": 0.2632075250148773, "learning_rate": 1e-05, "loss": 0.9308, "step": 95175 }, { "epoch": 84.30469441984057, "grad_norm": 0.2478143870830536, "learning_rate": 1e-05, "loss": 1.0049, "step": 95180 }, { "epoch": 84.30912311780337, "grad_norm": 0.23032645881175995, "learning_rate": 1e-05, "loss": 0.9572, "step": 95185 }, { "epoch": 84.31355181576616, "grad_norm": 0.2212352305650711, "learning_rate": 1e-05, "loss": 0.9856, "step": 95190 }, { "epoch": 84.31798051372897, "grad_norm": 0.2680801451206207, "learning_rate": 1e-05, "loss": 0.9451, "step": 95195 }, { "epoch": 84.32240921169176, "grad_norm": 0.22448107600212097, "learning_rate": 1e-05, "loss": 0.9339, "step": 95200 }, { "epoch": 84.32683790965456, "grad_norm": 0.24958157539367676, "learning_rate": 1e-05, "loss": 0.9897, "step": 95205 }, { "epoch": 84.33126660761737, "grad_norm": 0.22682936489582062, "learning_rate": 1e-05, "loss": 0.9579, "step": 95210 }, { "epoch": 84.33569530558016, "grad_norm": 0.25163504481315613, "learning_rate": 1e-05, "loss": 0.906, "step": 95215 }, { "epoch": 84.34012400354295, "grad_norm": 0.232279434800148, "learning_rate": 1e-05, "loss": 0.9932, "step": 95220 }, { "epoch": 84.34455270150576, "grad_norm": 0.2613610625267029, "learning_rate": 1e-05, "loss": 0.9765, "step": 95225 }, { "epoch": 84.34898139946856, "grad_norm": 0.26100119948387146, "learning_rate": 1e-05, "loss": 0.9355, "step": 95230 }, { "epoch": 84.35341009743135, "grad_norm": 0.24040307104587555, "learning_rate": 1e-05, "loss": 0.9645, "step": 95235 }, { "epoch": 84.35783879539416, "grad_norm": 0.21100692451000214, "learning_rate": 1e-05, "loss": 0.9415, "step": 95240 }, { "epoch": 84.36226749335695, "grad_norm": 0.2626730501651764, "learning_rate": 1e-05, "loss": 0.9376, "step": 95245 }, { "epoch": 84.36669619131975, "grad_norm": 0.2319963276386261, "learning_rate": 1e-05, "loss": 0.9566, "step": 95250 }, { "epoch": 84.37112488928256, "grad_norm": 0.28629860281944275, "learning_rate": 1e-05, "loss": 0.9371, "step": 95255 }, { "epoch": 84.37555358724535, "grad_norm": 0.23362088203430176, "learning_rate": 1e-05, "loss": 0.9238, "step": 95260 }, { "epoch": 84.37998228520814, "grad_norm": 0.22681240737438202, "learning_rate": 1e-05, "loss": 0.9988, "step": 95265 }, { "epoch": 84.38441098317095, "grad_norm": 0.23317331075668335, "learning_rate": 1e-05, "loss": 0.9432, "step": 95270 }, { "epoch": 84.38883968113375, "grad_norm": 0.27958372235298157, "learning_rate": 1e-05, "loss": 0.9371, "step": 95275 }, { "epoch": 84.39326837909654, "grad_norm": 0.2536323368549347, "learning_rate": 1e-05, "loss": 1.0151, "step": 95280 }, { "epoch": 84.39769707705935, "grad_norm": 0.2526127099990845, "learning_rate": 1e-05, "loss": 0.9188, "step": 95285 }, { "epoch": 84.40212577502214, "grad_norm": 0.2667582035064697, "learning_rate": 1e-05, "loss": 0.9737, "step": 95290 }, { "epoch": 84.40655447298494, "grad_norm": 0.2597736716270447, "learning_rate": 1e-05, "loss": 0.9876, "step": 95295 }, { "epoch": 84.41098317094774, "grad_norm": 0.2338431179523468, "learning_rate": 1e-05, "loss": 0.9424, "step": 95300 }, { "epoch": 84.41541186891054, "grad_norm": 0.25917288661003113, "learning_rate": 1e-05, "loss": 0.9371, "step": 95305 }, { "epoch": 84.41984056687333, "grad_norm": 0.23737460374832153, "learning_rate": 1e-05, "loss": 0.9801, "step": 95310 }, { "epoch": 84.42426926483614, "grad_norm": 0.2870638370513916, "learning_rate": 1e-05, "loss": 0.9917, "step": 95315 }, { "epoch": 84.42869796279894, "grad_norm": 0.21002182364463806, "learning_rate": 1e-05, "loss": 0.9549, "step": 95320 }, { "epoch": 84.43312666076173, "grad_norm": 0.20469854772090912, "learning_rate": 1e-05, "loss": 0.9184, "step": 95325 }, { "epoch": 84.43755535872454, "grad_norm": 0.21518981456756592, "learning_rate": 1e-05, "loss": 0.9482, "step": 95330 }, { "epoch": 84.44198405668733, "grad_norm": 0.20124612748622894, "learning_rate": 1e-05, "loss": 0.9052, "step": 95335 }, { "epoch": 84.44641275465013, "grad_norm": 0.21300576627254486, "learning_rate": 1e-05, "loss": 0.9531, "step": 95340 }, { "epoch": 84.45084145261293, "grad_norm": 0.248631551861763, "learning_rate": 1e-05, "loss": 1.007, "step": 95345 }, { "epoch": 84.45527015057573, "grad_norm": 0.27155691385269165, "learning_rate": 1e-05, "loss": 1.022, "step": 95350 }, { "epoch": 84.45969884853854, "grad_norm": 0.217047780752182, "learning_rate": 1e-05, "loss": 0.9676, "step": 95355 }, { "epoch": 84.46412754650133, "grad_norm": 0.2551300525665283, "learning_rate": 1e-05, "loss": 0.9823, "step": 95360 }, { "epoch": 84.46855624446412, "grad_norm": 0.21841205656528473, "learning_rate": 1e-05, "loss": 0.9445, "step": 95365 }, { "epoch": 84.47298494242693, "grad_norm": 0.22561630606651306, "learning_rate": 1e-05, "loss": 0.9765, "step": 95370 }, { "epoch": 84.47741364038973, "grad_norm": 0.23209480941295624, "learning_rate": 1e-05, "loss": 0.9179, "step": 95375 }, { "epoch": 84.48184233835252, "grad_norm": 0.24146901071071625, "learning_rate": 1e-05, "loss": 0.9746, "step": 95380 }, { "epoch": 84.48627103631533, "grad_norm": 0.23416675627231598, "learning_rate": 1e-05, "loss": 0.9354, "step": 95385 }, { "epoch": 84.49069973427812, "grad_norm": 0.2584945559501648, "learning_rate": 1e-05, "loss": 1.0099, "step": 95390 }, { "epoch": 84.49512843224092, "grad_norm": 0.2838519215583801, "learning_rate": 1e-05, "loss": 1.0032, "step": 95395 }, { "epoch": 84.49955713020373, "grad_norm": 0.23502430319786072, "learning_rate": 1e-05, "loss": 0.9864, "step": 95400 }, { "epoch": 84.50398582816652, "grad_norm": 0.242192804813385, "learning_rate": 1e-05, "loss": 0.9911, "step": 95405 }, { "epoch": 84.50841452612931, "grad_norm": 0.2141350954771042, "learning_rate": 1e-05, "loss": 0.9963, "step": 95410 }, { "epoch": 84.51284322409212, "grad_norm": 0.23970328271389008, "learning_rate": 1e-05, "loss": 1.0014, "step": 95415 }, { "epoch": 84.51727192205492, "grad_norm": 0.25437837839126587, "learning_rate": 1e-05, "loss": 0.9442, "step": 95420 }, { "epoch": 84.52170062001771, "grad_norm": 0.24400384724140167, "learning_rate": 1e-05, "loss": 1.005, "step": 95425 }, { "epoch": 84.52612931798052, "grad_norm": 0.22580833733081818, "learning_rate": 1e-05, "loss": 1.0039, "step": 95430 }, { "epoch": 84.53055801594331, "grad_norm": 0.2961694598197937, "learning_rate": 1e-05, "loss": 0.94, "step": 95435 }, { "epoch": 84.53498671390611, "grad_norm": 0.25675418972969055, "learning_rate": 1e-05, "loss": 0.997, "step": 95440 }, { "epoch": 84.53941541186892, "grad_norm": 0.2608996331691742, "learning_rate": 1e-05, "loss": 0.9534, "step": 95445 }, { "epoch": 84.54384410983171, "grad_norm": 0.2342691272497177, "learning_rate": 1e-05, "loss": 0.9431, "step": 95450 }, { "epoch": 84.5482728077945, "grad_norm": 0.26692405343055725, "learning_rate": 1e-05, "loss": 0.9907, "step": 95455 }, { "epoch": 84.55270150575731, "grad_norm": 0.2560400664806366, "learning_rate": 1e-05, "loss": 0.9066, "step": 95460 }, { "epoch": 84.5571302037201, "grad_norm": 0.19967405498027802, "learning_rate": 1e-05, "loss": 0.9421, "step": 95465 }, { "epoch": 84.5615589016829, "grad_norm": 0.2428506314754486, "learning_rate": 1e-05, "loss": 0.9512, "step": 95470 }, { "epoch": 84.56598759964571, "grad_norm": 0.2687271237373352, "learning_rate": 1e-05, "loss": 0.9595, "step": 95475 }, { "epoch": 84.5704162976085, "grad_norm": 0.27449697256088257, "learning_rate": 1e-05, "loss": 0.9412, "step": 95480 }, { "epoch": 84.5748449955713, "grad_norm": 0.23221999406814575, "learning_rate": 1e-05, "loss": 0.9819, "step": 95485 }, { "epoch": 84.5792736935341, "grad_norm": 0.27246710658073425, "learning_rate": 1e-05, "loss": 0.9728, "step": 95490 }, { "epoch": 84.5837023914969, "grad_norm": 0.2390279322862625, "learning_rate": 1e-05, "loss": 0.9048, "step": 95495 }, { "epoch": 84.5881310894597, "grad_norm": 0.2245926409959793, "learning_rate": 1e-05, "loss": 0.9316, "step": 95500 }, { "epoch": 84.5925597874225, "grad_norm": 0.23420579731464386, "learning_rate": 1e-05, "loss": 0.9333, "step": 95505 }, { "epoch": 84.5969884853853, "grad_norm": 0.2726210653781891, "learning_rate": 1e-05, "loss": 0.93, "step": 95510 }, { "epoch": 84.60141718334809, "grad_norm": 0.2940177321434021, "learning_rate": 1e-05, "loss": 0.9792, "step": 95515 }, { "epoch": 84.6058458813109, "grad_norm": 0.2457304149866104, "learning_rate": 1e-05, "loss": 0.9308, "step": 95520 }, { "epoch": 84.61027457927369, "grad_norm": 0.2744705379009247, "learning_rate": 1e-05, "loss": 0.9796, "step": 95525 }, { "epoch": 84.61470327723649, "grad_norm": 0.29199880361557007, "learning_rate": 1e-05, "loss": 0.9733, "step": 95530 }, { "epoch": 84.6191319751993, "grad_norm": 0.2516320049762726, "learning_rate": 1e-05, "loss": 0.9586, "step": 95535 }, { "epoch": 84.62356067316209, "grad_norm": 0.21507993340492249, "learning_rate": 1e-05, "loss": 0.986, "step": 95540 }, { "epoch": 84.62798937112488, "grad_norm": 0.22222648561000824, "learning_rate": 1e-05, "loss": 0.8921, "step": 95545 }, { "epoch": 84.63241806908769, "grad_norm": 0.2148786187171936, "learning_rate": 1e-05, "loss": 0.9887, "step": 95550 }, { "epoch": 84.63684676705049, "grad_norm": 0.20781093835830688, "learning_rate": 1e-05, "loss": 0.9093, "step": 95555 }, { "epoch": 84.64127546501328, "grad_norm": 0.22792086005210876, "learning_rate": 1e-05, "loss": 0.9632, "step": 95560 }, { "epoch": 84.64570416297609, "grad_norm": 0.23570506274700165, "learning_rate": 1e-05, "loss": 0.9507, "step": 95565 }, { "epoch": 84.65013286093888, "grad_norm": 0.2219143658876419, "learning_rate": 1e-05, "loss": 0.9119, "step": 95570 }, { "epoch": 84.65456155890168, "grad_norm": 0.22343456745147705, "learning_rate": 1e-05, "loss": 0.965, "step": 95575 }, { "epoch": 84.65899025686448, "grad_norm": 0.2881627678871155, "learning_rate": 1e-05, "loss": 0.9725, "step": 95580 }, { "epoch": 84.66341895482728, "grad_norm": 0.2373683899641037, "learning_rate": 1e-05, "loss": 0.968, "step": 95585 }, { "epoch": 84.66784765279007, "grad_norm": 0.24828515946865082, "learning_rate": 1e-05, "loss": 0.9673, "step": 95590 }, { "epoch": 84.67227635075288, "grad_norm": 0.2688041925430298, "learning_rate": 1e-05, "loss": 0.9231, "step": 95595 }, { "epoch": 84.67670504871568, "grad_norm": 0.24422608315944672, "learning_rate": 1e-05, "loss": 0.9605, "step": 95600 }, { "epoch": 84.68113374667848, "grad_norm": 0.21485407650470734, "learning_rate": 1e-05, "loss": 0.9314, "step": 95605 }, { "epoch": 84.68556244464128, "grad_norm": 0.2476443201303482, "learning_rate": 1e-05, "loss": 0.9179, "step": 95610 }, { "epoch": 84.68999114260407, "grad_norm": 0.2540789246559143, "learning_rate": 1e-05, "loss": 0.9867, "step": 95615 }, { "epoch": 84.69441984056688, "grad_norm": 0.24090464413166046, "learning_rate": 1e-05, "loss": 0.9659, "step": 95620 }, { "epoch": 84.69884853852967, "grad_norm": 0.22515863180160522, "learning_rate": 1e-05, "loss": 0.9393, "step": 95625 }, { "epoch": 84.70327723649247, "grad_norm": 0.22733783721923828, "learning_rate": 1e-05, "loss": 0.9718, "step": 95630 }, { "epoch": 84.70770593445528, "grad_norm": 0.23291243612766266, "learning_rate": 1e-05, "loss": 0.9431, "step": 95635 }, { "epoch": 84.71213463241807, "grad_norm": 0.22671736776828766, "learning_rate": 1e-05, "loss": 0.9795, "step": 95640 }, { "epoch": 84.71656333038086, "grad_norm": 0.20079156756401062, "learning_rate": 1e-05, "loss": 0.9626, "step": 95645 }, { "epoch": 84.72099202834367, "grad_norm": 0.2567376494407654, "learning_rate": 1e-05, "loss": 0.9325, "step": 95650 }, { "epoch": 84.72542072630647, "grad_norm": 0.2568581700325012, "learning_rate": 1e-05, "loss": 0.9627, "step": 95655 }, { "epoch": 84.72984942426926, "grad_norm": 0.2080429345369339, "learning_rate": 1e-05, "loss": 0.9275, "step": 95660 }, { "epoch": 84.73427812223207, "grad_norm": 0.2557322680950165, "learning_rate": 1e-05, "loss": 1.0084, "step": 95665 }, { "epoch": 84.73870682019486, "grad_norm": 0.24420638382434845, "learning_rate": 1e-05, "loss": 0.9299, "step": 95670 }, { "epoch": 84.74313551815766, "grad_norm": 0.2776748239994049, "learning_rate": 1e-05, "loss": 0.952, "step": 95675 }, { "epoch": 84.74756421612047, "grad_norm": 0.20128783583641052, "learning_rate": 1e-05, "loss": 0.9282, "step": 95680 }, { "epoch": 84.75199291408326, "grad_norm": 0.21968212723731995, "learning_rate": 1e-05, "loss": 1.0026, "step": 95685 }, { "epoch": 84.75642161204605, "grad_norm": 0.21135146915912628, "learning_rate": 1e-05, "loss": 0.8722, "step": 95690 }, { "epoch": 84.76085031000886, "grad_norm": 0.23909050226211548, "learning_rate": 1e-05, "loss": 0.9348, "step": 95695 }, { "epoch": 84.76527900797166, "grad_norm": 0.2672107219696045, "learning_rate": 1e-05, "loss": 0.9471, "step": 95700 }, { "epoch": 84.76970770593445, "grad_norm": 0.3292006254196167, "learning_rate": 1e-05, "loss": 0.945, "step": 95705 }, { "epoch": 84.77413640389726, "grad_norm": 0.2799969017505646, "learning_rate": 1e-05, "loss": 0.9665, "step": 95710 }, { "epoch": 84.77856510186005, "grad_norm": 0.24608798325061798, "learning_rate": 1e-05, "loss": 0.9136, "step": 95715 }, { "epoch": 84.78299379982285, "grad_norm": 0.2224859744310379, "learning_rate": 1e-05, "loss": 0.9444, "step": 95720 }, { "epoch": 84.78742249778566, "grad_norm": 0.2026747465133667, "learning_rate": 1e-05, "loss": 1.011, "step": 95725 }, { "epoch": 84.79185119574845, "grad_norm": 0.25361397862434387, "learning_rate": 1e-05, "loss": 0.9457, "step": 95730 }, { "epoch": 84.79627989371124, "grad_norm": 0.2583954930305481, "learning_rate": 1e-05, "loss": 0.9458, "step": 95735 }, { "epoch": 84.80070859167405, "grad_norm": 0.23851393163204193, "learning_rate": 1e-05, "loss": 0.9451, "step": 95740 }, { "epoch": 84.80513728963685, "grad_norm": 0.23354066908359528, "learning_rate": 1e-05, "loss": 0.9162, "step": 95745 }, { "epoch": 84.80956598759964, "grad_norm": 0.26972946524620056, "learning_rate": 1e-05, "loss": 0.9165, "step": 95750 }, { "epoch": 84.81399468556245, "grad_norm": 0.226934015750885, "learning_rate": 1e-05, "loss": 0.9466, "step": 95755 }, { "epoch": 84.81842338352524, "grad_norm": 0.2299937605857849, "learning_rate": 1e-05, "loss": 0.9698, "step": 95760 }, { "epoch": 84.82285208148804, "grad_norm": 0.24781367182731628, "learning_rate": 1e-05, "loss": 0.9037, "step": 95765 }, { "epoch": 84.82728077945085, "grad_norm": 0.2466375082731247, "learning_rate": 1e-05, "loss": 0.9361, "step": 95770 }, { "epoch": 84.83170947741364, "grad_norm": 0.2405197024345398, "learning_rate": 1e-05, "loss": 0.9609, "step": 95775 }, { "epoch": 84.83613817537643, "grad_norm": 0.2225402444601059, "learning_rate": 1e-05, "loss": 0.9765, "step": 95780 }, { "epoch": 84.84056687333924, "grad_norm": 0.2650313973426819, "learning_rate": 1e-05, "loss": 0.9541, "step": 95785 }, { "epoch": 84.84499557130204, "grad_norm": 0.2287360280752182, "learning_rate": 1e-05, "loss": 0.9444, "step": 95790 }, { "epoch": 84.84942426926483, "grad_norm": 0.26884597539901733, "learning_rate": 1e-05, "loss": 0.9893, "step": 95795 }, { "epoch": 84.85385296722764, "grad_norm": 0.32081130146980286, "learning_rate": 1e-05, "loss": 0.9366, "step": 95800 }, { "epoch": 84.85828166519043, "grad_norm": 0.22239120304584503, "learning_rate": 1e-05, "loss": 0.9416, "step": 95805 }, { "epoch": 84.86271036315323, "grad_norm": 0.24696296453475952, "learning_rate": 1e-05, "loss": 0.941, "step": 95810 }, { "epoch": 84.86713906111603, "grad_norm": 0.23134391009807587, "learning_rate": 1e-05, "loss": 0.9543, "step": 95815 }, { "epoch": 84.87156775907883, "grad_norm": 0.24801728129386902, "learning_rate": 1e-05, "loss": 0.9442, "step": 95820 }, { "epoch": 84.87599645704162, "grad_norm": 0.2218467891216278, "learning_rate": 1e-05, "loss": 0.9442, "step": 95825 }, { "epoch": 84.88042515500443, "grad_norm": 0.27515849471092224, "learning_rate": 1e-05, "loss": 0.9435, "step": 95830 }, { "epoch": 84.88485385296723, "grad_norm": 0.2518330514431, "learning_rate": 1e-05, "loss": 0.9441, "step": 95835 }, { "epoch": 84.88928255093003, "grad_norm": 0.23716948926448822, "learning_rate": 1e-05, "loss": 0.9552, "step": 95840 }, { "epoch": 84.89371124889283, "grad_norm": 0.2406175583600998, "learning_rate": 1e-05, "loss": 1.0114, "step": 95845 }, { "epoch": 84.89813994685562, "grad_norm": 0.22526101768016815, "learning_rate": 1e-05, "loss": 0.9572, "step": 95850 }, { "epoch": 84.90256864481843, "grad_norm": 0.2963843047618866, "learning_rate": 1e-05, "loss": 0.9831, "step": 95855 }, { "epoch": 84.90699734278122, "grad_norm": 0.2801845967769623, "learning_rate": 1e-05, "loss": 0.9901, "step": 95860 }, { "epoch": 84.91142604074402, "grad_norm": 0.2423602044582367, "learning_rate": 1e-05, "loss": 0.9829, "step": 95865 }, { "epoch": 84.91585473870683, "grad_norm": 0.23360104858875275, "learning_rate": 1e-05, "loss": 0.937, "step": 95870 }, { "epoch": 84.92028343666962, "grad_norm": 0.25409355759620667, "learning_rate": 1e-05, "loss": 0.9812, "step": 95875 }, { "epoch": 84.92471213463241, "grad_norm": 0.23014232516288757, "learning_rate": 1e-05, "loss": 0.9468, "step": 95880 }, { "epoch": 84.92914083259522, "grad_norm": 0.23911939561367035, "learning_rate": 1e-05, "loss": 1.0022, "step": 95885 }, { "epoch": 84.93356953055802, "grad_norm": 0.2504749596118927, "learning_rate": 1e-05, "loss": 0.9321, "step": 95890 }, { "epoch": 84.93799822852081, "grad_norm": 0.29032817482948303, "learning_rate": 1e-05, "loss": 0.9664, "step": 95895 }, { "epoch": 84.94242692648362, "grad_norm": 0.2515168786048889, "learning_rate": 1e-05, "loss": 0.9672, "step": 95900 }, { "epoch": 84.94685562444641, "grad_norm": 0.24183066189289093, "learning_rate": 1e-05, "loss": 0.9545, "step": 95905 }, { "epoch": 84.95128432240921, "grad_norm": 0.23755010962486267, "learning_rate": 1e-05, "loss": 0.9282, "step": 95910 }, { "epoch": 84.95571302037202, "grad_norm": 0.2761787474155426, "learning_rate": 1e-05, "loss": 0.9643, "step": 95915 }, { "epoch": 84.96014171833481, "grad_norm": 0.22239238023757935, "learning_rate": 1e-05, "loss": 0.9665, "step": 95920 }, { "epoch": 84.9645704162976, "grad_norm": 0.23220498859882355, "learning_rate": 1e-05, "loss": 0.9762, "step": 95925 }, { "epoch": 84.96899911426041, "grad_norm": 0.2367704063653946, "learning_rate": 1e-05, "loss": 0.963, "step": 95930 }, { "epoch": 84.9734278122232, "grad_norm": 0.2466348558664322, "learning_rate": 1e-05, "loss": 0.9557, "step": 95935 }, { "epoch": 84.977856510186, "grad_norm": 0.26395756006240845, "learning_rate": 1e-05, "loss": 0.9433, "step": 95940 }, { "epoch": 84.98228520814881, "grad_norm": 0.250715047121048, "learning_rate": 1e-05, "loss": 0.9223, "step": 95945 }, { "epoch": 84.9867139061116, "grad_norm": 0.24061135947704315, "learning_rate": 1e-05, "loss": 0.9876, "step": 95950 }, { "epoch": 84.9911426040744, "grad_norm": 0.21292370557785034, "learning_rate": 1e-05, "loss": 0.9701, "step": 95955 }, { "epoch": 84.9955713020372, "grad_norm": 0.21280573308467865, "learning_rate": 1e-05, "loss": 0.9301, "step": 95960 }, { "epoch": 85.0, "grad_norm": 0.2240002304315567, "learning_rate": 1e-05, "loss": 0.8722, "step": 95965 }, { "epoch": 85.0044286979628, "grad_norm": 0.2056228518486023, "learning_rate": 1e-05, "loss": 0.9105, "step": 95970 }, { "epoch": 85.0088573959256, "grad_norm": 0.22989049553871155, "learning_rate": 1e-05, "loss": 0.9675, "step": 95975 }, { "epoch": 85.0132860938884, "grad_norm": 0.2428256869316101, "learning_rate": 1e-05, "loss": 0.9474, "step": 95980 }, { "epoch": 85.01771479185119, "grad_norm": 0.2553693652153015, "learning_rate": 1e-05, "loss": 0.9109, "step": 95985 }, { "epoch": 85.022143489814, "grad_norm": 0.24397438764572144, "learning_rate": 1e-05, "loss": 0.9185, "step": 95990 }, { "epoch": 85.0265721877768, "grad_norm": 0.2233012616634369, "learning_rate": 1e-05, "loss": 0.9426, "step": 95995 }, { "epoch": 85.03100088573959, "grad_norm": 0.2178582102060318, "learning_rate": 1e-05, "loss": 0.9842, "step": 96000 }, { "epoch": 85.0354295837024, "grad_norm": 0.236038476228714, "learning_rate": 1e-05, "loss": 0.9538, "step": 96005 }, { "epoch": 85.03985828166519, "grad_norm": 0.24216914176940918, "learning_rate": 1e-05, "loss": 0.9529, "step": 96010 }, { "epoch": 85.04428697962798, "grad_norm": 0.23506732285022736, "learning_rate": 1e-05, "loss": 0.9674, "step": 96015 }, { "epoch": 85.04871567759079, "grad_norm": 0.222891703248024, "learning_rate": 1e-05, "loss": 0.9664, "step": 96020 }, { "epoch": 85.05314437555359, "grad_norm": 0.20837712287902832, "learning_rate": 1e-05, "loss": 0.9437, "step": 96025 }, { "epoch": 85.05757307351638, "grad_norm": 0.2472689002752304, "learning_rate": 1e-05, "loss": 0.951, "step": 96030 }, { "epoch": 85.06200177147919, "grad_norm": 0.22774197161197662, "learning_rate": 1e-05, "loss": 0.9547, "step": 96035 }, { "epoch": 85.06643046944198, "grad_norm": 0.2266194075345993, "learning_rate": 1e-05, "loss": 0.9433, "step": 96040 }, { "epoch": 85.07085916740478, "grad_norm": 0.2301270216703415, "learning_rate": 1e-05, "loss": 0.9626, "step": 96045 }, { "epoch": 85.07528786536759, "grad_norm": 0.2130812257528305, "learning_rate": 1e-05, "loss": 0.9781, "step": 96050 }, { "epoch": 85.07971656333038, "grad_norm": 0.23036104440689087, "learning_rate": 1e-05, "loss": 0.9318, "step": 96055 }, { "epoch": 85.08414526129317, "grad_norm": 0.25373438000679016, "learning_rate": 1e-05, "loss": 0.9651, "step": 96060 }, { "epoch": 85.08857395925598, "grad_norm": 0.24780772626399994, "learning_rate": 1e-05, "loss": 0.9477, "step": 96065 }, { "epoch": 85.09300265721878, "grad_norm": 0.21206624805927277, "learning_rate": 1e-05, "loss": 0.9523, "step": 96070 }, { "epoch": 85.09743135518157, "grad_norm": 0.24895332753658295, "learning_rate": 1e-05, "loss": 0.9386, "step": 96075 }, { "epoch": 85.10186005314438, "grad_norm": 0.2327793836593628, "learning_rate": 1e-05, "loss": 0.9651, "step": 96080 }, { "epoch": 85.10628875110717, "grad_norm": 0.24390637874603271, "learning_rate": 1e-05, "loss": 0.9221, "step": 96085 }, { "epoch": 85.11071744906998, "grad_norm": 0.26170188188552856, "learning_rate": 1e-05, "loss": 0.9973, "step": 96090 }, { "epoch": 85.11514614703277, "grad_norm": 0.22147339582443237, "learning_rate": 1e-05, "loss": 0.9316, "step": 96095 }, { "epoch": 85.11957484499557, "grad_norm": 0.23095647990703583, "learning_rate": 1e-05, "loss": 0.9283, "step": 96100 }, { "epoch": 85.12400354295838, "grad_norm": 0.2348744124174118, "learning_rate": 1e-05, "loss": 0.9203, "step": 96105 }, { "epoch": 85.12843224092117, "grad_norm": 0.2612001299858093, "learning_rate": 1e-05, "loss": 0.9737, "step": 96110 }, { "epoch": 85.13286093888397, "grad_norm": 0.23263311386108398, "learning_rate": 1e-05, "loss": 0.9096, "step": 96115 }, { "epoch": 85.13728963684677, "grad_norm": 0.25108346343040466, "learning_rate": 1e-05, "loss": 0.9579, "step": 96120 }, { "epoch": 85.14171833480957, "grad_norm": 0.27531638741493225, "learning_rate": 1e-05, "loss": 1.0082, "step": 96125 }, { "epoch": 85.14614703277236, "grad_norm": 0.23817220330238342, "learning_rate": 1e-05, "loss": 0.9585, "step": 96130 }, { "epoch": 85.15057573073517, "grad_norm": 0.3062964379787445, "learning_rate": 1e-05, "loss": 0.9974, "step": 96135 }, { "epoch": 85.15500442869796, "grad_norm": 0.2517683506011963, "learning_rate": 1e-05, "loss": 0.9519, "step": 96140 }, { "epoch": 85.15943312666076, "grad_norm": 0.2597475051879883, "learning_rate": 1e-05, "loss": 0.9067, "step": 96145 }, { "epoch": 85.16386182462357, "grad_norm": 0.2368151992559433, "learning_rate": 1e-05, "loss": 0.9664, "step": 96150 }, { "epoch": 85.16829052258636, "grad_norm": 0.2859891355037689, "learning_rate": 1e-05, "loss": 0.9785, "step": 96155 }, { "epoch": 85.17271922054915, "grad_norm": 0.25269755721092224, "learning_rate": 1e-05, "loss": 0.9559, "step": 96160 }, { "epoch": 85.17714791851196, "grad_norm": 0.22500145435333252, "learning_rate": 1e-05, "loss": 0.9418, "step": 96165 }, { "epoch": 85.18157661647476, "grad_norm": 0.26541993021965027, "learning_rate": 1e-05, "loss": 1.005, "step": 96170 }, { "epoch": 85.18600531443755, "grad_norm": 0.22606979310512543, "learning_rate": 1e-05, "loss": 0.9579, "step": 96175 }, { "epoch": 85.19043401240036, "grad_norm": 0.30259039998054504, "learning_rate": 1e-05, "loss": 0.9865, "step": 96180 }, { "epoch": 85.19486271036315, "grad_norm": 0.21107599139213562, "learning_rate": 1e-05, "loss": 0.95, "step": 96185 }, { "epoch": 85.19929140832595, "grad_norm": 0.20577754080295563, "learning_rate": 1e-05, "loss": 0.9604, "step": 96190 }, { "epoch": 85.20372010628876, "grad_norm": 0.2331273853778839, "learning_rate": 1e-05, "loss": 0.9429, "step": 96195 }, { "epoch": 85.20814880425155, "grad_norm": 0.25989672541618347, "learning_rate": 1e-05, "loss": 0.9344, "step": 96200 }, { "epoch": 85.21257750221434, "grad_norm": 0.2498919665813446, "learning_rate": 1e-05, "loss": 0.95, "step": 96205 }, { "epoch": 85.21700620017715, "grad_norm": 0.24295833706855774, "learning_rate": 1e-05, "loss": 0.9511, "step": 96210 }, { "epoch": 85.22143489813995, "grad_norm": 0.21256954967975616, "learning_rate": 1e-05, "loss": 0.9457, "step": 96215 }, { "epoch": 85.22586359610274, "grad_norm": 0.22360825538635254, "learning_rate": 1e-05, "loss": 0.9488, "step": 96220 }, { "epoch": 85.23029229406555, "grad_norm": 0.24023771286010742, "learning_rate": 1e-05, "loss": 1.0032, "step": 96225 }, { "epoch": 85.23472099202834, "grad_norm": 0.27968940138816833, "learning_rate": 1e-05, "loss": 0.9869, "step": 96230 }, { "epoch": 85.23914968999114, "grad_norm": 0.26634642481803894, "learning_rate": 1e-05, "loss": 0.9274, "step": 96235 }, { "epoch": 85.24357838795395, "grad_norm": 0.22590051591396332, "learning_rate": 1e-05, "loss": 0.9959, "step": 96240 }, { "epoch": 85.24800708591674, "grad_norm": 0.2178240716457367, "learning_rate": 1e-05, "loss": 0.9728, "step": 96245 }, { "epoch": 85.25243578387953, "grad_norm": 0.22034253180027008, "learning_rate": 1e-05, "loss": 0.9734, "step": 96250 }, { "epoch": 85.25686448184234, "grad_norm": 0.2205260992050171, "learning_rate": 1e-05, "loss": 0.9486, "step": 96255 }, { "epoch": 85.26129317980514, "grad_norm": 0.2659783363342285, "learning_rate": 1e-05, "loss": 0.9359, "step": 96260 }, { "epoch": 85.26572187776793, "grad_norm": 0.2397063970565796, "learning_rate": 1e-05, "loss": 0.9524, "step": 96265 }, { "epoch": 85.27015057573074, "grad_norm": 0.24733999371528625, "learning_rate": 1e-05, "loss": 0.9565, "step": 96270 }, { "epoch": 85.27457927369353, "grad_norm": 0.24266697466373444, "learning_rate": 1e-05, "loss": 1.0102, "step": 96275 }, { "epoch": 85.27900797165633, "grad_norm": 0.25073230266571045, "learning_rate": 1e-05, "loss": 0.9021, "step": 96280 }, { "epoch": 85.28343666961914, "grad_norm": 0.23984003067016602, "learning_rate": 1e-05, "loss": 0.9801, "step": 96285 }, { "epoch": 85.28786536758193, "grad_norm": 0.24378043413162231, "learning_rate": 1e-05, "loss": 0.9947, "step": 96290 }, { "epoch": 85.29229406554472, "grad_norm": 0.21318432688713074, "learning_rate": 1e-05, "loss": 0.948, "step": 96295 }, { "epoch": 85.29672276350753, "grad_norm": 0.23227250576019287, "learning_rate": 1e-05, "loss": 0.9824, "step": 96300 }, { "epoch": 85.30115146147033, "grad_norm": 0.23576156795024872, "learning_rate": 1e-05, "loss": 1.008, "step": 96305 }, { "epoch": 85.30558015943312, "grad_norm": 0.22098015248775482, "learning_rate": 1e-05, "loss": 0.989, "step": 96310 }, { "epoch": 85.31000885739593, "grad_norm": 0.23480024933815002, "learning_rate": 1e-05, "loss": 0.9693, "step": 96315 }, { "epoch": 85.31443755535872, "grad_norm": 0.231546089053154, "learning_rate": 1e-05, "loss": 0.9495, "step": 96320 }, { "epoch": 85.31886625332152, "grad_norm": 0.2620023190975189, "learning_rate": 1e-05, "loss": 1.0364, "step": 96325 }, { "epoch": 85.32329495128432, "grad_norm": 0.2844887673854828, "learning_rate": 1e-05, "loss": 1.0167, "step": 96330 }, { "epoch": 85.32772364924712, "grad_norm": 0.2536701560020447, "learning_rate": 1e-05, "loss": 0.9623, "step": 96335 }, { "epoch": 85.33215234720993, "grad_norm": 0.23318342864513397, "learning_rate": 1e-05, "loss": 0.9578, "step": 96340 }, { "epoch": 85.33658104517272, "grad_norm": 0.26637378334999084, "learning_rate": 1e-05, "loss": 0.9961, "step": 96345 }, { "epoch": 85.34100974313552, "grad_norm": 0.25212958455085754, "learning_rate": 1e-05, "loss": 0.9598, "step": 96350 }, { "epoch": 85.34543844109832, "grad_norm": 0.26395702362060547, "learning_rate": 1e-05, "loss": 0.9608, "step": 96355 }, { "epoch": 85.34986713906112, "grad_norm": 0.29097980260849, "learning_rate": 1e-05, "loss": 0.9555, "step": 96360 }, { "epoch": 85.35429583702391, "grad_norm": 0.22208768129348755, "learning_rate": 1e-05, "loss": 0.9395, "step": 96365 }, { "epoch": 85.35872453498672, "grad_norm": 0.25918999314308167, "learning_rate": 1e-05, "loss": 0.9609, "step": 96370 }, { "epoch": 85.36315323294951, "grad_norm": 0.23392996191978455, "learning_rate": 1e-05, "loss": 0.9653, "step": 96375 }, { "epoch": 85.36758193091231, "grad_norm": 0.24971581995487213, "learning_rate": 1e-05, "loss": 0.9596, "step": 96380 }, { "epoch": 85.37201062887512, "grad_norm": 0.24059641361236572, "learning_rate": 1e-05, "loss": 0.9421, "step": 96385 }, { "epoch": 85.37643932683791, "grad_norm": 0.29464736580848694, "learning_rate": 1e-05, "loss": 0.9971, "step": 96390 }, { "epoch": 85.3808680248007, "grad_norm": 0.30348721146583557, "learning_rate": 1e-05, "loss": 0.9004, "step": 96395 }, { "epoch": 85.38529672276351, "grad_norm": 0.19624000787734985, "learning_rate": 1e-05, "loss": 0.962, "step": 96400 }, { "epoch": 85.38972542072631, "grad_norm": 0.25620174407958984, "learning_rate": 1e-05, "loss": 0.9494, "step": 96405 }, { "epoch": 85.3941541186891, "grad_norm": 0.25557562708854675, "learning_rate": 1e-05, "loss": 0.945, "step": 96410 }, { "epoch": 85.39858281665191, "grad_norm": 0.23023800551891327, "learning_rate": 1e-05, "loss": 0.895, "step": 96415 }, { "epoch": 85.4030115146147, "grad_norm": 0.21636566519737244, "learning_rate": 1e-05, "loss": 0.9394, "step": 96420 }, { "epoch": 85.4074402125775, "grad_norm": 0.26349443197250366, "learning_rate": 1e-05, "loss": 0.965, "step": 96425 }, { "epoch": 85.4118689105403, "grad_norm": 0.21954220533370972, "learning_rate": 1e-05, "loss": 0.9134, "step": 96430 }, { "epoch": 85.4162976085031, "grad_norm": 0.23321829736232758, "learning_rate": 1e-05, "loss": 0.9248, "step": 96435 }, { "epoch": 85.4207263064659, "grad_norm": 0.22055462002754211, "learning_rate": 1e-05, "loss": 0.889, "step": 96440 }, { "epoch": 85.4251550044287, "grad_norm": 0.22671271860599518, "learning_rate": 1e-05, "loss": 0.9501, "step": 96445 }, { "epoch": 85.4295837023915, "grad_norm": 0.24289150536060333, "learning_rate": 1e-05, "loss": 0.9558, "step": 96450 }, { "epoch": 85.43401240035429, "grad_norm": 0.20306511223316193, "learning_rate": 1e-05, "loss": 0.9717, "step": 96455 }, { "epoch": 85.4384410983171, "grad_norm": 0.22065265476703644, "learning_rate": 1e-05, "loss": 0.9431, "step": 96460 }, { "epoch": 85.4428697962799, "grad_norm": 0.24153415858745575, "learning_rate": 1e-05, "loss": 0.9914, "step": 96465 }, { "epoch": 85.44729849424269, "grad_norm": 0.24186073243618011, "learning_rate": 1e-05, "loss": 0.9585, "step": 96470 }, { "epoch": 85.4517271922055, "grad_norm": 0.26280611753463745, "learning_rate": 1e-05, "loss": 0.9481, "step": 96475 }, { "epoch": 85.45615589016829, "grad_norm": 0.2052849531173706, "learning_rate": 1e-05, "loss": 0.9437, "step": 96480 }, { "epoch": 85.46058458813108, "grad_norm": 0.2617191672325134, "learning_rate": 1e-05, "loss": 0.966, "step": 96485 }, { "epoch": 85.46501328609389, "grad_norm": 0.22841627895832062, "learning_rate": 1e-05, "loss": 0.9595, "step": 96490 }, { "epoch": 85.46944198405669, "grad_norm": 0.22078357636928558, "learning_rate": 1e-05, "loss": 0.9802, "step": 96495 }, { "epoch": 85.47387068201948, "grad_norm": 0.18779288232326508, "learning_rate": 1e-05, "loss": 0.9664, "step": 96500 }, { "epoch": 85.47829937998229, "grad_norm": 0.2376197725534439, "learning_rate": 1e-05, "loss": 0.9269, "step": 96505 }, { "epoch": 85.48272807794508, "grad_norm": 0.2139972299337387, "learning_rate": 1e-05, "loss": 0.9672, "step": 96510 }, { "epoch": 85.48715677590788, "grad_norm": 0.23659105598926544, "learning_rate": 1e-05, "loss": 0.9475, "step": 96515 }, { "epoch": 85.49158547387069, "grad_norm": 0.20469188690185547, "learning_rate": 1e-05, "loss": 0.9637, "step": 96520 }, { "epoch": 85.49601417183348, "grad_norm": 0.2353823482990265, "learning_rate": 1e-05, "loss": 0.9727, "step": 96525 }, { "epoch": 85.50044286979627, "grad_norm": 0.21777375042438507, "learning_rate": 1e-05, "loss": 0.958, "step": 96530 }, { "epoch": 85.50487156775908, "grad_norm": 0.29329055547714233, "learning_rate": 1e-05, "loss": 0.9398, "step": 96535 }, { "epoch": 85.50930026572188, "grad_norm": 0.25493407249450684, "learning_rate": 1e-05, "loss": 0.9722, "step": 96540 }, { "epoch": 85.51372896368467, "grad_norm": 0.26193639636039734, "learning_rate": 1e-05, "loss": 0.9879, "step": 96545 }, { "epoch": 85.51815766164748, "grad_norm": 0.22499775886535645, "learning_rate": 1e-05, "loss": 0.9633, "step": 96550 }, { "epoch": 85.52258635961027, "grad_norm": 0.2297782003879547, "learning_rate": 1e-05, "loss": 0.9817, "step": 96555 }, { "epoch": 85.52701505757307, "grad_norm": 0.2217954397201538, "learning_rate": 1e-05, "loss": 0.9229, "step": 96560 }, { "epoch": 85.53144375553588, "grad_norm": 0.20019471645355225, "learning_rate": 1e-05, "loss": 0.9931, "step": 96565 }, { "epoch": 85.53587245349867, "grad_norm": 0.2243320196866989, "learning_rate": 1e-05, "loss": 0.9115, "step": 96570 }, { "epoch": 85.54030115146146, "grad_norm": 0.2591068744659424, "learning_rate": 1e-05, "loss": 0.934, "step": 96575 }, { "epoch": 85.54472984942427, "grad_norm": 0.2729327082633972, "learning_rate": 1e-05, "loss": 1.027, "step": 96580 }, { "epoch": 85.54915854738707, "grad_norm": 0.20644141733646393, "learning_rate": 1e-05, "loss": 0.9455, "step": 96585 }, { "epoch": 85.55358724534987, "grad_norm": 0.24437689781188965, "learning_rate": 1e-05, "loss": 0.9439, "step": 96590 }, { "epoch": 85.55801594331267, "grad_norm": 0.21154636144638062, "learning_rate": 1e-05, "loss": 0.9665, "step": 96595 }, { "epoch": 85.56244464127546, "grad_norm": 0.3021708130836487, "learning_rate": 1e-05, "loss": 0.9676, "step": 96600 }, { "epoch": 85.56687333923827, "grad_norm": 0.24401721358299255, "learning_rate": 1e-05, "loss": 0.9656, "step": 96605 }, { "epoch": 85.57130203720106, "grad_norm": 0.23233330249786377, "learning_rate": 1e-05, "loss": 0.9673, "step": 96610 }, { "epoch": 85.57573073516386, "grad_norm": 0.23136001825332642, "learning_rate": 1e-05, "loss": 0.9115, "step": 96615 }, { "epoch": 85.58015943312667, "grad_norm": 0.22464518249034882, "learning_rate": 1e-05, "loss": 0.9537, "step": 96620 }, { "epoch": 85.58458813108946, "grad_norm": 0.2325783371925354, "learning_rate": 1e-05, "loss": 0.9321, "step": 96625 }, { "epoch": 85.58901682905226, "grad_norm": 0.2508772313594818, "learning_rate": 1e-05, "loss": 0.9514, "step": 96630 }, { "epoch": 85.59344552701506, "grad_norm": 0.24181824922561646, "learning_rate": 1e-05, "loss": 0.9803, "step": 96635 }, { "epoch": 85.59787422497786, "grad_norm": 0.29301080107688904, "learning_rate": 1e-05, "loss": 0.9561, "step": 96640 }, { "epoch": 85.60230292294065, "grad_norm": 0.21493034064769745, "learning_rate": 1e-05, "loss": 1.0019, "step": 96645 }, { "epoch": 85.60673162090346, "grad_norm": 0.24663220345973969, "learning_rate": 1e-05, "loss": 0.9741, "step": 96650 }, { "epoch": 85.61116031886625, "grad_norm": 0.2454022467136383, "learning_rate": 1e-05, "loss": 0.9757, "step": 96655 }, { "epoch": 85.61558901682905, "grad_norm": 0.21560049057006836, "learning_rate": 1e-05, "loss": 0.9478, "step": 96660 }, { "epoch": 85.62001771479186, "grad_norm": 0.23938271403312683, "learning_rate": 1e-05, "loss": 0.9365, "step": 96665 }, { "epoch": 85.62444641275465, "grad_norm": 0.2568552494049072, "learning_rate": 1e-05, "loss": 0.9643, "step": 96670 }, { "epoch": 85.62887511071744, "grad_norm": 0.21122068166732788, "learning_rate": 1e-05, "loss": 0.9696, "step": 96675 }, { "epoch": 85.63330380868025, "grad_norm": 0.2101432830095291, "learning_rate": 1e-05, "loss": 0.9353, "step": 96680 }, { "epoch": 85.63773250664305, "grad_norm": 0.25863781571388245, "learning_rate": 1e-05, "loss": 1.014, "step": 96685 }, { "epoch": 85.64216120460584, "grad_norm": 0.28838083148002625, "learning_rate": 1e-05, "loss": 0.948, "step": 96690 }, { "epoch": 85.64658990256865, "grad_norm": 0.22984611988067627, "learning_rate": 1e-05, "loss": 0.9685, "step": 96695 }, { "epoch": 85.65101860053144, "grad_norm": 0.25642403960227966, "learning_rate": 1e-05, "loss": 0.961, "step": 96700 }, { "epoch": 85.65544729849424, "grad_norm": 0.2519679367542267, "learning_rate": 1e-05, "loss": 0.9533, "step": 96705 }, { "epoch": 85.65987599645705, "grad_norm": 0.23742642998695374, "learning_rate": 1e-05, "loss": 0.9985, "step": 96710 }, { "epoch": 85.66430469441984, "grad_norm": 0.20871853828430176, "learning_rate": 1e-05, "loss": 0.9324, "step": 96715 }, { "epoch": 85.66873339238263, "grad_norm": 0.2628812789916992, "learning_rate": 1e-05, "loss": 0.9581, "step": 96720 }, { "epoch": 85.67316209034544, "grad_norm": 0.2525511682033539, "learning_rate": 1e-05, "loss": 0.9678, "step": 96725 }, { "epoch": 85.67759078830824, "grad_norm": 0.2440093755722046, "learning_rate": 1e-05, "loss": 0.9312, "step": 96730 }, { "epoch": 85.68201948627103, "grad_norm": 0.25228914618492126, "learning_rate": 1e-05, "loss": 0.973, "step": 96735 }, { "epoch": 85.68644818423384, "grad_norm": 0.21076780557632446, "learning_rate": 1e-05, "loss": 0.9996, "step": 96740 }, { "epoch": 85.69087688219663, "grad_norm": 0.21402721107006073, "learning_rate": 1e-05, "loss": 0.9521, "step": 96745 }, { "epoch": 85.69530558015943, "grad_norm": 0.23448315262794495, "learning_rate": 1e-05, "loss": 0.9734, "step": 96750 }, { "epoch": 85.69973427812224, "grad_norm": 0.2593778669834137, "learning_rate": 1e-05, "loss": 0.9907, "step": 96755 }, { "epoch": 85.70416297608503, "grad_norm": 0.20808227360248566, "learning_rate": 1e-05, "loss": 0.9389, "step": 96760 }, { "epoch": 85.70859167404782, "grad_norm": 0.21210364997386932, "learning_rate": 1e-05, "loss": 0.9446, "step": 96765 }, { "epoch": 85.71302037201063, "grad_norm": 0.23420847952365875, "learning_rate": 1e-05, "loss": 0.9033, "step": 96770 }, { "epoch": 85.71744906997343, "grad_norm": 0.2520560026168823, "learning_rate": 1e-05, "loss": 0.9093, "step": 96775 }, { "epoch": 85.72187776793622, "grad_norm": 0.24139678478240967, "learning_rate": 1e-05, "loss": 0.9556, "step": 96780 }, { "epoch": 85.72630646589903, "grad_norm": 0.23454616963863373, "learning_rate": 1e-05, "loss": 0.96, "step": 96785 }, { "epoch": 85.73073516386182, "grad_norm": 0.2702997922897339, "learning_rate": 1e-05, "loss": 0.9376, "step": 96790 }, { "epoch": 85.73516386182462, "grad_norm": 0.2382647842168808, "learning_rate": 1e-05, "loss": 1.019, "step": 96795 }, { "epoch": 85.73959255978743, "grad_norm": 0.24645884335041046, "learning_rate": 1e-05, "loss": 0.9528, "step": 96800 }, { "epoch": 85.74402125775022, "grad_norm": 0.23619645833969116, "learning_rate": 1e-05, "loss": 0.9183, "step": 96805 }, { "epoch": 85.74844995571301, "grad_norm": 0.2531241476535797, "learning_rate": 1e-05, "loss": 0.906, "step": 96810 }, { "epoch": 85.75287865367582, "grad_norm": 0.2335454374551773, "learning_rate": 1e-05, "loss": 0.9561, "step": 96815 }, { "epoch": 85.75730735163862, "grad_norm": 0.2425515502691269, "learning_rate": 1e-05, "loss": 0.9607, "step": 96820 }, { "epoch": 85.76173604960141, "grad_norm": 0.2611462473869324, "learning_rate": 1e-05, "loss": 0.9548, "step": 96825 }, { "epoch": 85.76616474756422, "grad_norm": 0.24901819229125977, "learning_rate": 1e-05, "loss": 0.9268, "step": 96830 }, { "epoch": 85.77059344552701, "grad_norm": 0.2651823163032532, "learning_rate": 1e-05, "loss": 0.9529, "step": 96835 }, { "epoch": 85.77502214348982, "grad_norm": 0.2842780649662018, "learning_rate": 1e-05, "loss": 0.9639, "step": 96840 }, { "epoch": 85.77945084145261, "grad_norm": 0.23318541049957275, "learning_rate": 1e-05, "loss": 0.9472, "step": 96845 }, { "epoch": 85.78387953941541, "grad_norm": 0.2438599020242691, "learning_rate": 1e-05, "loss": 0.9521, "step": 96850 }, { "epoch": 85.78830823737822, "grad_norm": 0.24094738066196442, "learning_rate": 1e-05, "loss": 0.9321, "step": 96855 }, { "epoch": 85.79273693534101, "grad_norm": 0.22020454704761505, "learning_rate": 1e-05, "loss": 0.9158, "step": 96860 }, { "epoch": 85.7971656333038, "grad_norm": 0.30342698097229004, "learning_rate": 1e-05, "loss": 0.9503, "step": 96865 }, { "epoch": 85.80159433126661, "grad_norm": 0.2090945988893509, "learning_rate": 1e-05, "loss": 0.9853, "step": 96870 }, { "epoch": 85.80602302922941, "grad_norm": 0.2712833285331726, "learning_rate": 1e-05, "loss": 0.9099, "step": 96875 }, { "epoch": 85.8104517271922, "grad_norm": 0.2247336506843567, "learning_rate": 1e-05, "loss": 1.0278, "step": 96880 }, { "epoch": 85.81488042515501, "grad_norm": 0.20719477534294128, "learning_rate": 1e-05, "loss": 0.9716, "step": 96885 }, { "epoch": 85.8193091231178, "grad_norm": 0.24392910301685333, "learning_rate": 1e-05, "loss": 0.9825, "step": 96890 }, { "epoch": 85.8237378210806, "grad_norm": 0.20210695266723633, "learning_rate": 1e-05, "loss": 0.9931, "step": 96895 }, { "epoch": 85.8281665190434, "grad_norm": 0.22325649857521057, "learning_rate": 1e-05, "loss": 0.9356, "step": 96900 }, { "epoch": 85.8325952170062, "grad_norm": 0.22377164661884308, "learning_rate": 1e-05, "loss": 0.9588, "step": 96905 }, { "epoch": 85.837023914969, "grad_norm": 0.2175963819026947, "learning_rate": 1e-05, "loss": 0.9793, "step": 96910 }, { "epoch": 85.8414526129318, "grad_norm": 0.23284812271595, "learning_rate": 1e-05, "loss": 0.9192, "step": 96915 }, { "epoch": 85.8458813108946, "grad_norm": 0.2144775688648224, "learning_rate": 1e-05, "loss": 0.9852, "step": 96920 }, { "epoch": 85.85031000885739, "grad_norm": 0.2345629632472992, "learning_rate": 1e-05, "loss": 0.9485, "step": 96925 }, { "epoch": 85.8547387068202, "grad_norm": 0.22753940522670746, "learning_rate": 1e-05, "loss": 0.9694, "step": 96930 }, { "epoch": 85.859167404783, "grad_norm": 0.22238260507583618, "learning_rate": 1e-05, "loss": 0.8913, "step": 96935 }, { "epoch": 85.86359610274579, "grad_norm": 0.2365919053554535, "learning_rate": 1e-05, "loss": 0.9111, "step": 96940 }, { "epoch": 85.8680248007086, "grad_norm": 0.2639627158641815, "learning_rate": 1e-05, "loss": 0.8692, "step": 96945 }, { "epoch": 85.87245349867139, "grad_norm": 0.24952369928359985, "learning_rate": 1e-05, "loss": 0.9649, "step": 96950 }, { "epoch": 85.87688219663418, "grad_norm": 0.2549598217010498, "learning_rate": 1e-05, "loss": 0.9617, "step": 96955 }, { "epoch": 85.881310894597, "grad_norm": 0.2707740068435669, "learning_rate": 1e-05, "loss": 0.9915, "step": 96960 }, { "epoch": 85.88573959255979, "grad_norm": 0.20950205624103546, "learning_rate": 1e-05, "loss": 0.9633, "step": 96965 }, { "epoch": 85.89016829052258, "grad_norm": 0.26541775465011597, "learning_rate": 1e-05, "loss": 0.9625, "step": 96970 }, { "epoch": 85.89459698848539, "grad_norm": 0.22506029903888702, "learning_rate": 1e-05, "loss": 0.8782, "step": 96975 }, { "epoch": 85.89902568644818, "grad_norm": 0.24413637816905975, "learning_rate": 1e-05, "loss": 0.9545, "step": 96980 }, { "epoch": 85.90345438441098, "grad_norm": 0.2097465693950653, "learning_rate": 1e-05, "loss": 0.9399, "step": 96985 }, { "epoch": 85.90788308237379, "grad_norm": 0.21111370623111725, "learning_rate": 1e-05, "loss": 0.9345, "step": 96990 }, { "epoch": 85.91231178033658, "grad_norm": 0.2513578534126282, "learning_rate": 1e-05, "loss": 0.9302, "step": 96995 }, { "epoch": 85.91674047829937, "grad_norm": 0.2248516082763672, "learning_rate": 1e-05, "loss": 0.9956, "step": 97000 }, { "epoch": 85.92116917626218, "grad_norm": 0.22819189727306366, "learning_rate": 1e-05, "loss": 0.9609, "step": 97005 }, { "epoch": 85.92559787422498, "grad_norm": 0.25540363788604736, "learning_rate": 1e-05, "loss": 0.9855, "step": 97010 }, { "epoch": 85.93002657218777, "grad_norm": 0.20727913081645966, "learning_rate": 1e-05, "loss": 0.9276, "step": 97015 }, { "epoch": 85.93445527015058, "grad_norm": 0.20069310069084167, "learning_rate": 1e-05, "loss": 0.9544, "step": 97020 }, { "epoch": 85.93888396811337, "grad_norm": 0.2319866120815277, "learning_rate": 1e-05, "loss": 0.9395, "step": 97025 }, { "epoch": 85.94331266607617, "grad_norm": 0.23785686492919922, "learning_rate": 1e-05, "loss": 0.9658, "step": 97030 }, { "epoch": 85.94774136403898, "grad_norm": 0.26096266508102417, "learning_rate": 1e-05, "loss": 1.0067, "step": 97035 }, { "epoch": 85.95217006200177, "grad_norm": 0.22606976330280304, "learning_rate": 1e-05, "loss": 0.9281, "step": 97040 }, { "epoch": 85.95659875996456, "grad_norm": 0.2725220024585724, "learning_rate": 1e-05, "loss": 0.9574, "step": 97045 }, { "epoch": 85.96102745792737, "grad_norm": 0.2425740361213684, "learning_rate": 1e-05, "loss": 0.9095, "step": 97050 }, { "epoch": 85.96545615589017, "grad_norm": 0.2293911725282669, "learning_rate": 1e-05, "loss": 0.9468, "step": 97055 }, { "epoch": 85.96988485385296, "grad_norm": 0.2514573037624359, "learning_rate": 1e-05, "loss": 0.9822, "step": 97060 }, { "epoch": 85.97431355181577, "grad_norm": 0.24325360357761383, "learning_rate": 1e-05, "loss": 0.9633, "step": 97065 }, { "epoch": 85.97874224977856, "grad_norm": 0.2408817559480667, "learning_rate": 1e-05, "loss": 0.8835, "step": 97070 }, { "epoch": 85.98317094774137, "grad_norm": 0.19104577600955963, "learning_rate": 1e-05, "loss": 0.9023, "step": 97075 }, { "epoch": 85.98759964570417, "grad_norm": 0.2191479653120041, "learning_rate": 1e-05, "loss": 0.9726, "step": 97080 }, { "epoch": 85.99202834366696, "grad_norm": 0.24199625849723816, "learning_rate": 1e-05, "loss": 1.0, "step": 97085 }, { "epoch": 85.99645704162977, "grad_norm": 0.23012059926986694, "learning_rate": 1e-05, "loss": 0.9908, "step": 97090 }, { "epoch": 86.00088573959256, "grad_norm": 0.24371026456356049, "learning_rate": 1e-05, "loss": 0.9571, "step": 97095 }, { "epoch": 86.00531443755536, "grad_norm": 0.2351166009902954, "learning_rate": 1e-05, "loss": 0.9161, "step": 97100 }, { "epoch": 86.00974313551816, "grad_norm": 0.2213047295808792, "learning_rate": 1e-05, "loss": 0.9989, "step": 97105 }, { "epoch": 86.01417183348096, "grad_norm": 0.2363177090883255, "learning_rate": 1e-05, "loss": 1.0112, "step": 97110 }, { "epoch": 86.01860053144375, "grad_norm": 0.2377803921699524, "learning_rate": 1e-05, "loss": 0.9978, "step": 97115 }, { "epoch": 86.02302922940656, "grad_norm": 0.24732117354869843, "learning_rate": 1e-05, "loss": 0.9492, "step": 97120 }, { "epoch": 86.02745792736935, "grad_norm": 0.28680679202079773, "learning_rate": 1e-05, "loss": 0.9339, "step": 97125 }, { "epoch": 86.03188662533215, "grad_norm": 0.2635520398616791, "learning_rate": 1e-05, "loss": 0.9787, "step": 97130 }, { "epoch": 86.03631532329496, "grad_norm": 0.22710517048835754, "learning_rate": 1e-05, "loss": 0.9144, "step": 97135 }, { "epoch": 86.04074402125775, "grad_norm": 0.2916337251663208, "learning_rate": 1e-05, "loss": 0.9394, "step": 97140 }, { "epoch": 86.04517271922055, "grad_norm": 0.20640406012535095, "learning_rate": 1e-05, "loss": 0.9387, "step": 97145 }, { "epoch": 86.04960141718335, "grad_norm": 0.22454360127449036, "learning_rate": 1e-05, "loss": 0.9884, "step": 97150 }, { "epoch": 86.05403011514615, "grad_norm": 0.22561374306678772, "learning_rate": 1e-05, "loss": 0.9334, "step": 97155 }, { "epoch": 86.05845881310894, "grad_norm": 0.2779489755630493, "learning_rate": 1e-05, "loss": 0.921, "step": 97160 }, { "epoch": 86.06288751107175, "grad_norm": 0.23810631036758423, "learning_rate": 1e-05, "loss": 0.9694, "step": 97165 }, { "epoch": 86.06731620903454, "grad_norm": 0.22184278070926666, "learning_rate": 1e-05, "loss": 0.9531, "step": 97170 }, { "epoch": 86.07174490699734, "grad_norm": 0.21872155368328094, "learning_rate": 1e-05, "loss": 0.9571, "step": 97175 }, { "epoch": 86.07617360496015, "grad_norm": 0.22361136972904205, "learning_rate": 1e-05, "loss": 0.9432, "step": 97180 }, { "epoch": 86.08060230292294, "grad_norm": 0.2417464703321457, "learning_rate": 1e-05, "loss": 0.9857, "step": 97185 }, { "epoch": 86.08503100088573, "grad_norm": 0.3094818890094757, "learning_rate": 1e-05, "loss": 0.9456, "step": 97190 }, { "epoch": 86.08945969884854, "grad_norm": 0.25736093521118164, "learning_rate": 1e-05, "loss": 0.9958, "step": 97195 }, { "epoch": 86.09388839681134, "grad_norm": 0.23641172051429749, "learning_rate": 1e-05, "loss": 0.9453, "step": 97200 }, { "epoch": 86.09831709477413, "grad_norm": 0.2542100250720978, "learning_rate": 1e-05, "loss": 0.9562, "step": 97205 }, { "epoch": 86.10274579273694, "grad_norm": 0.2423756867647171, "learning_rate": 1e-05, "loss": 0.9855, "step": 97210 }, { "epoch": 86.10717449069973, "grad_norm": 0.23302805423736572, "learning_rate": 1e-05, "loss": 0.9717, "step": 97215 }, { "epoch": 86.11160318866253, "grad_norm": 0.2346411794424057, "learning_rate": 1e-05, "loss": 1.0143, "step": 97220 }, { "epoch": 86.11603188662534, "grad_norm": 0.24297769367694855, "learning_rate": 1e-05, "loss": 0.9384, "step": 97225 }, { "epoch": 86.12046058458813, "grad_norm": 0.2443009912967682, "learning_rate": 1e-05, "loss": 0.9471, "step": 97230 }, { "epoch": 86.12488928255092, "grad_norm": 0.23632077872753143, "learning_rate": 1e-05, "loss": 0.969, "step": 97235 }, { "epoch": 86.12931798051373, "grad_norm": 0.21805907785892487, "learning_rate": 1e-05, "loss": 1.0009, "step": 97240 }, { "epoch": 86.13374667847653, "grad_norm": 0.294930636882782, "learning_rate": 1e-05, "loss": 0.9854, "step": 97245 }, { "epoch": 86.13817537643932, "grad_norm": 0.26988664269447327, "learning_rate": 1e-05, "loss": 0.9919, "step": 97250 }, { "epoch": 86.14260407440213, "grad_norm": 0.24270285665988922, "learning_rate": 1e-05, "loss": 0.9395, "step": 97255 }, { "epoch": 86.14703277236492, "grad_norm": 0.298213392496109, "learning_rate": 1e-05, "loss": 0.9583, "step": 97260 }, { "epoch": 86.15146147032772, "grad_norm": 0.22539810836315155, "learning_rate": 1e-05, "loss": 1.0006, "step": 97265 }, { "epoch": 86.15589016829053, "grad_norm": 0.2500755488872528, "learning_rate": 1e-05, "loss": 1.0194, "step": 97270 }, { "epoch": 86.16031886625332, "grad_norm": 0.2239968478679657, "learning_rate": 1e-05, "loss": 1.0032, "step": 97275 }, { "epoch": 86.16474756421611, "grad_norm": 0.2412382960319519, "learning_rate": 1e-05, "loss": 0.9491, "step": 97280 }, { "epoch": 86.16917626217892, "grad_norm": 0.24195455014705658, "learning_rate": 1e-05, "loss": 0.9458, "step": 97285 }, { "epoch": 86.17360496014172, "grad_norm": 0.25617069005966187, "learning_rate": 1e-05, "loss": 0.9848, "step": 97290 }, { "epoch": 86.17803365810451, "grad_norm": 0.24802014231681824, "learning_rate": 1e-05, "loss": 0.9344, "step": 97295 }, { "epoch": 86.18246235606732, "grad_norm": 0.29795777797698975, "learning_rate": 1e-05, "loss": 0.981, "step": 97300 }, { "epoch": 86.18689105403011, "grad_norm": 0.26427629590034485, "learning_rate": 1e-05, "loss": 0.9646, "step": 97305 }, { "epoch": 86.1913197519929, "grad_norm": 0.2762966752052307, "learning_rate": 1e-05, "loss": 0.9804, "step": 97310 }, { "epoch": 86.19574844995572, "grad_norm": 0.24291293323040009, "learning_rate": 1e-05, "loss": 0.9439, "step": 97315 }, { "epoch": 86.20017714791851, "grad_norm": 0.2579694390296936, "learning_rate": 1e-05, "loss": 0.9797, "step": 97320 }, { "epoch": 86.20460584588132, "grad_norm": 0.2655971944332123, "learning_rate": 1e-05, "loss": 0.9113, "step": 97325 }, { "epoch": 86.20903454384411, "grad_norm": 0.21086034178733826, "learning_rate": 1e-05, "loss": 1.0157, "step": 97330 }, { "epoch": 86.2134632418069, "grad_norm": 0.23919431865215302, "learning_rate": 1e-05, "loss": 0.9885, "step": 97335 }, { "epoch": 86.21789193976971, "grad_norm": 0.24885676801204681, "learning_rate": 1e-05, "loss": 0.9468, "step": 97340 }, { "epoch": 86.22232063773251, "grad_norm": 0.25179407000541687, "learning_rate": 1e-05, "loss": 0.915, "step": 97345 }, { "epoch": 86.2267493356953, "grad_norm": 0.23263004422187805, "learning_rate": 1e-05, "loss": 1.0035, "step": 97350 }, { "epoch": 86.23117803365811, "grad_norm": 0.21684429049491882, "learning_rate": 1e-05, "loss": 0.918, "step": 97355 }, { "epoch": 86.2356067316209, "grad_norm": 0.2501088082790375, "learning_rate": 1e-05, "loss": 0.9823, "step": 97360 }, { "epoch": 86.2400354295837, "grad_norm": 0.265235036611557, "learning_rate": 1e-05, "loss": 0.9816, "step": 97365 }, { "epoch": 86.24446412754651, "grad_norm": 0.3170413672924042, "learning_rate": 1e-05, "loss": 0.9679, "step": 97370 }, { "epoch": 86.2488928255093, "grad_norm": 0.22767452895641327, "learning_rate": 1e-05, "loss": 0.9334, "step": 97375 }, { "epoch": 86.2533215234721, "grad_norm": 0.29185739159584045, "learning_rate": 1e-05, "loss": 0.9368, "step": 97380 }, { "epoch": 86.2577502214349, "grad_norm": 0.23806607723236084, "learning_rate": 1e-05, "loss": 0.9628, "step": 97385 }, { "epoch": 86.2621789193977, "grad_norm": 0.2752698063850403, "learning_rate": 1e-05, "loss": 0.9415, "step": 97390 }, { "epoch": 86.26660761736049, "grad_norm": 0.226464182138443, "learning_rate": 1e-05, "loss": 0.9602, "step": 97395 }, { "epoch": 86.2710363153233, "grad_norm": 0.2214711308479309, "learning_rate": 1e-05, "loss": 0.9941, "step": 97400 }, { "epoch": 86.2754650132861, "grad_norm": 0.22089965641498566, "learning_rate": 1e-05, "loss": 0.9187, "step": 97405 }, { "epoch": 86.27989371124889, "grad_norm": 0.2389654815196991, "learning_rate": 1e-05, "loss": 0.9846, "step": 97410 }, { "epoch": 86.2843224092117, "grad_norm": 0.2774227261543274, "learning_rate": 1e-05, "loss": 1.0066, "step": 97415 }, { "epoch": 86.28875110717449, "grad_norm": 0.3148418962955475, "learning_rate": 1e-05, "loss": 0.9009, "step": 97420 }, { "epoch": 86.29317980513729, "grad_norm": 0.28654780983924866, "learning_rate": 1e-05, "loss": 0.918, "step": 97425 }, { "epoch": 86.2976085031001, "grad_norm": 0.2448236495256424, "learning_rate": 1e-05, "loss": 0.9718, "step": 97430 }, { "epoch": 86.30203720106289, "grad_norm": 0.26177218556404114, "learning_rate": 1e-05, "loss": 0.9077, "step": 97435 }, { "epoch": 86.30646589902568, "grad_norm": 0.2374694049358368, "learning_rate": 1e-05, "loss": 0.9321, "step": 97440 }, { "epoch": 86.31089459698849, "grad_norm": 0.23216263949871063, "learning_rate": 1e-05, "loss": 1.0455, "step": 97445 }, { "epoch": 86.31532329495128, "grad_norm": 0.2625068426132202, "learning_rate": 1e-05, "loss": 0.9996, "step": 97450 }, { "epoch": 86.31975199291408, "grad_norm": 0.23084689676761627, "learning_rate": 1e-05, "loss": 0.9548, "step": 97455 }, { "epoch": 86.32418069087689, "grad_norm": 0.2509269118309021, "learning_rate": 1e-05, "loss": 0.9405, "step": 97460 }, { "epoch": 86.32860938883968, "grad_norm": 0.26281052827835083, "learning_rate": 1e-05, "loss": 1.0168, "step": 97465 }, { "epoch": 86.33303808680247, "grad_norm": 0.2245265543460846, "learning_rate": 1e-05, "loss": 0.9154, "step": 97470 }, { "epoch": 86.33746678476528, "grad_norm": 0.24464735388755798, "learning_rate": 1e-05, "loss": 0.9514, "step": 97475 }, { "epoch": 86.34189548272808, "grad_norm": 0.2500230371952057, "learning_rate": 1e-05, "loss": 0.9556, "step": 97480 }, { "epoch": 86.34632418069087, "grad_norm": 0.2138415426015854, "learning_rate": 1e-05, "loss": 1.0278, "step": 97485 }, { "epoch": 86.35075287865368, "grad_norm": 0.2069804072380066, "learning_rate": 1e-05, "loss": 0.9927, "step": 97490 }, { "epoch": 86.35518157661647, "grad_norm": 0.2307499647140503, "learning_rate": 1e-05, "loss": 0.9638, "step": 97495 }, { "epoch": 86.35961027457927, "grad_norm": 0.22184212505817413, "learning_rate": 1e-05, "loss": 0.9307, "step": 97500 }, { "epoch": 86.36403897254208, "grad_norm": 0.23166102170944214, "learning_rate": 1e-05, "loss": 0.9585, "step": 97505 }, { "epoch": 86.36846767050487, "grad_norm": 0.2785622775554657, "learning_rate": 1e-05, "loss": 0.9809, "step": 97510 }, { "epoch": 86.37289636846766, "grad_norm": 0.23424622416496277, "learning_rate": 1e-05, "loss": 0.9336, "step": 97515 }, { "epoch": 86.37732506643047, "grad_norm": 0.2433195263147354, "learning_rate": 1e-05, "loss": 1.0421, "step": 97520 }, { "epoch": 86.38175376439327, "grad_norm": 0.2580808103084564, "learning_rate": 1e-05, "loss": 0.9781, "step": 97525 }, { "epoch": 86.38618246235606, "grad_norm": 0.26988548040390015, "learning_rate": 1e-05, "loss": 0.978, "step": 97530 }, { "epoch": 86.39061116031887, "grad_norm": 0.337377667427063, "learning_rate": 1e-05, "loss": 0.9562, "step": 97535 }, { "epoch": 86.39503985828166, "grad_norm": 0.2984538674354553, "learning_rate": 1e-05, "loss": 0.9319, "step": 97540 }, { "epoch": 86.39946855624446, "grad_norm": 0.2478247433900833, "learning_rate": 1e-05, "loss": 0.9671, "step": 97545 }, { "epoch": 86.40389725420727, "grad_norm": 0.2710384428501129, "learning_rate": 1e-05, "loss": 0.9756, "step": 97550 }, { "epoch": 86.40832595217006, "grad_norm": 0.2437439113855362, "learning_rate": 1e-05, "loss": 0.9693, "step": 97555 }, { "epoch": 86.41275465013285, "grad_norm": 0.28736335039138794, "learning_rate": 1e-05, "loss": 0.9824, "step": 97560 }, { "epoch": 86.41718334809566, "grad_norm": 0.2324373722076416, "learning_rate": 1e-05, "loss": 0.944, "step": 97565 }, { "epoch": 86.42161204605846, "grad_norm": 0.2412739396095276, "learning_rate": 1e-05, "loss": 0.9353, "step": 97570 }, { "epoch": 86.42604074402126, "grad_norm": 0.24981698393821716, "learning_rate": 1e-05, "loss": 0.9649, "step": 97575 }, { "epoch": 86.43046944198406, "grad_norm": 0.2564948499202728, "learning_rate": 1e-05, "loss": 0.9439, "step": 97580 }, { "epoch": 86.43489813994685, "grad_norm": 0.27079471945762634, "learning_rate": 1e-05, "loss": 0.9305, "step": 97585 }, { "epoch": 86.43932683790966, "grad_norm": 0.2288156896829605, "learning_rate": 1e-05, "loss": 0.8891, "step": 97590 }, { "epoch": 86.44375553587246, "grad_norm": 0.2102874368429184, "learning_rate": 1e-05, "loss": 0.9836, "step": 97595 }, { "epoch": 86.44818423383525, "grad_norm": 0.23175379633903503, "learning_rate": 1e-05, "loss": 1.0107, "step": 97600 }, { "epoch": 86.45261293179806, "grad_norm": 0.2514692544937134, "learning_rate": 1e-05, "loss": 0.9449, "step": 97605 }, { "epoch": 86.45704162976085, "grad_norm": 0.22433802485466003, "learning_rate": 1e-05, "loss": 0.9703, "step": 97610 }, { "epoch": 86.46147032772365, "grad_norm": 0.24838802218437195, "learning_rate": 1e-05, "loss": 0.9238, "step": 97615 }, { "epoch": 86.46589902568645, "grad_norm": 0.2515776455402374, "learning_rate": 1e-05, "loss": 0.9214, "step": 97620 }, { "epoch": 86.47032772364925, "grad_norm": 0.2791557312011719, "learning_rate": 1e-05, "loss": 0.9796, "step": 97625 }, { "epoch": 86.47475642161204, "grad_norm": 0.28195664286613464, "learning_rate": 1e-05, "loss": 1.0154, "step": 97630 }, { "epoch": 86.47918511957485, "grad_norm": 0.22671641409397125, "learning_rate": 1e-05, "loss": 0.9702, "step": 97635 }, { "epoch": 86.48361381753764, "grad_norm": 0.219867542386055, "learning_rate": 1e-05, "loss": 0.9645, "step": 97640 }, { "epoch": 86.48804251550044, "grad_norm": 0.21709716320037842, "learning_rate": 1e-05, "loss": 0.9675, "step": 97645 }, { "epoch": 86.49247121346325, "grad_norm": 0.22821044921875, "learning_rate": 1e-05, "loss": 0.993, "step": 97650 }, { "epoch": 86.49689991142604, "grad_norm": 0.22244662046432495, "learning_rate": 1e-05, "loss": 0.9457, "step": 97655 }, { "epoch": 86.50132860938884, "grad_norm": 0.22795841097831726, "learning_rate": 1e-05, "loss": 0.955, "step": 97660 }, { "epoch": 86.50575730735164, "grad_norm": 0.25087156891822815, "learning_rate": 1e-05, "loss": 0.942, "step": 97665 }, { "epoch": 86.51018600531444, "grad_norm": 0.2219347506761551, "learning_rate": 1e-05, "loss": 0.9134, "step": 97670 }, { "epoch": 86.51461470327723, "grad_norm": 0.2347313016653061, "learning_rate": 1e-05, "loss": 0.9628, "step": 97675 }, { "epoch": 86.51904340124004, "grad_norm": 0.2769928574562073, "learning_rate": 1e-05, "loss": 0.9315, "step": 97680 }, { "epoch": 86.52347209920283, "grad_norm": 0.2632846236228943, "learning_rate": 1e-05, "loss": 0.957, "step": 97685 }, { "epoch": 86.52790079716563, "grad_norm": 0.2277977466583252, "learning_rate": 1e-05, "loss": 0.9691, "step": 97690 }, { "epoch": 86.53232949512844, "grad_norm": 0.21311065554618835, "learning_rate": 1e-05, "loss": 0.9577, "step": 97695 }, { "epoch": 86.53675819309123, "grad_norm": 0.22003811597824097, "learning_rate": 1e-05, "loss": 0.9305, "step": 97700 }, { "epoch": 86.54118689105402, "grad_norm": 0.21697616577148438, "learning_rate": 1e-05, "loss": 0.9763, "step": 97705 }, { "epoch": 86.54561558901683, "grad_norm": 0.24771209061145782, "learning_rate": 1e-05, "loss": 0.9585, "step": 97710 }, { "epoch": 86.55004428697963, "grad_norm": 0.27790436148643494, "learning_rate": 1e-05, "loss": 0.9549, "step": 97715 }, { "epoch": 86.55447298494242, "grad_norm": 0.21535947918891907, "learning_rate": 1e-05, "loss": 0.9449, "step": 97720 }, { "epoch": 86.55890168290523, "grad_norm": 0.23929442465305328, "learning_rate": 1e-05, "loss": 0.9303, "step": 97725 }, { "epoch": 86.56333038086802, "grad_norm": 0.225034698843956, "learning_rate": 1e-05, "loss": 0.9814, "step": 97730 }, { "epoch": 86.56775907883082, "grad_norm": 0.24152566492557526, "learning_rate": 1e-05, "loss": 0.9137, "step": 97735 }, { "epoch": 86.57218777679363, "grad_norm": 0.22860662639141083, "learning_rate": 1e-05, "loss": 0.9791, "step": 97740 }, { "epoch": 86.57661647475642, "grad_norm": 0.23329578340053558, "learning_rate": 1e-05, "loss": 0.9615, "step": 97745 }, { "epoch": 86.58104517271921, "grad_norm": 0.23391984403133392, "learning_rate": 1e-05, "loss": 0.9703, "step": 97750 }, { "epoch": 86.58547387068202, "grad_norm": 0.2556324303150177, "learning_rate": 1e-05, "loss": 0.8946, "step": 97755 }, { "epoch": 86.58990256864482, "grad_norm": 0.26190054416656494, "learning_rate": 1e-05, "loss": 0.9195, "step": 97760 }, { "epoch": 86.59433126660761, "grad_norm": 0.2326575666666031, "learning_rate": 1e-05, "loss": 0.9774, "step": 97765 }, { "epoch": 86.59875996457042, "grad_norm": 0.2641197741031647, "learning_rate": 1e-05, "loss": 0.9527, "step": 97770 }, { "epoch": 86.60318866253321, "grad_norm": 0.2541007995605469, "learning_rate": 1e-05, "loss": 0.9328, "step": 97775 }, { "epoch": 86.60761736049601, "grad_norm": 0.22842933237552643, "learning_rate": 1e-05, "loss": 0.9399, "step": 97780 }, { "epoch": 86.61204605845882, "grad_norm": 0.2426004260778427, "learning_rate": 1e-05, "loss": 0.9686, "step": 97785 }, { "epoch": 86.61647475642161, "grad_norm": 0.2489362359046936, "learning_rate": 1e-05, "loss": 0.9845, "step": 97790 }, { "epoch": 86.6209034543844, "grad_norm": 0.2868337035179138, "learning_rate": 1e-05, "loss": 0.9339, "step": 97795 }, { "epoch": 86.62533215234721, "grad_norm": 0.20236356556415558, "learning_rate": 1e-05, "loss": 0.9909, "step": 97800 }, { "epoch": 86.62976085031, "grad_norm": 0.20789262652397156, "learning_rate": 1e-05, "loss": 0.9994, "step": 97805 }, { "epoch": 86.63418954827281, "grad_norm": 0.23282887041568756, "learning_rate": 1e-05, "loss": 0.9866, "step": 97810 }, { "epoch": 86.63861824623561, "grad_norm": 0.2431809902191162, "learning_rate": 1e-05, "loss": 0.9513, "step": 97815 }, { "epoch": 86.6430469441984, "grad_norm": 0.25418025255203247, "learning_rate": 1e-05, "loss": 0.9633, "step": 97820 }, { "epoch": 86.64747564216121, "grad_norm": 0.25638049840927124, "learning_rate": 1e-05, "loss": 0.9966, "step": 97825 }, { "epoch": 86.651904340124, "grad_norm": 0.2520405352115631, "learning_rate": 1e-05, "loss": 0.926, "step": 97830 }, { "epoch": 86.6563330380868, "grad_norm": 0.24076220393180847, "learning_rate": 1e-05, "loss": 0.9583, "step": 97835 }, { "epoch": 86.66076173604961, "grad_norm": 0.2553439736366272, "learning_rate": 1e-05, "loss": 0.952, "step": 97840 }, { "epoch": 86.6651904340124, "grad_norm": 0.23596803843975067, "learning_rate": 1e-05, "loss": 0.9773, "step": 97845 }, { "epoch": 86.6696191319752, "grad_norm": 0.23852095007896423, "learning_rate": 1e-05, "loss": 1.0229, "step": 97850 }, { "epoch": 86.674047829938, "grad_norm": 0.2198798656463623, "learning_rate": 1e-05, "loss": 0.9618, "step": 97855 }, { "epoch": 86.6784765279008, "grad_norm": 0.2180701345205307, "learning_rate": 1e-05, "loss": 0.9866, "step": 97860 }, { "epoch": 86.68290522586359, "grad_norm": 0.23095166683197021, "learning_rate": 1e-05, "loss": 0.9304, "step": 97865 }, { "epoch": 86.6873339238264, "grad_norm": 0.2211650311946869, "learning_rate": 1e-05, "loss": 0.932, "step": 97870 }, { "epoch": 86.6917626217892, "grad_norm": 0.2256811410188675, "learning_rate": 1e-05, "loss": 0.9374, "step": 97875 }, { "epoch": 86.69619131975199, "grad_norm": 0.21950854361057281, "learning_rate": 1e-05, "loss": 0.9619, "step": 97880 }, { "epoch": 86.7006200177148, "grad_norm": 0.23210366070270538, "learning_rate": 1e-05, "loss": 0.9182, "step": 97885 }, { "epoch": 86.70504871567759, "grad_norm": 0.25355851650238037, "learning_rate": 1e-05, "loss": 0.9921, "step": 97890 }, { "epoch": 86.70947741364039, "grad_norm": 0.2949579656124115, "learning_rate": 1e-05, "loss": 0.9662, "step": 97895 }, { "epoch": 86.7139061116032, "grad_norm": 0.2376047819852829, "learning_rate": 1e-05, "loss": 0.9494, "step": 97900 }, { "epoch": 86.71833480956599, "grad_norm": 0.20439501106739044, "learning_rate": 1e-05, "loss": 1.022, "step": 97905 }, { "epoch": 86.72276350752878, "grad_norm": 0.2237773984670639, "learning_rate": 1e-05, "loss": 0.9741, "step": 97910 }, { "epoch": 86.72719220549159, "grad_norm": 0.21593715250492096, "learning_rate": 1e-05, "loss": 0.975, "step": 97915 }, { "epoch": 86.73162090345438, "grad_norm": 0.23074491322040558, "learning_rate": 1e-05, "loss": 0.9203, "step": 97920 }, { "epoch": 86.73604960141718, "grad_norm": 0.21759459376335144, "learning_rate": 1e-05, "loss": 1.0382, "step": 97925 }, { "epoch": 86.74047829937999, "grad_norm": 0.24667830765247345, "learning_rate": 1e-05, "loss": 0.9226, "step": 97930 }, { "epoch": 86.74490699734278, "grad_norm": 0.2486148178577423, "learning_rate": 1e-05, "loss": 0.9405, "step": 97935 }, { "epoch": 86.74933569530558, "grad_norm": 0.21728363633155823, "learning_rate": 1e-05, "loss": 0.9239, "step": 97940 }, { "epoch": 86.75376439326838, "grad_norm": 0.2204359620809555, "learning_rate": 1e-05, "loss": 0.9251, "step": 97945 }, { "epoch": 86.75819309123118, "grad_norm": 0.24216429889202118, "learning_rate": 1e-05, "loss": 0.8985, "step": 97950 }, { "epoch": 86.76262178919397, "grad_norm": 0.26172107458114624, "learning_rate": 1e-05, "loss": 1.0401, "step": 97955 }, { "epoch": 86.76705048715678, "grad_norm": 0.2145540416240692, "learning_rate": 1e-05, "loss": 0.9823, "step": 97960 }, { "epoch": 86.77147918511957, "grad_norm": 0.22526490688323975, "learning_rate": 1e-05, "loss": 0.9669, "step": 97965 }, { "epoch": 86.77590788308237, "grad_norm": 0.22993145883083344, "learning_rate": 1e-05, "loss": 0.9586, "step": 97970 }, { "epoch": 86.78033658104518, "grad_norm": 0.24860502779483795, "learning_rate": 1e-05, "loss": 0.9626, "step": 97975 }, { "epoch": 86.78476527900797, "grad_norm": 0.23333078622817993, "learning_rate": 1e-05, "loss": 0.9631, "step": 97980 }, { "epoch": 86.78919397697076, "grad_norm": 0.2229924350976944, "learning_rate": 1e-05, "loss": 0.9426, "step": 97985 }, { "epoch": 86.79362267493357, "grad_norm": 0.25023791193962097, "learning_rate": 1e-05, "loss": 1.0038, "step": 97990 }, { "epoch": 86.79805137289637, "grad_norm": 0.22050027549266815, "learning_rate": 1e-05, "loss": 0.8818, "step": 97995 }, { "epoch": 86.80248007085916, "grad_norm": 0.23029173910617828, "learning_rate": 1e-05, "loss": 1.0337, "step": 98000 }, { "epoch": 86.80690876882197, "grad_norm": 0.18844686448574066, "learning_rate": 1e-05, "loss": 1.0224, "step": 98005 }, { "epoch": 86.81133746678476, "grad_norm": 0.2100907415151596, "learning_rate": 1e-05, "loss": 0.9691, "step": 98010 }, { "epoch": 86.81576616474756, "grad_norm": 0.2497255802154541, "learning_rate": 1e-05, "loss": 0.9287, "step": 98015 }, { "epoch": 86.82019486271037, "grad_norm": 0.22252006828784943, "learning_rate": 1e-05, "loss": 0.9927, "step": 98020 }, { "epoch": 86.82462356067316, "grad_norm": 0.25199493765830994, "learning_rate": 1e-05, "loss": 0.9517, "step": 98025 }, { "epoch": 86.82905225863595, "grad_norm": 0.2313881665468216, "learning_rate": 1e-05, "loss": 0.989, "step": 98030 }, { "epoch": 86.83348095659876, "grad_norm": 0.2628002166748047, "learning_rate": 1e-05, "loss": 0.9738, "step": 98035 }, { "epoch": 86.83790965456156, "grad_norm": 0.25987300276756287, "learning_rate": 1e-05, "loss": 0.9448, "step": 98040 }, { "epoch": 86.84233835252435, "grad_norm": 0.23391568660736084, "learning_rate": 1e-05, "loss": 0.9927, "step": 98045 }, { "epoch": 86.84676705048716, "grad_norm": 0.2406986951828003, "learning_rate": 1e-05, "loss": 0.9801, "step": 98050 }, { "epoch": 86.85119574844995, "grad_norm": 0.24355019629001617, "learning_rate": 1e-05, "loss": 0.9284, "step": 98055 }, { "epoch": 86.85562444641276, "grad_norm": 0.21033534407615662, "learning_rate": 1e-05, "loss": 0.9719, "step": 98060 }, { "epoch": 86.86005314437556, "grad_norm": 0.22992965579032898, "learning_rate": 1e-05, "loss": 0.9159, "step": 98065 }, { "epoch": 86.86448184233835, "grad_norm": 0.2060347944498062, "learning_rate": 1e-05, "loss": 0.9391, "step": 98070 }, { "epoch": 86.86891054030116, "grad_norm": 0.23132586479187012, "learning_rate": 1e-05, "loss": 0.9486, "step": 98075 }, { "epoch": 86.87333923826395, "grad_norm": 0.21193315088748932, "learning_rate": 1e-05, "loss": 0.9973, "step": 98080 }, { "epoch": 86.87776793622675, "grad_norm": 0.2275550812482834, "learning_rate": 1e-05, "loss": 0.982, "step": 98085 }, { "epoch": 86.88219663418955, "grad_norm": 0.21882900595664978, "learning_rate": 1e-05, "loss": 0.9979, "step": 98090 }, { "epoch": 86.88662533215235, "grad_norm": 0.21325819194316864, "learning_rate": 1e-05, "loss": 0.9852, "step": 98095 }, { "epoch": 86.89105403011514, "grad_norm": 0.2026692032814026, "learning_rate": 1e-05, "loss": 0.9958, "step": 98100 }, { "epoch": 86.89548272807795, "grad_norm": 0.226902037858963, "learning_rate": 1e-05, "loss": 1.0039, "step": 98105 }, { "epoch": 86.89991142604075, "grad_norm": 0.20858511328697205, "learning_rate": 1e-05, "loss": 1.0131, "step": 98110 }, { "epoch": 86.90434012400354, "grad_norm": 0.24579277634620667, "learning_rate": 1e-05, "loss": 0.9308, "step": 98115 }, { "epoch": 86.90876882196635, "grad_norm": 0.22798758745193481, "learning_rate": 1e-05, "loss": 0.9596, "step": 98120 }, { "epoch": 86.91319751992914, "grad_norm": 0.24776817858219147, "learning_rate": 1e-05, "loss": 0.9331, "step": 98125 }, { "epoch": 86.91762621789194, "grad_norm": 0.230184867978096, "learning_rate": 1e-05, "loss": 0.9805, "step": 98130 }, { "epoch": 86.92205491585474, "grad_norm": 0.235616073012352, "learning_rate": 1e-05, "loss": 0.9762, "step": 98135 }, { "epoch": 86.92648361381754, "grad_norm": 0.25655829906463623, "learning_rate": 1e-05, "loss": 0.9984, "step": 98140 }, { "epoch": 86.93091231178033, "grad_norm": 0.20392318069934845, "learning_rate": 1e-05, "loss": 0.9208, "step": 98145 }, { "epoch": 86.93534100974314, "grad_norm": 0.22574551403522491, "learning_rate": 1e-05, "loss": 0.9295, "step": 98150 }, { "epoch": 86.93976970770593, "grad_norm": 0.22375118732452393, "learning_rate": 1e-05, "loss": 0.9465, "step": 98155 }, { "epoch": 86.94419840566873, "grad_norm": 0.2875608503818512, "learning_rate": 1e-05, "loss": 0.9452, "step": 98160 }, { "epoch": 86.94862710363154, "grad_norm": 0.2933945655822754, "learning_rate": 1e-05, "loss": 0.9464, "step": 98165 }, { "epoch": 86.95305580159433, "grad_norm": 0.25477349758148193, "learning_rate": 1e-05, "loss": 0.997, "step": 98170 }, { "epoch": 86.95748449955713, "grad_norm": 0.24826818704605103, "learning_rate": 1e-05, "loss": 0.9083, "step": 98175 }, { "epoch": 86.96191319751993, "grad_norm": 0.22769761085510254, "learning_rate": 1e-05, "loss": 0.9986, "step": 98180 }, { "epoch": 86.96634189548273, "grad_norm": 0.23774100840091705, "learning_rate": 1e-05, "loss": 0.9747, "step": 98185 }, { "epoch": 86.97077059344552, "grad_norm": 0.3010922968387604, "learning_rate": 1e-05, "loss": 0.906, "step": 98190 }, { "epoch": 86.97519929140833, "grad_norm": 0.2281743437051773, "learning_rate": 1e-05, "loss": 0.9868, "step": 98195 }, { "epoch": 86.97962798937112, "grad_norm": 0.25352758169174194, "learning_rate": 1e-05, "loss": 0.9674, "step": 98200 }, { "epoch": 86.98405668733392, "grad_norm": 0.24387317895889282, "learning_rate": 1e-05, "loss": 0.9255, "step": 98205 }, { "epoch": 86.98848538529673, "grad_norm": 0.22576482594013214, "learning_rate": 1e-05, "loss": 0.9661, "step": 98210 }, { "epoch": 86.99291408325952, "grad_norm": 0.2359921783208847, "learning_rate": 1e-05, "loss": 0.9662, "step": 98215 }, { "epoch": 86.99734278122232, "grad_norm": 0.2172839492559433, "learning_rate": 1e-05, "loss": 0.9176, "step": 98220 }, { "epoch": 87.00177147918512, "grad_norm": 0.22878891229629517, "learning_rate": 1e-05, "loss": 0.9996, "step": 98225 }, { "epoch": 87.00620017714792, "grad_norm": 0.23352956771850586, "learning_rate": 1e-05, "loss": 0.9354, "step": 98230 }, { "epoch": 87.01062887511071, "grad_norm": 0.2274307906627655, "learning_rate": 1e-05, "loss": 0.9573, "step": 98235 }, { "epoch": 87.01505757307352, "grad_norm": 0.24780115485191345, "learning_rate": 1e-05, "loss": 0.9816, "step": 98240 }, { "epoch": 87.01948627103631, "grad_norm": 0.23804190754890442, "learning_rate": 1e-05, "loss": 1.0147, "step": 98245 }, { "epoch": 87.02391496899911, "grad_norm": 0.26680079102516174, "learning_rate": 1e-05, "loss": 0.9922, "step": 98250 }, { "epoch": 87.02834366696192, "grad_norm": 0.27159383893013, "learning_rate": 1e-05, "loss": 0.9582, "step": 98255 }, { "epoch": 87.03277236492471, "grad_norm": 0.2324649691581726, "learning_rate": 1e-05, "loss": 1.0112, "step": 98260 }, { "epoch": 87.0372010628875, "grad_norm": 0.2546885907649994, "learning_rate": 1e-05, "loss": 0.951, "step": 98265 }, { "epoch": 87.04162976085031, "grad_norm": 0.2547507882118225, "learning_rate": 1e-05, "loss": 0.9996, "step": 98270 }, { "epoch": 87.0460584588131, "grad_norm": 0.2586901783943176, "learning_rate": 1e-05, "loss": 0.9407, "step": 98275 }, { "epoch": 87.0504871567759, "grad_norm": 0.2597416639328003, "learning_rate": 1e-05, "loss": 0.9057, "step": 98280 }, { "epoch": 87.05491585473871, "grad_norm": 0.27232998609542847, "learning_rate": 1e-05, "loss": 0.9948, "step": 98285 }, { "epoch": 87.0593445527015, "grad_norm": 0.1909365952014923, "learning_rate": 1e-05, "loss": 0.9421, "step": 98290 }, { "epoch": 87.0637732506643, "grad_norm": 0.20015062391757965, "learning_rate": 1e-05, "loss": 0.9806, "step": 98295 }, { "epoch": 87.0682019486271, "grad_norm": 0.23849888145923615, "learning_rate": 1e-05, "loss": 0.9946, "step": 98300 }, { "epoch": 87.0726306465899, "grad_norm": 0.2270723283290863, "learning_rate": 1e-05, "loss": 1.0301, "step": 98305 }, { "epoch": 87.07705934455271, "grad_norm": 0.23387481272220612, "learning_rate": 1e-05, "loss": 0.967, "step": 98310 }, { "epoch": 87.0814880425155, "grad_norm": 0.27684634923934937, "learning_rate": 1e-05, "loss": 0.9951, "step": 98315 }, { "epoch": 87.0859167404783, "grad_norm": 0.23975835740566254, "learning_rate": 1e-05, "loss": 0.9618, "step": 98320 }, { "epoch": 87.0903454384411, "grad_norm": 0.27283918857574463, "learning_rate": 1e-05, "loss": 0.9588, "step": 98325 }, { "epoch": 87.0947741364039, "grad_norm": 0.23532938957214355, "learning_rate": 1e-05, "loss": 1.0117, "step": 98330 }, { "epoch": 87.0992028343667, "grad_norm": 0.2911739647388458, "learning_rate": 1e-05, "loss": 0.978, "step": 98335 }, { "epoch": 87.1036315323295, "grad_norm": 0.24351496994495392, "learning_rate": 1e-05, "loss": 0.9295, "step": 98340 }, { "epoch": 87.1080602302923, "grad_norm": 0.23794305324554443, "learning_rate": 1e-05, "loss": 0.9521, "step": 98345 }, { "epoch": 87.11248892825509, "grad_norm": 0.23275792598724365, "learning_rate": 1e-05, "loss": 0.9498, "step": 98350 }, { "epoch": 87.1169176262179, "grad_norm": 0.24212241172790527, "learning_rate": 1e-05, "loss": 0.9096, "step": 98355 }, { "epoch": 87.12134632418069, "grad_norm": 0.22095568478107452, "learning_rate": 1e-05, "loss": 0.9113, "step": 98360 }, { "epoch": 87.12577502214349, "grad_norm": 0.20863617956638336, "learning_rate": 1e-05, "loss": 1.0098, "step": 98365 }, { "epoch": 87.1302037201063, "grad_norm": 0.21951468288898468, "learning_rate": 1e-05, "loss": 1.0132, "step": 98370 }, { "epoch": 87.13463241806909, "grad_norm": 0.22290784120559692, "learning_rate": 1e-05, "loss": 0.9455, "step": 98375 }, { "epoch": 87.13906111603188, "grad_norm": 0.22808930277824402, "learning_rate": 1e-05, "loss": 1.0361, "step": 98380 }, { "epoch": 87.14348981399469, "grad_norm": 0.2338210493326187, "learning_rate": 1e-05, "loss": 0.9473, "step": 98385 }, { "epoch": 87.14791851195749, "grad_norm": 0.26032376289367676, "learning_rate": 1e-05, "loss": 0.9348, "step": 98390 }, { "epoch": 87.15234720992028, "grad_norm": 0.2538096010684967, "learning_rate": 1e-05, "loss": 0.9659, "step": 98395 }, { "epoch": 87.15677590788309, "grad_norm": 0.21519728004932404, "learning_rate": 1e-05, "loss": 0.943, "step": 98400 }, { "epoch": 87.16120460584588, "grad_norm": 0.2721410393714905, "learning_rate": 1e-05, "loss": 0.9933, "step": 98405 }, { "epoch": 87.16563330380868, "grad_norm": 0.24952802062034607, "learning_rate": 1e-05, "loss": 0.9156, "step": 98410 }, { "epoch": 87.17006200177148, "grad_norm": 0.23069506883621216, "learning_rate": 1e-05, "loss": 0.9524, "step": 98415 }, { "epoch": 87.17449069973428, "grad_norm": 0.2460014373064041, "learning_rate": 1e-05, "loss": 0.9853, "step": 98420 }, { "epoch": 87.17891939769707, "grad_norm": 0.22733376920223236, "learning_rate": 1e-05, "loss": 0.9775, "step": 98425 }, { "epoch": 87.18334809565988, "grad_norm": 0.275743305683136, "learning_rate": 1e-05, "loss": 1.0098, "step": 98430 }, { "epoch": 87.18777679362267, "grad_norm": 0.24603989720344543, "learning_rate": 1e-05, "loss": 0.9853, "step": 98435 }, { "epoch": 87.19220549158547, "grad_norm": 0.21844574809074402, "learning_rate": 1e-05, "loss": 0.9016, "step": 98440 }, { "epoch": 87.19663418954828, "grad_norm": 0.23349353671073914, "learning_rate": 1e-05, "loss": 0.9573, "step": 98445 }, { "epoch": 87.20106288751107, "grad_norm": 0.2166444957256317, "learning_rate": 1e-05, "loss": 0.9212, "step": 98450 }, { "epoch": 87.20549158547387, "grad_norm": 0.2517101466655731, "learning_rate": 1e-05, "loss": 1.0132, "step": 98455 }, { "epoch": 87.20992028343667, "grad_norm": 0.21668767929077148, "learning_rate": 1e-05, "loss": 0.9646, "step": 98460 }, { "epoch": 87.21434898139947, "grad_norm": 0.19566695392131805, "learning_rate": 1e-05, "loss": 0.9695, "step": 98465 }, { "epoch": 87.21877767936226, "grad_norm": 0.2491917759180069, "learning_rate": 1e-05, "loss": 0.9414, "step": 98470 }, { "epoch": 87.22320637732507, "grad_norm": 0.23572002351284027, "learning_rate": 1e-05, "loss": 0.9346, "step": 98475 }, { "epoch": 87.22763507528786, "grad_norm": 0.20109450817108154, "learning_rate": 1e-05, "loss": 0.9557, "step": 98480 }, { "epoch": 87.23206377325066, "grad_norm": 0.25902730226516724, "learning_rate": 1e-05, "loss": 0.9119, "step": 98485 }, { "epoch": 87.23649247121347, "grad_norm": 0.24835005402565002, "learning_rate": 1e-05, "loss": 0.9291, "step": 98490 }, { "epoch": 87.24092116917626, "grad_norm": 0.2484293133020401, "learning_rate": 1e-05, "loss": 0.9864, "step": 98495 }, { "epoch": 87.24534986713905, "grad_norm": 0.22117173671722412, "learning_rate": 1e-05, "loss": 0.9517, "step": 98500 }, { "epoch": 87.24977856510186, "grad_norm": 0.20593170821666718, "learning_rate": 1e-05, "loss": 0.9837, "step": 98505 }, { "epoch": 87.25420726306466, "grad_norm": 0.22248238325119019, "learning_rate": 1e-05, "loss": 0.9473, "step": 98510 }, { "epoch": 87.25863596102745, "grad_norm": 0.2633008658885956, "learning_rate": 1e-05, "loss": 0.9696, "step": 98515 }, { "epoch": 87.26306465899026, "grad_norm": 0.2506774663925171, "learning_rate": 1e-05, "loss": 0.9537, "step": 98520 }, { "epoch": 87.26749335695305, "grad_norm": 0.28996458649635315, "learning_rate": 1e-05, "loss": 1.0041, "step": 98525 }, { "epoch": 87.27192205491585, "grad_norm": 0.21098190546035767, "learning_rate": 1e-05, "loss": 0.9589, "step": 98530 }, { "epoch": 87.27635075287866, "grad_norm": 0.23079082369804382, "learning_rate": 1e-05, "loss": 1.0202, "step": 98535 }, { "epoch": 87.28077945084145, "grad_norm": 0.2727959454059601, "learning_rate": 1e-05, "loss": 0.9532, "step": 98540 }, { "epoch": 87.28520814880426, "grad_norm": 0.2653674781322479, "learning_rate": 1e-05, "loss": 0.9749, "step": 98545 }, { "epoch": 87.28963684676705, "grad_norm": 0.2709977924823761, "learning_rate": 1e-05, "loss": 1.0632, "step": 98550 }, { "epoch": 87.29406554472985, "grad_norm": 0.27767375111579895, "learning_rate": 1e-05, "loss": 0.948, "step": 98555 }, { "epoch": 87.29849424269266, "grad_norm": 0.3140869140625, "learning_rate": 1e-05, "loss": 0.9264, "step": 98560 }, { "epoch": 87.30292294065545, "grad_norm": 0.23338013887405396, "learning_rate": 1e-05, "loss": 0.9697, "step": 98565 }, { "epoch": 87.30735163861824, "grad_norm": 0.25078892707824707, "learning_rate": 1e-05, "loss": 0.9662, "step": 98570 }, { "epoch": 87.31178033658105, "grad_norm": 0.24289509654045105, "learning_rate": 1e-05, "loss": 1.008, "step": 98575 }, { "epoch": 87.31620903454385, "grad_norm": 0.24767766892910004, "learning_rate": 1e-05, "loss": 0.9244, "step": 98580 }, { "epoch": 87.32063773250664, "grad_norm": 0.24860763549804688, "learning_rate": 1e-05, "loss": 0.9563, "step": 98585 }, { "epoch": 87.32506643046945, "grad_norm": 0.22739925980567932, "learning_rate": 1e-05, "loss": 0.9414, "step": 98590 }, { "epoch": 87.32949512843224, "grad_norm": 0.27999404072761536, "learning_rate": 1e-05, "loss": 0.9478, "step": 98595 }, { "epoch": 87.33392382639504, "grad_norm": 0.23120373487472534, "learning_rate": 1e-05, "loss": 0.9863, "step": 98600 }, { "epoch": 87.33835252435784, "grad_norm": 0.2214059978723526, "learning_rate": 1e-05, "loss": 1.0179, "step": 98605 }, { "epoch": 87.34278122232064, "grad_norm": 0.25575026869773865, "learning_rate": 1e-05, "loss": 0.9686, "step": 98610 }, { "epoch": 87.34720992028343, "grad_norm": 0.20689494907855988, "learning_rate": 1e-05, "loss": 0.9231, "step": 98615 }, { "epoch": 87.35163861824624, "grad_norm": 0.23888923227787018, "learning_rate": 1e-05, "loss": 0.9692, "step": 98620 }, { "epoch": 87.35606731620904, "grad_norm": 0.2670289874076843, "learning_rate": 1e-05, "loss": 0.9974, "step": 98625 }, { "epoch": 87.36049601417183, "grad_norm": 0.29295748472213745, "learning_rate": 1e-05, "loss": 0.9398, "step": 98630 }, { "epoch": 87.36492471213464, "grad_norm": 0.24720269441604614, "learning_rate": 1e-05, "loss": 0.9303, "step": 98635 }, { "epoch": 87.36935341009743, "grad_norm": 0.23388172686100006, "learning_rate": 1e-05, "loss": 0.9739, "step": 98640 }, { "epoch": 87.37378210806023, "grad_norm": 0.21629522740840912, "learning_rate": 1e-05, "loss": 1.0148, "step": 98645 }, { "epoch": 87.37821080602303, "grad_norm": 0.25006499886512756, "learning_rate": 1e-05, "loss": 0.9281, "step": 98650 }, { "epoch": 87.38263950398583, "grad_norm": 0.22009262442588806, "learning_rate": 1e-05, "loss": 0.9104, "step": 98655 }, { "epoch": 87.38706820194862, "grad_norm": 0.2552126348018646, "learning_rate": 1e-05, "loss": 0.9542, "step": 98660 }, { "epoch": 87.39149689991143, "grad_norm": 0.2860468924045563, "learning_rate": 1e-05, "loss": 0.9288, "step": 98665 }, { "epoch": 87.39592559787422, "grad_norm": 0.24340319633483887, "learning_rate": 1e-05, "loss": 0.9983, "step": 98670 }, { "epoch": 87.40035429583702, "grad_norm": 0.22523324191570282, "learning_rate": 1e-05, "loss": 0.9245, "step": 98675 }, { "epoch": 87.40478299379983, "grad_norm": 0.2452406883239746, "learning_rate": 1e-05, "loss": 0.9327, "step": 98680 }, { "epoch": 87.40921169176262, "grad_norm": 0.25363871455192566, "learning_rate": 1e-05, "loss": 0.9989, "step": 98685 }, { "epoch": 87.41364038972542, "grad_norm": 0.22660808265209198, "learning_rate": 1e-05, "loss": 1.0025, "step": 98690 }, { "epoch": 87.41806908768822, "grad_norm": 0.2833593785762787, "learning_rate": 1e-05, "loss": 0.9359, "step": 98695 }, { "epoch": 87.42249778565102, "grad_norm": 0.2170831710100174, "learning_rate": 1e-05, "loss": 0.8968, "step": 98700 }, { "epoch": 87.42692648361381, "grad_norm": 0.24000315368175507, "learning_rate": 1e-05, "loss": 0.9699, "step": 98705 }, { "epoch": 87.43135518157662, "grad_norm": 0.23038068413734436, "learning_rate": 1e-05, "loss": 0.9372, "step": 98710 }, { "epoch": 87.43578387953941, "grad_norm": 0.24463027715682983, "learning_rate": 1e-05, "loss": 0.9355, "step": 98715 }, { "epoch": 87.44021257750221, "grad_norm": 0.25267237424850464, "learning_rate": 1e-05, "loss": 0.9557, "step": 98720 }, { "epoch": 87.44464127546502, "grad_norm": 0.21883666515350342, "learning_rate": 1e-05, "loss": 0.9311, "step": 98725 }, { "epoch": 87.44906997342781, "grad_norm": 0.23028209805488586, "learning_rate": 1e-05, "loss": 0.9507, "step": 98730 }, { "epoch": 87.4534986713906, "grad_norm": 0.2352771759033203, "learning_rate": 1e-05, "loss": 0.9045, "step": 98735 }, { "epoch": 87.45792736935341, "grad_norm": 0.234528049826622, "learning_rate": 1e-05, "loss": 0.9401, "step": 98740 }, { "epoch": 87.46235606731621, "grad_norm": 0.2326662689447403, "learning_rate": 1e-05, "loss": 0.9921, "step": 98745 }, { "epoch": 87.466784765279, "grad_norm": 0.25211647152900696, "learning_rate": 1e-05, "loss": 0.9538, "step": 98750 }, { "epoch": 87.47121346324181, "grad_norm": 0.21470589935779572, "learning_rate": 1e-05, "loss": 0.9515, "step": 98755 }, { "epoch": 87.4756421612046, "grad_norm": 0.29836538434028625, "learning_rate": 1e-05, "loss": 0.9436, "step": 98760 }, { "epoch": 87.4800708591674, "grad_norm": 0.2584453225135803, "learning_rate": 1e-05, "loss": 0.9122, "step": 98765 }, { "epoch": 87.4844995571302, "grad_norm": 0.20180445909500122, "learning_rate": 1e-05, "loss": 0.9375, "step": 98770 }, { "epoch": 87.488928255093, "grad_norm": 0.24020785093307495, "learning_rate": 1e-05, "loss": 0.9707, "step": 98775 }, { "epoch": 87.4933569530558, "grad_norm": 0.22691568732261658, "learning_rate": 1e-05, "loss": 0.9453, "step": 98780 }, { "epoch": 87.4977856510186, "grad_norm": 0.2516210973262787, "learning_rate": 1e-05, "loss": 0.9629, "step": 98785 }, { "epoch": 87.5022143489814, "grad_norm": 0.22353962063789368, "learning_rate": 1e-05, "loss": 0.9921, "step": 98790 }, { "epoch": 87.5066430469442, "grad_norm": 0.26746803522109985, "learning_rate": 1e-05, "loss": 1.0186, "step": 98795 }, { "epoch": 87.511071744907, "grad_norm": 0.27643832564353943, "learning_rate": 1e-05, "loss": 0.9806, "step": 98800 }, { "epoch": 87.5155004428698, "grad_norm": 0.2607962191104889, "learning_rate": 1e-05, "loss": 0.9527, "step": 98805 }, { "epoch": 87.5199291408326, "grad_norm": 0.25980764627456665, "learning_rate": 1e-05, "loss": 0.9701, "step": 98810 }, { "epoch": 87.5243578387954, "grad_norm": 0.2470128983259201, "learning_rate": 1e-05, "loss": 0.944, "step": 98815 }, { "epoch": 87.52878653675819, "grad_norm": 0.24734635651111603, "learning_rate": 1e-05, "loss": 1.0, "step": 98820 }, { "epoch": 87.533215234721, "grad_norm": 0.24346600472927094, "learning_rate": 1e-05, "loss": 0.9744, "step": 98825 }, { "epoch": 87.53764393268379, "grad_norm": 0.2174825370311737, "learning_rate": 1e-05, "loss": 0.9605, "step": 98830 }, { "epoch": 87.54207263064659, "grad_norm": 0.24301636219024658, "learning_rate": 1e-05, "loss": 0.9563, "step": 98835 }, { "epoch": 87.5465013286094, "grad_norm": 0.2106200009584427, "learning_rate": 1e-05, "loss": 0.9362, "step": 98840 }, { "epoch": 87.55093002657219, "grad_norm": 0.2418515682220459, "learning_rate": 1e-05, "loss": 0.9966, "step": 98845 }, { "epoch": 87.55535872453498, "grad_norm": 0.25311174988746643, "learning_rate": 1e-05, "loss": 0.9601, "step": 98850 }, { "epoch": 87.55978742249779, "grad_norm": 0.19423292577266693, "learning_rate": 1e-05, "loss": 0.9168, "step": 98855 }, { "epoch": 87.56421612046059, "grad_norm": 0.24214698374271393, "learning_rate": 1e-05, "loss": 0.9646, "step": 98860 }, { "epoch": 87.56864481842338, "grad_norm": 0.20991046726703644, "learning_rate": 1e-05, "loss": 0.9279, "step": 98865 }, { "epoch": 87.57307351638619, "grad_norm": 0.2326023131608963, "learning_rate": 1e-05, "loss": 0.9563, "step": 98870 }, { "epoch": 87.57750221434898, "grad_norm": 0.26416683197021484, "learning_rate": 1e-05, "loss": 0.9715, "step": 98875 }, { "epoch": 87.58193091231178, "grad_norm": 0.21936079859733582, "learning_rate": 1e-05, "loss": 0.9007, "step": 98880 }, { "epoch": 87.58635961027458, "grad_norm": 0.23663152754306793, "learning_rate": 1e-05, "loss": 0.9639, "step": 98885 }, { "epoch": 87.59078830823738, "grad_norm": 0.2777262330055237, "learning_rate": 1e-05, "loss": 0.9653, "step": 98890 }, { "epoch": 87.59521700620017, "grad_norm": 0.22472350299358368, "learning_rate": 1e-05, "loss": 0.9095, "step": 98895 }, { "epoch": 87.59964570416298, "grad_norm": 0.3032849431037903, "learning_rate": 1e-05, "loss": 0.9398, "step": 98900 }, { "epoch": 87.60407440212578, "grad_norm": 0.30397355556488037, "learning_rate": 1e-05, "loss": 0.9657, "step": 98905 }, { "epoch": 87.60850310008857, "grad_norm": 0.29726454615592957, "learning_rate": 1e-05, "loss": 0.9654, "step": 98910 }, { "epoch": 87.61293179805138, "grad_norm": 0.30529820919036865, "learning_rate": 1e-05, "loss": 0.9709, "step": 98915 }, { "epoch": 87.61736049601417, "grad_norm": 0.25008270144462585, "learning_rate": 1e-05, "loss": 1.0253, "step": 98920 }, { "epoch": 87.62178919397697, "grad_norm": 0.247336283326149, "learning_rate": 1e-05, "loss": 0.9636, "step": 98925 }, { "epoch": 87.62621789193977, "grad_norm": 0.21041858196258545, "learning_rate": 1e-05, "loss": 0.9754, "step": 98930 }, { "epoch": 87.63064658990257, "grad_norm": 0.20626723766326904, "learning_rate": 1e-05, "loss": 0.9963, "step": 98935 }, { "epoch": 87.63507528786536, "grad_norm": 0.2532220184803009, "learning_rate": 1e-05, "loss": 0.9644, "step": 98940 }, { "epoch": 87.63950398582817, "grad_norm": 0.24755412340164185, "learning_rate": 1e-05, "loss": 0.9176, "step": 98945 }, { "epoch": 87.64393268379096, "grad_norm": 0.23819641768932343, "learning_rate": 1e-05, "loss": 0.971, "step": 98950 }, { "epoch": 87.64836138175376, "grad_norm": 0.22585979104042053, "learning_rate": 1e-05, "loss": 0.9602, "step": 98955 }, { "epoch": 87.65279007971657, "grad_norm": 0.28521087765693665, "learning_rate": 1e-05, "loss": 0.9105, "step": 98960 }, { "epoch": 87.65721877767936, "grad_norm": 0.22838784754276276, "learning_rate": 1e-05, "loss": 0.954, "step": 98965 }, { "epoch": 87.66164747564216, "grad_norm": 0.2483397126197815, "learning_rate": 1e-05, "loss": 0.9401, "step": 98970 }, { "epoch": 87.66607617360496, "grad_norm": 0.23880240321159363, "learning_rate": 1e-05, "loss": 0.9579, "step": 98975 }, { "epoch": 87.67050487156776, "grad_norm": 0.31222718954086304, "learning_rate": 1e-05, "loss": 1.0046, "step": 98980 }, { "epoch": 87.67493356953055, "grad_norm": 0.25406596064567566, "learning_rate": 1e-05, "loss": 0.9511, "step": 98985 }, { "epoch": 87.67936226749336, "grad_norm": 0.2309110462665558, "learning_rate": 1e-05, "loss": 0.93, "step": 98990 }, { "epoch": 87.68379096545615, "grad_norm": 0.2634873390197754, "learning_rate": 1e-05, "loss": 0.9242, "step": 98995 }, { "epoch": 87.68821966341895, "grad_norm": 0.2261311262845993, "learning_rate": 1e-05, "loss": 0.9652, "step": 99000 }, { "epoch": 87.69264836138176, "grad_norm": 0.2464703917503357, "learning_rate": 1e-05, "loss": 0.9855, "step": 99005 }, { "epoch": 87.69707705934455, "grad_norm": 0.22071000933647156, "learning_rate": 1e-05, "loss": 0.9372, "step": 99010 }, { "epoch": 87.70150575730734, "grad_norm": 0.2115013599395752, "learning_rate": 1e-05, "loss": 0.9237, "step": 99015 }, { "epoch": 87.70593445527015, "grad_norm": 0.21408239006996155, "learning_rate": 1e-05, "loss": 0.9131, "step": 99020 }, { "epoch": 87.71036315323295, "grad_norm": 0.23356370627880096, "learning_rate": 1e-05, "loss": 0.9981, "step": 99025 }, { "epoch": 87.71479185119574, "grad_norm": 0.20626841485500336, "learning_rate": 1e-05, "loss": 1.0212, "step": 99030 }, { "epoch": 87.71922054915855, "grad_norm": 0.26001638174057007, "learning_rate": 1e-05, "loss": 0.9538, "step": 99035 }, { "epoch": 87.72364924712134, "grad_norm": 0.2766408920288086, "learning_rate": 1e-05, "loss": 1.0407, "step": 99040 }, { "epoch": 87.72807794508415, "grad_norm": 0.23147602379322052, "learning_rate": 1e-05, "loss": 0.9394, "step": 99045 }, { "epoch": 87.73250664304695, "grad_norm": 0.2566184401512146, "learning_rate": 1e-05, "loss": 0.9828, "step": 99050 }, { "epoch": 87.73693534100974, "grad_norm": 0.22532761096954346, "learning_rate": 1e-05, "loss": 0.9668, "step": 99055 }, { "epoch": 87.74136403897255, "grad_norm": 0.21976672112941742, "learning_rate": 1e-05, "loss": 0.9363, "step": 99060 }, { "epoch": 87.74579273693534, "grad_norm": 0.25288182497024536, "learning_rate": 1e-05, "loss": 0.9388, "step": 99065 }, { "epoch": 87.75022143489814, "grad_norm": 0.23161187767982483, "learning_rate": 1e-05, "loss": 0.9664, "step": 99070 }, { "epoch": 87.75465013286095, "grad_norm": 0.23595760762691498, "learning_rate": 1e-05, "loss": 0.9407, "step": 99075 }, { "epoch": 87.75907883082374, "grad_norm": 0.2467581033706665, "learning_rate": 1e-05, "loss": 1.0116, "step": 99080 }, { "epoch": 87.76350752878653, "grad_norm": 0.21384133398532867, "learning_rate": 1e-05, "loss": 0.9007, "step": 99085 }, { "epoch": 87.76793622674934, "grad_norm": 0.2870183289051056, "learning_rate": 1e-05, "loss": 0.9339, "step": 99090 }, { "epoch": 87.77236492471214, "grad_norm": 0.22450602054595947, "learning_rate": 1e-05, "loss": 0.9403, "step": 99095 }, { "epoch": 87.77679362267493, "grad_norm": 0.24323642253875732, "learning_rate": 1e-05, "loss": 0.9978, "step": 99100 }, { "epoch": 87.78122232063774, "grad_norm": 0.26496610045433044, "learning_rate": 1e-05, "loss": 0.9938, "step": 99105 }, { "epoch": 87.78565101860053, "grad_norm": 0.23802362382411957, "learning_rate": 1e-05, "loss": 0.9852, "step": 99110 }, { "epoch": 87.79007971656333, "grad_norm": 0.2635282278060913, "learning_rate": 1e-05, "loss": 0.9518, "step": 99115 }, { "epoch": 87.79450841452613, "grad_norm": 0.22189420461654663, "learning_rate": 1e-05, "loss": 0.9385, "step": 99120 }, { "epoch": 87.79893711248893, "grad_norm": 0.24880361557006836, "learning_rate": 1e-05, "loss": 0.9498, "step": 99125 }, { "epoch": 87.80336581045172, "grad_norm": 0.24588751792907715, "learning_rate": 1e-05, "loss": 0.9066, "step": 99130 }, { "epoch": 87.80779450841453, "grad_norm": 0.25989171862602234, "learning_rate": 1e-05, "loss": 0.9744, "step": 99135 }, { "epoch": 87.81222320637733, "grad_norm": 0.2163795381784439, "learning_rate": 1e-05, "loss": 0.9202, "step": 99140 }, { "epoch": 87.81665190434012, "grad_norm": 0.24971000850200653, "learning_rate": 1e-05, "loss": 0.9513, "step": 99145 }, { "epoch": 87.82108060230293, "grad_norm": 0.22685950994491577, "learning_rate": 1e-05, "loss": 0.9422, "step": 99150 }, { "epoch": 87.82550930026572, "grad_norm": 0.23735003173351288, "learning_rate": 1e-05, "loss": 0.9225, "step": 99155 }, { "epoch": 87.82993799822852, "grad_norm": 0.2772241234779358, "learning_rate": 1e-05, "loss": 0.926, "step": 99160 }, { "epoch": 87.83436669619132, "grad_norm": 0.21943655610084534, "learning_rate": 1e-05, "loss": 0.9562, "step": 99165 }, { "epoch": 87.83879539415412, "grad_norm": 0.241880863904953, "learning_rate": 1e-05, "loss": 0.9348, "step": 99170 }, { "epoch": 87.84322409211691, "grad_norm": 0.26835131645202637, "learning_rate": 1e-05, "loss": 0.9458, "step": 99175 }, { "epoch": 87.84765279007972, "grad_norm": 0.2499963492155075, "learning_rate": 1e-05, "loss": 0.9688, "step": 99180 }, { "epoch": 87.85208148804251, "grad_norm": 0.21494348347187042, "learning_rate": 1e-05, "loss": 0.9992, "step": 99185 }, { "epoch": 87.85651018600531, "grad_norm": 0.2531016767024994, "learning_rate": 1e-05, "loss": 0.9402, "step": 99190 }, { "epoch": 87.86093888396812, "grad_norm": 0.2302526831626892, "learning_rate": 1e-05, "loss": 0.9589, "step": 99195 }, { "epoch": 87.86536758193091, "grad_norm": 0.24988315999507904, "learning_rate": 1e-05, "loss": 0.9632, "step": 99200 }, { "epoch": 87.8697962798937, "grad_norm": 0.2678731083869934, "learning_rate": 1e-05, "loss": 0.8867, "step": 99205 }, { "epoch": 87.87422497785651, "grad_norm": 0.23875993490219116, "learning_rate": 1e-05, "loss": 0.9842, "step": 99210 }, { "epoch": 87.87865367581931, "grad_norm": 0.21770165860652924, "learning_rate": 1e-05, "loss": 0.9624, "step": 99215 }, { "epoch": 87.8830823737821, "grad_norm": 0.22409257292747498, "learning_rate": 1e-05, "loss": 0.947, "step": 99220 }, { "epoch": 87.88751107174491, "grad_norm": 0.24944385886192322, "learning_rate": 1e-05, "loss": 0.938, "step": 99225 }, { "epoch": 87.8919397697077, "grad_norm": 0.24507126212120056, "learning_rate": 1e-05, "loss": 0.9004, "step": 99230 }, { "epoch": 87.8963684676705, "grad_norm": 0.24941736459732056, "learning_rate": 1e-05, "loss": 0.9581, "step": 99235 }, { "epoch": 87.9007971656333, "grad_norm": 0.2289191484451294, "learning_rate": 1e-05, "loss": 1.0077, "step": 99240 }, { "epoch": 87.9052258635961, "grad_norm": 0.20393109321594238, "learning_rate": 1e-05, "loss": 0.986, "step": 99245 }, { "epoch": 87.9096545615589, "grad_norm": 0.26088833808898926, "learning_rate": 1e-05, "loss": 1.0182, "step": 99250 }, { "epoch": 87.9140832595217, "grad_norm": 0.259015828371048, "learning_rate": 1e-05, "loss": 0.9609, "step": 99255 }, { "epoch": 87.9185119574845, "grad_norm": 0.27033761143684387, "learning_rate": 1e-05, "loss": 0.9725, "step": 99260 }, { "epoch": 87.92294065544729, "grad_norm": 0.19690492749214172, "learning_rate": 1e-05, "loss": 0.9556, "step": 99265 }, { "epoch": 87.9273693534101, "grad_norm": 0.2180897295475006, "learning_rate": 1e-05, "loss": 0.9521, "step": 99270 }, { "epoch": 87.9317980513729, "grad_norm": 0.25377869606018066, "learning_rate": 1e-05, "loss": 0.947, "step": 99275 }, { "epoch": 87.9362267493357, "grad_norm": 0.2251621037721634, "learning_rate": 1e-05, "loss": 0.8883, "step": 99280 }, { "epoch": 87.9406554472985, "grad_norm": 0.25489184260368347, "learning_rate": 1e-05, "loss": 0.9733, "step": 99285 }, { "epoch": 87.94508414526129, "grad_norm": 0.21676458418369293, "learning_rate": 1e-05, "loss": 0.9893, "step": 99290 }, { "epoch": 87.9495128432241, "grad_norm": 0.2695954442024231, "learning_rate": 1e-05, "loss": 0.9915, "step": 99295 }, { "epoch": 87.9539415411869, "grad_norm": 0.2721954882144928, "learning_rate": 1e-05, "loss": 0.9366, "step": 99300 }, { "epoch": 87.95837023914969, "grad_norm": 0.22035151720046997, "learning_rate": 1e-05, "loss": 0.9388, "step": 99305 }, { "epoch": 87.9627989371125, "grad_norm": 0.20629064738750458, "learning_rate": 1e-05, "loss": 0.9336, "step": 99310 }, { "epoch": 87.96722763507529, "grad_norm": 0.25649625062942505, "learning_rate": 1e-05, "loss": 0.9918, "step": 99315 }, { "epoch": 87.97165633303808, "grad_norm": 0.2524641156196594, "learning_rate": 1e-05, "loss": 0.939, "step": 99320 }, { "epoch": 87.97608503100089, "grad_norm": 0.24857378005981445, "learning_rate": 1e-05, "loss": 0.9624, "step": 99325 }, { "epoch": 87.98051372896369, "grad_norm": 0.23496681451797485, "learning_rate": 1e-05, "loss": 0.9715, "step": 99330 }, { "epoch": 87.98494242692648, "grad_norm": 0.2633059620857239, "learning_rate": 1e-05, "loss": 0.9444, "step": 99335 }, { "epoch": 87.98937112488929, "grad_norm": 0.2400464117527008, "learning_rate": 1e-05, "loss": 0.9938, "step": 99340 }, { "epoch": 87.99379982285208, "grad_norm": 0.23970000445842743, "learning_rate": 1e-05, "loss": 0.9409, "step": 99345 }, { "epoch": 87.99822852081488, "grad_norm": 0.24878472089767456, "learning_rate": 1e-05, "loss": 0.9596, "step": 99350 }, { "epoch": 88.00265721877768, "grad_norm": 0.3312890827655792, "learning_rate": 1e-05, "loss": 0.9341, "step": 99355 }, { "epoch": 88.00708591674048, "grad_norm": 0.253065824508667, "learning_rate": 1e-05, "loss": 0.8935, "step": 99360 }, { "epoch": 88.01151461470327, "grad_norm": 0.26019906997680664, "learning_rate": 1e-05, "loss": 0.9276, "step": 99365 }, { "epoch": 88.01594331266608, "grad_norm": 0.22965823113918304, "learning_rate": 1e-05, "loss": 0.9646, "step": 99370 }, { "epoch": 88.02037201062888, "grad_norm": 0.23158767819404602, "learning_rate": 1e-05, "loss": 0.9756, "step": 99375 }, { "epoch": 88.02480070859167, "grad_norm": 0.24661968648433685, "learning_rate": 1e-05, "loss": 0.922, "step": 99380 }, { "epoch": 88.02922940655448, "grad_norm": 0.26257190108299255, "learning_rate": 1e-05, "loss": 0.9603, "step": 99385 }, { "epoch": 88.03365810451727, "grad_norm": 0.27869170904159546, "learning_rate": 1e-05, "loss": 0.9696, "step": 99390 }, { "epoch": 88.03808680248007, "grad_norm": 0.2570676803588867, "learning_rate": 1e-05, "loss": 0.8894, "step": 99395 }, { "epoch": 88.04251550044287, "grad_norm": 0.25156939029693604, "learning_rate": 1e-05, "loss": 0.9731, "step": 99400 }, { "epoch": 88.04694419840567, "grad_norm": 0.21452170610427856, "learning_rate": 1e-05, "loss": 0.8991, "step": 99405 }, { "epoch": 88.05137289636846, "grad_norm": 0.23744866251945496, "learning_rate": 1e-05, "loss": 0.8967, "step": 99410 }, { "epoch": 88.05580159433127, "grad_norm": 0.2338162511587143, "learning_rate": 1e-05, "loss": 0.9855, "step": 99415 }, { "epoch": 88.06023029229407, "grad_norm": 0.250712126493454, "learning_rate": 1e-05, "loss": 0.9116, "step": 99420 }, { "epoch": 88.06465899025686, "grad_norm": 0.22308313846588135, "learning_rate": 1e-05, "loss": 0.9896, "step": 99425 }, { "epoch": 88.06908768821967, "grad_norm": 0.21670743823051453, "learning_rate": 1e-05, "loss": 0.9551, "step": 99430 }, { "epoch": 88.07351638618246, "grad_norm": 0.23527945578098297, "learning_rate": 1e-05, "loss": 0.9809, "step": 99435 }, { "epoch": 88.07794508414526, "grad_norm": 0.23743298649787903, "learning_rate": 1e-05, "loss": 0.997, "step": 99440 }, { "epoch": 88.08237378210806, "grad_norm": 0.2462809681892395, "learning_rate": 1e-05, "loss": 0.9912, "step": 99445 }, { "epoch": 88.08680248007086, "grad_norm": 0.26181820034980774, "learning_rate": 1e-05, "loss": 0.9708, "step": 99450 }, { "epoch": 88.09123117803365, "grad_norm": 0.23750481009483337, "learning_rate": 1e-05, "loss": 0.9902, "step": 99455 }, { "epoch": 88.09565987599646, "grad_norm": 0.27057144045829773, "learning_rate": 1e-05, "loss": 0.9968, "step": 99460 }, { "epoch": 88.10008857395925, "grad_norm": 0.2528458535671234, "learning_rate": 1e-05, "loss": 0.9932, "step": 99465 }, { "epoch": 88.10451727192205, "grad_norm": 0.2581286132335663, "learning_rate": 1e-05, "loss": 0.9413, "step": 99470 }, { "epoch": 88.10894596988486, "grad_norm": 0.24771030247211456, "learning_rate": 1e-05, "loss": 0.9127, "step": 99475 }, { "epoch": 88.11337466784765, "grad_norm": 0.28207796812057495, "learning_rate": 1e-05, "loss": 0.908, "step": 99480 }, { "epoch": 88.11780336581045, "grad_norm": 0.25008776783943176, "learning_rate": 1e-05, "loss": 0.9378, "step": 99485 }, { "epoch": 88.12223206377325, "grad_norm": 0.22862808406352997, "learning_rate": 1e-05, "loss": 0.9378, "step": 99490 }, { "epoch": 88.12666076173605, "grad_norm": 0.22968505322933197, "learning_rate": 1e-05, "loss": 0.9844, "step": 99495 }, { "epoch": 88.13108945969884, "grad_norm": 0.2297797054052353, "learning_rate": 1e-05, "loss": 0.9612, "step": 99500 }, { "epoch": 88.13551815766165, "grad_norm": 0.2316872924566269, "learning_rate": 1e-05, "loss": 1.0568, "step": 99505 }, { "epoch": 88.13994685562444, "grad_norm": 0.2860967516899109, "learning_rate": 1e-05, "loss": 0.9026, "step": 99510 }, { "epoch": 88.14437555358724, "grad_norm": 0.22373518347740173, "learning_rate": 1e-05, "loss": 0.9561, "step": 99515 }, { "epoch": 88.14880425155005, "grad_norm": 0.2924445569515228, "learning_rate": 1e-05, "loss": 0.958, "step": 99520 }, { "epoch": 88.15323294951284, "grad_norm": 0.24673527479171753, "learning_rate": 1e-05, "loss": 1.0068, "step": 99525 }, { "epoch": 88.15766164747565, "grad_norm": 0.2519305348396301, "learning_rate": 1e-05, "loss": 0.9608, "step": 99530 }, { "epoch": 88.16209034543844, "grad_norm": 0.2540664076805115, "learning_rate": 1e-05, "loss": 0.9813, "step": 99535 }, { "epoch": 88.16651904340124, "grad_norm": 0.28010979294776917, "learning_rate": 1e-05, "loss": 0.9658, "step": 99540 }, { "epoch": 88.17094774136405, "grad_norm": 0.2572025954723358, "learning_rate": 1e-05, "loss": 0.9543, "step": 99545 }, { "epoch": 88.17537643932684, "grad_norm": 0.2626935541629791, "learning_rate": 1e-05, "loss": 0.9448, "step": 99550 }, { "epoch": 88.17980513728963, "grad_norm": 0.23510734736919403, "learning_rate": 1e-05, "loss": 0.9491, "step": 99555 }, { "epoch": 88.18423383525244, "grad_norm": 0.2211681753396988, "learning_rate": 1e-05, "loss": 0.9962, "step": 99560 }, { "epoch": 88.18866253321524, "grad_norm": 0.25278133153915405, "learning_rate": 1e-05, "loss": 1.0079, "step": 99565 }, { "epoch": 88.19309123117803, "grad_norm": 0.2636018395423889, "learning_rate": 1e-05, "loss": 0.9203, "step": 99570 }, { "epoch": 88.19751992914084, "grad_norm": 0.29537665843963623, "learning_rate": 1e-05, "loss": 1.0384, "step": 99575 }, { "epoch": 88.20194862710363, "grad_norm": 0.2552962601184845, "learning_rate": 1e-05, "loss": 0.974, "step": 99580 }, { "epoch": 88.20637732506643, "grad_norm": 0.2730923891067505, "learning_rate": 1e-05, "loss": 0.9715, "step": 99585 }, { "epoch": 88.21080602302924, "grad_norm": 0.27667036652565, "learning_rate": 1e-05, "loss": 0.9334, "step": 99590 }, { "epoch": 88.21523472099203, "grad_norm": 0.23600807785987854, "learning_rate": 1e-05, "loss": 0.9452, "step": 99595 }, { "epoch": 88.21966341895482, "grad_norm": 0.23295293748378754, "learning_rate": 1e-05, "loss": 0.9267, "step": 99600 }, { "epoch": 88.22409211691763, "grad_norm": 0.22966763377189636, "learning_rate": 1e-05, "loss": 0.9771, "step": 99605 }, { "epoch": 88.22852081488043, "grad_norm": 0.23855608701705933, "learning_rate": 1e-05, "loss": 0.9531, "step": 99610 }, { "epoch": 88.23294951284322, "grad_norm": 0.2854846119880676, "learning_rate": 1e-05, "loss": 0.9737, "step": 99615 }, { "epoch": 88.23737821080603, "grad_norm": 0.23175206780433655, "learning_rate": 1e-05, "loss": 0.8973, "step": 99620 }, { "epoch": 88.24180690876882, "grad_norm": 0.23563654720783234, "learning_rate": 1e-05, "loss": 0.9761, "step": 99625 }, { "epoch": 88.24623560673162, "grad_norm": 0.2592059373855591, "learning_rate": 1e-05, "loss": 0.9451, "step": 99630 }, { "epoch": 88.25066430469442, "grad_norm": 0.27779364585876465, "learning_rate": 1e-05, "loss": 0.9727, "step": 99635 }, { "epoch": 88.25509300265722, "grad_norm": 0.21161754429340363, "learning_rate": 1e-05, "loss": 0.9338, "step": 99640 }, { "epoch": 88.25952170062001, "grad_norm": 0.2226719707250595, "learning_rate": 1e-05, "loss": 0.9934, "step": 99645 }, { "epoch": 88.26395039858282, "grad_norm": 0.2077556699514389, "learning_rate": 1e-05, "loss": 1.0088, "step": 99650 }, { "epoch": 88.26837909654562, "grad_norm": 0.2503184676170349, "learning_rate": 1e-05, "loss": 0.9927, "step": 99655 }, { "epoch": 88.27280779450841, "grad_norm": 0.26173096895217896, "learning_rate": 1e-05, "loss": 0.9483, "step": 99660 }, { "epoch": 88.27723649247122, "grad_norm": 0.24127070605754852, "learning_rate": 1e-05, "loss": 0.9147, "step": 99665 }, { "epoch": 88.28166519043401, "grad_norm": 0.26643872261047363, "learning_rate": 1e-05, "loss": 0.9091, "step": 99670 }, { "epoch": 88.2860938883968, "grad_norm": 0.22323083877563477, "learning_rate": 1e-05, "loss": 0.9702, "step": 99675 }, { "epoch": 88.29052258635961, "grad_norm": 0.2844523787498474, "learning_rate": 1e-05, "loss": 0.902, "step": 99680 }, { "epoch": 88.29495128432241, "grad_norm": 0.25690388679504395, "learning_rate": 1e-05, "loss": 0.9819, "step": 99685 }, { "epoch": 88.2993799822852, "grad_norm": 0.2573966681957245, "learning_rate": 1e-05, "loss": 0.9521, "step": 99690 }, { "epoch": 88.30380868024801, "grad_norm": 0.20467689633369446, "learning_rate": 1e-05, "loss": 0.8746, "step": 99695 }, { "epoch": 88.3082373782108, "grad_norm": 0.2291315495967865, "learning_rate": 1e-05, "loss": 0.9294, "step": 99700 }, { "epoch": 88.3126660761736, "grad_norm": 0.2611003816127777, "learning_rate": 1e-05, "loss": 1.0118, "step": 99705 }, { "epoch": 88.31709477413641, "grad_norm": 0.22807160019874573, "learning_rate": 1e-05, "loss": 0.9761, "step": 99710 }, { "epoch": 88.3215234720992, "grad_norm": 0.20707808434963226, "learning_rate": 1e-05, "loss": 0.9642, "step": 99715 }, { "epoch": 88.325952170062, "grad_norm": 0.23027831315994263, "learning_rate": 1e-05, "loss": 0.9398, "step": 99720 }, { "epoch": 88.3303808680248, "grad_norm": 0.22548729181289673, "learning_rate": 1e-05, "loss": 0.9848, "step": 99725 }, { "epoch": 88.3348095659876, "grad_norm": 0.2556377649307251, "learning_rate": 1e-05, "loss": 0.9176, "step": 99730 }, { "epoch": 88.33923826395039, "grad_norm": 0.2506229281425476, "learning_rate": 1e-05, "loss": 0.9219, "step": 99735 }, { "epoch": 88.3436669619132, "grad_norm": 0.20109428465366364, "learning_rate": 1e-05, "loss": 0.9415, "step": 99740 }, { "epoch": 88.348095659876, "grad_norm": 0.2601589560508728, "learning_rate": 1e-05, "loss": 0.975, "step": 99745 }, { "epoch": 88.35252435783879, "grad_norm": 0.23489879071712494, "learning_rate": 1e-05, "loss": 0.9559, "step": 99750 }, { "epoch": 88.3569530558016, "grad_norm": 0.2710605263710022, "learning_rate": 1e-05, "loss": 0.9434, "step": 99755 }, { "epoch": 88.36138175376439, "grad_norm": 0.2892511188983917, "learning_rate": 1e-05, "loss": 0.9601, "step": 99760 }, { "epoch": 88.36581045172719, "grad_norm": 0.25111764669418335, "learning_rate": 1e-05, "loss": 0.9576, "step": 99765 }, { "epoch": 88.37023914969, "grad_norm": 0.24800343811511993, "learning_rate": 1e-05, "loss": 0.9688, "step": 99770 }, { "epoch": 88.37466784765279, "grad_norm": 0.24540822207927704, "learning_rate": 1e-05, "loss": 0.9469, "step": 99775 }, { "epoch": 88.3790965456156, "grad_norm": 0.21082238852977753, "learning_rate": 1e-05, "loss": 0.9087, "step": 99780 }, { "epoch": 88.38352524357839, "grad_norm": 0.21847842633724213, "learning_rate": 1e-05, "loss": 1.0169, "step": 99785 }, { "epoch": 88.38795394154118, "grad_norm": 0.24009904265403748, "learning_rate": 1e-05, "loss": 0.9535, "step": 99790 }, { "epoch": 88.39238263950399, "grad_norm": 0.23809295892715454, "learning_rate": 1e-05, "loss": 0.9546, "step": 99795 }, { "epoch": 88.39681133746679, "grad_norm": 0.24205884337425232, "learning_rate": 1e-05, "loss": 0.985, "step": 99800 }, { "epoch": 88.40124003542958, "grad_norm": 0.2568868398666382, "learning_rate": 1e-05, "loss": 0.9644, "step": 99805 }, { "epoch": 88.40566873339239, "grad_norm": 0.2555282413959503, "learning_rate": 1e-05, "loss": 1.0064, "step": 99810 }, { "epoch": 88.41009743135518, "grad_norm": 0.23748254776000977, "learning_rate": 1e-05, "loss": 0.9319, "step": 99815 }, { "epoch": 88.41452612931798, "grad_norm": 0.24691760540008545, "learning_rate": 1e-05, "loss": 0.9241, "step": 99820 }, { "epoch": 88.41895482728079, "grad_norm": 0.28536152839660645, "learning_rate": 1e-05, "loss": 0.9535, "step": 99825 }, { "epoch": 88.42338352524358, "grad_norm": 0.217886820435524, "learning_rate": 1e-05, "loss": 0.9235, "step": 99830 }, { "epoch": 88.42781222320637, "grad_norm": 0.2699647545814514, "learning_rate": 1e-05, "loss": 0.9237, "step": 99835 }, { "epoch": 88.43224092116918, "grad_norm": 0.2297172099351883, "learning_rate": 1e-05, "loss": 0.9257, "step": 99840 }, { "epoch": 88.43666961913198, "grad_norm": 0.23674717545509338, "learning_rate": 1e-05, "loss": 0.9725, "step": 99845 }, { "epoch": 88.44109831709477, "grad_norm": 0.22396232187747955, "learning_rate": 1e-05, "loss": 0.9278, "step": 99850 }, { "epoch": 88.44552701505758, "grad_norm": 0.1978616565465927, "learning_rate": 1e-05, "loss": 0.9665, "step": 99855 }, { "epoch": 88.44995571302037, "grad_norm": 0.22385986149311066, "learning_rate": 1e-05, "loss": 0.9905, "step": 99860 }, { "epoch": 88.45438441098317, "grad_norm": 0.1950816959142685, "learning_rate": 1e-05, "loss": 0.9502, "step": 99865 }, { "epoch": 88.45881310894598, "grad_norm": 0.1992218941450119, "learning_rate": 1e-05, "loss": 0.9235, "step": 99870 }, { "epoch": 88.46324180690877, "grad_norm": 0.23768770694732666, "learning_rate": 1e-05, "loss": 0.9715, "step": 99875 }, { "epoch": 88.46767050487156, "grad_norm": 0.2011580467224121, "learning_rate": 1e-05, "loss": 0.9439, "step": 99880 }, { "epoch": 88.47209920283437, "grad_norm": 0.21293315291404724, "learning_rate": 1e-05, "loss": 0.9228, "step": 99885 }, { "epoch": 88.47652790079717, "grad_norm": 0.22952774167060852, "learning_rate": 1e-05, "loss": 0.9249, "step": 99890 }, { "epoch": 88.48095659875996, "grad_norm": 0.2631010115146637, "learning_rate": 1e-05, "loss": 0.9349, "step": 99895 }, { "epoch": 88.48538529672277, "grad_norm": 0.21053150296211243, "learning_rate": 1e-05, "loss": 0.9453, "step": 99900 }, { "epoch": 88.48981399468556, "grad_norm": 0.22930829226970673, "learning_rate": 1e-05, "loss": 0.9559, "step": 99905 }, { "epoch": 88.49424269264836, "grad_norm": 0.2742099463939667, "learning_rate": 1e-05, "loss": 1.039, "step": 99910 }, { "epoch": 88.49867139061116, "grad_norm": 0.26076045632362366, "learning_rate": 1e-05, "loss": 0.9922, "step": 99915 }, { "epoch": 88.50310008857396, "grad_norm": 0.22787722945213318, "learning_rate": 1e-05, "loss": 0.9578, "step": 99920 }, { "epoch": 88.50752878653675, "grad_norm": 0.24036970734596252, "learning_rate": 1e-05, "loss": 0.9528, "step": 99925 }, { "epoch": 88.51195748449956, "grad_norm": 0.2343783974647522, "learning_rate": 1e-05, "loss": 0.9788, "step": 99930 }, { "epoch": 88.51638618246236, "grad_norm": 0.24172183871269226, "learning_rate": 1e-05, "loss": 0.9868, "step": 99935 }, { "epoch": 88.52081488042515, "grad_norm": 0.26128506660461426, "learning_rate": 1e-05, "loss": 0.9899, "step": 99940 }, { "epoch": 88.52524357838796, "grad_norm": 0.27385762333869934, "learning_rate": 1e-05, "loss": 0.9382, "step": 99945 }, { "epoch": 88.52967227635075, "grad_norm": 0.26363852620124817, "learning_rate": 1e-05, "loss": 0.9205, "step": 99950 }, { "epoch": 88.53410097431355, "grad_norm": 0.29527363181114197, "learning_rate": 1e-05, "loss": 1.0199, "step": 99955 }, { "epoch": 88.53852967227635, "grad_norm": 0.2481592297554016, "learning_rate": 1e-05, "loss": 0.9641, "step": 99960 }, { "epoch": 88.54295837023915, "grad_norm": 0.30731603503227234, "learning_rate": 1e-05, "loss": 0.9601, "step": 99965 }, { "epoch": 88.54738706820194, "grad_norm": 0.27373579144477844, "learning_rate": 1e-05, "loss": 0.9727, "step": 99970 }, { "epoch": 88.55181576616475, "grad_norm": 0.23890335857868195, "learning_rate": 1e-05, "loss": 0.9446, "step": 99975 }, { "epoch": 88.55624446412754, "grad_norm": 0.2325640767812729, "learning_rate": 1e-05, "loss": 0.9862, "step": 99980 }, { "epoch": 88.56067316209034, "grad_norm": 0.2493053525686264, "learning_rate": 1e-05, "loss": 0.9594, "step": 99985 }, { "epoch": 88.56510186005315, "grad_norm": 0.22135327756404877, "learning_rate": 1e-05, "loss": 0.9603, "step": 99990 }, { "epoch": 88.56953055801594, "grad_norm": 0.22047559916973114, "learning_rate": 1e-05, "loss": 1.0289, "step": 99995 }, { "epoch": 88.57395925597874, "grad_norm": 0.22702765464782715, "learning_rate": 1e-05, "loss": 0.9203, "step": 100000 }, { "epoch": 88.57838795394154, "grad_norm": 0.21299892663955688, "learning_rate": 1e-05, "loss": 0.9861, "step": 100005 }, { "epoch": 88.58281665190434, "grad_norm": 0.24758689105510712, "learning_rate": 1e-05, "loss": 0.9625, "step": 100010 }, { "epoch": 88.58724534986715, "grad_norm": 0.2863371670246124, "learning_rate": 1e-05, "loss": 0.9404, "step": 100015 }, { "epoch": 88.59167404782994, "grad_norm": 0.25844478607177734, "learning_rate": 1e-05, "loss": 0.9247, "step": 100020 }, { "epoch": 88.59610274579273, "grad_norm": 0.27765244245529175, "learning_rate": 1e-05, "loss": 0.9736, "step": 100025 }, { "epoch": 88.60053144375554, "grad_norm": 0.29093942046165466, "learning_rate": 1e-05, "loss": 0.9345, "step": 100030 }, { "epoch": 88.60496014171834, "grad_norm": 0.3137884736061096, "learning_rate": 1e-05, "loss": 0.9532, "step": 100035 }, { "epoch": 88.60938883968113, "grad_norm": 0.30530211329460144, "learning_rate": 1e-05, "loss": 0.97, "step": 100040 }, { "epoch": 88.61381753764394, "grad_norm": 0.27440449595451355, "learning_rate": 1e-05, "loss": 0.9419, "step": 100045 }, { "epoch": 88.61824623560673, "grad_norm": 0.27623388171195984, "learning_rate": 1e-05, "loss": 0.9534, "step": 100050 }, { "epoch": 88.62267493356953, "grad_norm": 0.2780875265598297, "learning_rate": 1e-05, "loss": 0.9457, "step": 100055 }, { "epoch": 88.62710363153234, "grad_norm": 0.2932929992675781, "learning_rate": 1e-05, "loss": 0.9398, "step": 100060 }, { "epoch": 88.63153232949513, "grad_norm": 0.2343708574771881, "learning_rate": 1e-05, "loss": 0.9778, "step": 100065 }, { "epoch": 88.63596102745792, "grad_norm": 0.20966751873493195, "learning_rate": 1e-05, "loss": 0.9222, "step": 100070 }, { "epoch": 88.64038972542073, "grad_norm": 0.24885550141334534, "learning_rate": 1e-05, "loss": 0.9453, "step": 100075 }, { "epoch": 88.64481842338353, "grad_norm": 0.29896649718284607, "learning_rate": 1e-05, "loss": 0.9474, "step": 100080 }, { "epoch": 88.64924712134632, "grad_norm": 0.238017275929451, "learning_rate": 1e-05, "loss": 0.9457, "step": 100085 }, { "epoch": 88.65367581930913, "grad_norm": 0.2601117789745331, "learning_rate": 1e-05, "loss": 0.9446, "step": 100090 }, { "epoch": 88.65810451727192, "grad_norm": 0.25714123249053955, "learning_rate": 1e-05, "loss": 0.939, "step": 100095 }, { "epoch": 88.66253321523472, "grad_norm": 0.24347330629825592, "learning_rate": 1e-05, "loss": 0.8983, "step": 100100 }, { "epoch": 88.66696191319753, "grad_norm": 0.24118779599666595, "learning_rate": 1e-05, "loss": 0.9781, "step": 100105 }, { "epoch": 88.67139061116032, "grad_norm": 0.2053503841161728, "learning_rate": 1e-05, "loss": 0.9707, "step": 100110 }, { "epoch": 88.67581930912311, "grad_norm": 0.20410113036632538, "learning_rate": 1e-05, "loss": 0.9485, "step": 100115 }, { "epoch": 88.68024800708592, "grad_norm": 0.20267923176288605, "learning_rate": 1e-05, "loss": 0.9216, "step": 100120 }, { "epoch": 88.68467670504872, "grad_norm": 0.23829081654548645, "learning_rate": 1e-05, "loss": 0.9863, "step": 100125 }, { "epoch": 88.68910540301151, "grad_norm": 0.22634829580783844, "learning_rate": 1e-05, "loss": 0.9789, "step": 100130 }, { "epoch": 88.69353410097432, "grad_norm": 0.25587794184684753, "learning_rate": 1e-05, "loss": 0.9498, "step": 100135 }, { "epoch": 88.69796279893711, "grad_norm": 0.22228361666202545, "learning_rate": 1e-05, "loss": 0.9567, "step": 100140 }, { "epoch": 88.7023914968999, "grad_norm": 0.2489021271467209, "learning_rate": 1e-05, "loss": 0.979, "step": 100145 }, { "epoch": 88.70682019486271, "grad_norm": 0.2050846666097641, "learning_rate": 1e-05, "loss": 0.9668, "step": 100150 }, { "epoch": 88.71124889282551, "grad_norm": 0.28455400466918945, "learning_rate": 1e-05, "loss": 0.9862, "step": 100155 }, { "epoch": 88.7156775907883, "grad_norm": 0.2465212345123291, "learning_rate": 1e-05, "loss": 0.9281, "step": 100160 }, { "epoch": 88.72010628875111, "grad_norm": 0.22401882708072662, "learning_rate": 1e-05, "loss": 1.0004, "step": 100165 }, { "epoch": 88.7245349867139, "grad_norm": 0.23985005915164948, "learning_rate": 1e-05, "loss": 0.9783, "step": 100170 }, { "epoch": 88.7289636846767, "grad_norm": 0.21802686154842377, "learning_rate": 1e-05, "loss": 0.9402, "step": 100175 }, { "epoch": 88.73339238263951, "grad_norm": 0.26026055216789246, "learning_rate": 1e-05, "loss": 0.9403, "step": 100180 }, { "epoch": 88.7378210806023, "grad_norm": 0.22029007971286774, "learning_rate": 1e-05, "loss": 0.9857, "step": 100185 }, { "epoch": 88.7422497785651, "grad_norm": 0.2248016893863678, "learning_rate": 1e-05, "loss": 0.909, "step": 100190 }, { "epoch": 88.7466784765279, "grad_norm": 0.2388961911201477, "learning_rate": 1e-05, "loss": 0.9287, "step": 100195 }, { "epoch": 88.7511071744907, "grad_norm": 0.2143191248178482, "learning_rate": 1e-05, "loss": 0.9594, "step": 100200 }, { "epoch": 88.75553587245349, "grad_norm": 0.2264724224805832, "learning_rate": 1e-05, "loss": 0.9864, "step": 100205 }, { "epoch": 88.7599645704163, "grad_norm": 0.22710593044757843, "learning_rate": 1e-05, "loss": 0.9229, "step": 100210 }, { "epoch": 88.7643932683791, "grad_norm": 0.2247508019208908, "learning_rate": 1e-05, "loss": 0.9728, "step": 100215 }, { "epoch": 88.76882196634189, "grad_norm": 0.29041588306427, "learning_rate": 1e-05, "loss": 0.9735, "step": 100220 }, { "epoch": 88.7732506643047, "grad_norm": 0.2267577201128006, "learning_rate": 1e-05, "loss": 0.9782, "step": 100225 }, { "epoch": 88.77767936226749, "grad_norm": 0.19997918605804443, "learning_rate": 1e-05, "loss": 0.9515, "step": 100230 }, { "epoch": 88.78210806023029, "grad_norm": 0.26032817363739014, "learning_rate": 1e-05, "loss": 0.9817, "step": 100235 }, { "epoch": 88.7865367581931, "grad_norm": 0.269196093082428, "learning_rate": 1e-05, "loss": 0.9615, "step": 100240 }, { "epoch": 88.79096545615589, "grad_norm": 0.24599489569664001, "learning_rate": 1e-05, "loss": 0.9723, "step": 100245 }, { "epoch": 88.79539415411868, "grad_norm": 0.2428746521472931, "learning_rate": 1e-05, "loss": 0.9537, "step": 100250 }, { "epoch": 88.79982285208149, "grad_norm": 0.2269238531589508, "learning_rate": 1e-05, "loss": 0.9839, "step": 100255 }, { "epoch": 88.80425155004428, "grad_norm": 0.22356781363487244, "learning_rate": 1e-05, "loss": 0.9574, "step": 100260 }, { "epoch": 88.8086802480071, "grad_norm": 0.21390698850154877, "learning_rate": 1e-05, "loss": 0.9719, "step": 100265 }, { "epoch": 88.81310894596989, "grad_norm": 0.24619929492473602, "learning_rate": 1e-05, "loss": 0.9059, "step": 100270 }, { "epoch": 88.81753764393268, "grad_norm": 0.29885417222976685, "learning_rate": 1e-05, "loss": 0.9728, "step": 100275 }, { "epoch": 88.82196634189549, "grad_norm": 0.2692887783050537, "learning_rate": 1e-05, "loss": 0.99, "step": 100280 }, { "epoch": 88.82639503985828, "grad_norm": 0.2062106728553772, "learning_rate": 1e-05, "loss": 0.9804, "step": 100285 }, { "epoch": 88.83082373782108, "grad_norm": 0.22567851841449738, "learning_rate": 1e-05, "loss": 0.9985, "step": 100290 }, { "epoch": 88.83525243578389, "grad_norm": 0.2369624674320221, "learning_rate": 1e-05, "loss": 0.9546, "step": 100295 }, { "epoch": 88.83968113374668, "grad_norm": 0.23463581502437592, "learning_rate": 1e-05, "loss": 0.9932, "step": 100300 }, { "epoch": 88.84410983170947, "grad_norm": 0.2131398469209671, "learning_rate": 1e-05, "loss": 0.9492, "step": 100305 }, { "epoch": 88.84853852967228, "grad_norm": 0.22814461588859558, "learning_rate": 1e-05, "loss": 0.9903, "step": 100310 }, { "epoch": 88.85296722763508, "grad_norm": 0.24123625457286835, "learning_rate": 1e-05, "loss": 0.9091, "step": 100315 }, { "epoch": 88.85739592559787, "grad_norm": 0.23458488285541534, "learning_rate": 1e-05, "loss": 0.9477, "step": 100320 }, { "epoch": 88.86182462356068, "grad_norm": 0.2231205850839615, "learning_rate": 1e-05, "loss": 1.0035, "step": 100325 }, { "epoch": 88.86625332152347, "grad_norm": 0.26181745529174805, "learning_rate": 1e-05, "loss": 0.9582, "step": 100330 }, { "epoch": 88.87068201948627, "grad_norm": 0.2155875861644745, "learning_rate": 1e-05, "loss": 0.9196, "step": 100335 }, { "epoch": 88.87511071744908, "grad_norm": 0.21791820228099823, "learning_rate": 1e-05, "loss": 0.9088, "step": 100340 }, { "epoch": 88.87953941541187, "grad_norm": 0.22697681188583374, "learning_rate": 1e-05, "loss": 0.9726, "step": 100345 }, { "epoch": 88.88396811337466, "grad_norm": 0.2161789983510971, "learning_rate": 1e-05, "loss": 0.9721, "step": 100350 }, { "epoch": 88.88839681133747, "grad_norm": 0.26885396242141724, "learning_rate": 1e-05, "loss": 0.9668, "step": 100355 }, { "epoch": 88.89282550930027, "grad_norm": 0.26077714562416077, "learning_rate": 1e-05, "loss": 0.9295, "step": 100360 }, { "epoch": 88.89725420726306, "grad_norm": 0.2938379943370819, "learning_rate": 1e-05, "loss": 0.9382, "step": 100365 }, { "epoch": 88.90168290522587, "grad_norm": 0.2660319209098816, "learning_rate": 1e-05, "loss": 0.9487, "step": 100370 }, { "epoch": 88.90611160318866, "grad_norm": 0.24503764510154724, "learning_rate": 1e-05, "loss": 0.9201, "step": 100375 }, { "epoch": 88.91054030115146, "grad_norm": 0.2380249798297882, "learning_rate": 1e-05, "loss": 0.9262, "step": 100380 }, { "epoch": 88.91496899911427, "grad_norm": 0.25015366077423096, "learning_rate": 1e-05, "loss": 1.0392, "step": 100385 }, { "epoch": 88.91939769707706, "grad_norm": 0.2481878399848938, "learning_rate": 1e-05, "loss": 0.9403, "step": 100390 }, { "epoch": 88.92382639503985, "grad_norm": 0.24644330143928528, "learning_rate": 1e-05, "loss": 0.9484, "step": 100395 }, { "epoch": 88.92825509300266, "grad_norm": 0.2675034999847412, "learning_rate": 1e-05, "loss": 0.9839, "step": 100400 }, { "epoch": 88.93268379096546, "grad_norm": 0.21234582364559174, "learning_rate": 1e-05, "loss": 0.9998, "step": 100405 }, { "epoch": 88.93711248892825, "grad_norm": 0.2354971170425415, "learning_rate": 1e-05, "loss": 0.96, "step": 100410 }, { "epoch": 88.94154118689106, "grad_norm": 0.21754314005374908, "learning_rate": 1e-05, "loss": 0.9491, "step": 100415 }, { "epoch": 88.94596988485385, "grad_norm": 0.24992431700229645, "learning_rate": 1e-05, "loss": 0.9612, "step": 100420 }, { "epoch": 88.95039858281665, "grad_norm": 0.2641519606113434, "learning_rate": 1e-05, "loss": 0.9776, "step": 100425 }, { "epoch": 88.95482728077945, "grad_norm": 0.24620196223258972, "learning_rate": 1e-05, "loss": 0.981, "step": 100430 }, { "epoch": 88.95925597874225, "grad_norm": 0.25087955594062805, "learning_rate": 1e-05, "loss": 0.9852, "step": 100435 }, { "epoch": 88.96368467670504, "grad_norm": 0.217938631772995, "learning_rate": 1e-05, "loss": 0.9821, "step": 100440 }, { "epoch": 88.96811337466785, "grad_norm": 0.23816604912281036, "learning_rate": 1e-05, "loss": 0.9471, "step": 100445 }, { "epoch": 88.97254207263065, "grad_norm": 0.2639926075935364, "learning_rate": 1e-05, "loss": 0.9502, "step": 100450 }, { "epoch": 88.97697077059344, "grad_norm": 0.27573248744010925, "learning_rate": 1e-05, "loss": 0.9396, "step": 100455 }, { "epoch": 88.98139946855625, "grad_norm": 0.24717433750629425, "learning_rate": 1e-05, "loss": 0.9482, "step": 100460 }, { "epoch": 88.98582816651904, "grad_norm": 0.2584714889526367, "learning_rate": 1e-05, "loss": 0.9431, "step": 100465 }, { "epoch": 88.99025686448184, "grad_norm": 0.2804734408855438, "learning_rate": 1e-05, "loss": 0.9823, "step": 100470 }, { "epoch": 88.99468556244464, "grad_norm": 0.2646898627281189, "learning_rate": 1e-05, "loss": 0.9181, "step": 100475 }, { "epoch": 88.99911426040744, "grad_norm": 0.26005032658576965, "learning_rate": 1e-05, "loss": 0.9521, "step": 100480 }, { "epoch": 89.00354295837023, "grad_norm": 0.22763916850090027, "learning_rate": 1e-05, "loss": 0.9263, "step": 100485 }, { "epoch": 89.00797165633304, "grad_norm": 0.2225470393896103, "learning_rate": 1e-05, "loss": 0.9038, "step": 100490 }, { "epoch": 89.01240035429583, "grad_norm": 0.28239724040031433, "learning_rate": 1e-05, "loss": 0.9433, "step": 100495 }, { "epoch": 89.01682905225863, "grad_norm": 0.22660139203071594, "learning_rate": 1e-05, "loss": 1.0072, "step": 100500 }, { "epoch": 89.02125775022144, "grad_norm": 0.2755179703235626, "learning_rate": 1e-05, "loss": 0.9601, "step": 100505 }, { "epoch": 89.02568644818423, "grad_norm": 0.2388431429862976, "learning_rate": 1e-05, "loss": 0.9904, "step": 100510 }, { "epoch": 89.03011514614704, "grad_norm": 0.2778083384037018, "learning_rate": 1e-05, "loss": 0.9399, "step": 100515 }, { "epoch": 89.03454384410983, "grad_norm": 0.22600561380386353, "learning_rate": 1e-05, "loss": 0.9505, "step": 100520 }, { "epoch": 89.03897254207263, "grad_norm": 0.23254676163196564, "learning_rate": 1e-05, "loss": 0.984, "step": 100525 }, { "epoch": 89.04340124003544, "grad_norm": 0.26569318771362305, "learning_rate": 1e-05, "loss": 0.9672, "step": 100530 }, { "epoch": 89.04782993799823, "grad_norm": 0.23208293318748474, "learning_rate": 1e-05, "loss": 1.0029, "step": 100535 }, { "epoch": 89.05225863596102, "grad_norm": 0.22812440991401672, "learning_rate": 1e-05, "loss": 1.0451, "step": 100540 }, { "epoch": 89.05668733392383, "grad_norm": 0.2540033161640167, "learning_rate": 1e-05, "loss": 0.9297, "step": 100545 }, { "epoch": 89.06111603188663, "grad_norm": 0.25510844588279724, "learning_rate": 1e-05, "loss": 0.9832, "step": 100550 }, { "epoch": 89.06554472984942, "grad_norm": 0.22745384275913239, "learning_rate": 1e-05, "loss": 0.9618, "step": 100555 }, { "epoch": 89.06997342781223, "grad_norm": 0.21563567221164703, "learning_rate": 1e-05, "loss": 0.9902, "step": 100560 }, { "epoch": 89.07440212577502, "grad_norm": 0.24625031650066376, "learning_rate": 1e-05, "loss": 0.9934, "step": 100565 }, { "epoch": 89.07883082373782, "grad_norm": 0.22668561339378357, "learning_rate": 1e-05, "loss": 0.9687, "step": 100570 }, { "epoch": 89.08325952170063, "grad_norm": 0.21900923550128937, "learning_rate": 1e-05, "loss": 0.9574, "step": 100575 }, { "epoch": 89.08768821966342, "grad_norm": 0.24717196822166443, "learning_rate": 1e-05, "loss": 0.933, "step": 100580 }, { "epoch": 89.09211691762621, "grad_norm": 0.25774258375167847, "learning_rate": 1e-05, "loss": 0.9591, "step": 100585 }, { "epoch": 89.09654561558902, "grad_norm": 0.30301162600517273, "learning_rate": 1e-05, "loss": 0.9852, "step": 100590 }, { "epoch": 89.10097431355182, "grad_norm": 0.24284137785434723, "learning_rate": 1e-05, "loss": 0.9287, "step": 100595 }, { "epoch": 89.10540301151461, "grad_norm": 0.25048893690109253, "learning_rate": 1e-05, "loss": 0.8973, "step": 100600 }, { "epoch": 89.10983170947742, "grad_norm": 0.27334481477737427, "learning_rate": 1e-05, "loss": 0.9538, "step": 100605 }, { "epoch": 89.11426040744021, "grad_norm": 0.2897074222564697, "learning_rate": 1e-05, "loss": 0.9165, "step": 100610 }, { "epoch": 89.118689105403, "grad_norm": 0.2764134705066681, "learning_rate": 1e-05, "loss": 0.9756, "step": 100615 }, { "epoch": 89.12311780336582, "grad_norm": 0.2903238832950592, "learning_rate": 1e-05, "loss": 0.9257, "step": 100620 }, { "epoch": 89.12754650132861, "grad_norm": 0.24302980303764343, "learning_rate": 1e-05, "loss": 0.9499, "step": 100625 }, { "epoch": 89.1319751992914, "grad_norm": 0.3113268315792084, "learning_rate": 1e-05, "loss": 0.9438, "step": 100630 }, { "epoch": 89.13640389725421, "grad_norm": 0.214813694357872, "learning_rate": 1e-05, "loss": 0.9323, "step": 100635 }, { "epoch": 89.140832595217, "grad_norm": 0.23509356379508972, "learning_rate": 1e-05, "loss": 0.9728, "step": 100640 }, { "epoch": 89.1452612931798, "grad_norm": 0.23588000237941742, "learning_rate": 1e-05, "loss": 0.9344, "step": 100645 }, { "epoch": 89.14968999114261, "grad_norm": 0.22670619189739227, "learning_rate": 1e-05, "loss": 1.0286, "step": 100650 }, { "epoch": 89.1541186891054, "grad_norm": 0.2659050226211548, "learning_rate": 1e-05, "loss": 0.9904, "step": 100655 }, { "epoch": 89.1585473870682, "grad_norm": 0.23737093806266785, "learning_rate": 1e-05, "loss": 0.9302, "step": 100660 }, { "epoch": 89.162976085031, "grad_norm": 0.24317654967308044, "learning_rate": 1e-05, "loss": 0.9512, "step": 100665 }, { "epoch": 89.1674047829938, "grad_norm": 0.24173079431056976, "learning_rate": 1e-05, "loss": 0.9369, "step": 100670 }, { "epoch": 89.1718334809566, "grad_norm": 0.25410574674606323, "learning_rate": 1e-05, "loss": 0.9499, "step": 100675 }, { "epoch": 89.1762621789194, "grad_norm": 0.2642264664173126, "learning_rate": 1e-05, "loss": 1.0312, "step": 100680 }, { "epoch": 89.1806908768822, "grad_norm": 0.2311367243528366, "learning_rate": 1e-05, "loss": 0.9849, "step": 100685 }, { "epoch": 89.18511957484499, "grad_norm": 0.22274249792099, "learning_rate": 1e-05, "loss": 0.9622, "step": 100690 }, { "epoch": 89.1895482728078, "grad_norm": 0.22816705703735352, "learning_rate": 1e-05, "loss": 0.9219, "step": 100695 }, { "epoch": 89.19397697077059, "grad_norm": 0.22125868499279022, "learning_rate": 1e-05, "loss": 0.9821, "step": 100700 }, { "epoch": 89.19840566873339, "grad_norm": 0.24668489396572113, "learning_rate": 1e-05, "loss": 0.9073, "step": 100705 }, { "epoch": 89.2028343666962, "grad_norm": 0.20868946611881256, "learning_rate": 1e-05, "loss": 0.9137, "step": 100710 }, { "epoch": 89.20726306465899, "grad_norm": 0.2574494183063507, "learning_rate": 1e-05, "loss": 0.944, "step": 100715 }, { "epoch": 89.21169176262178, "grad_norm": 0.28075528144836426, "learning_rate": 1e-05, "loss": 0.9643, "step": 100720 }, { "epoch": 89.21612046058459, "grad_norm": 0.22525160014629364, "learning_rate": 1e-05, "loss": 0.9339, "step": 100725 }, { "epoch": 89.22054915854739, "grad_norm": 0.2189497947692871, "learning_rate": 1e-05, "loss": 0.9953, "step": 100730 }, { "epoch": 89.22497785651018, "grad_norm": 0.2552158534526825, "learning_rate": 1e-05, "loss": 0.9464, "step": 100735 }, { "epoch": 89.22940655447299, "grad_norm": 0.2355300486087799, "learning_rate": 1e-05, "loss": 0.9715, "step": 100740 }, { "epoch": 89.23383525243578, "grad_norm": 0.23161034286022186, "learning_rate": 1e-05, "loss": 0.9571, "step": 100745 }, { "epoch": 89.23826395039858, "grad_norm": 0.24285735189914703, "learning_rate": 1e-05, "loss": 0.9713, "step": 100750 }, { "epoch": 89.24269264836138, "grad_norm": 0.24322988092899323, "learning_rate": 1e-05, "loss": 0.9578, "step": 100755 }, { "epoch": 89.24712134632418, "grad_norm": 0.2284044325351715, "learning_rate": 1e-05, "loss": 0.8853, "step": 100760 }, { "epoch": 89.25155004428699, "grad_norm": 0.21393656730651855, "learning_rate": 1e-05, "loss": 0.9605, "step": 100765 }, { "epoch": 89.25597874224978, "grad_norm": 0.2537887394428253, "learning_rate": 1e-05, "loss": 0.9591, "step": 100770 }, { "epoch": 89.26040744021257, "grad_norm": 0.24150004982948303, "learning_rate": 1e-05, "loss": 0.9376, "step": 100775 }, { "epoch": 89.26483613817538, "grad_norm": 0.22063104808330536, "learning_rate": 1e-05, "loss": 0.9831, "step": 100780 }, { "epoch": 89.26926483613818, "grad_norm": 0.250154584646225, "learning_rate": 1e-05, "loss": 0.9092, "step": 100785 }, { "epoch": 89.27369353410097, "grad_norm": 0.2574433982372284, "learning_rate": 1e-05, "loss": 0.9318, "step": 100790 }, { "epoch": 89.27812223206378, "grad_norm": 0.21480466425418854, "learning_rate": 1e-05, "loss": 0.9683, "step": 100795 }, { "epoch": 89.28255093002657, "grad_norm": 0.2599238157272339, "learning_rate": 1e-05, "loss": 0.9915, "step": 100800 }, { "epoch": 89.28697962798937, "grad_norm": 0.23576489090919495, "learning_rate": 1e-05, "loss": 0.9796, "step": 100805 }, { "epoch": 89.29140832595218, "grad_norm": 0.2433391511440277, "learning_rate": 1e-05, "loss": 0.9321, "step": 100810 }, { "epoch": 89.29583702391497, "grad_norm": 0.19983533024787903, "learning_rate": 1e-05, "loss": 0.898, "step": 100815 }, { "epoch": 89.30026572187776, "grad_norm": 0.2664582133293152, "learning_rate": 1e-05, "loss": 0.965, "step": 100820 }, { "epoch": 89.30469441984057, "grad_norm": 0.20401927828788757, "learning_rate": 1e-05, "loss": 0.9939, "step": 100825 }, { "epoch": 89.30912311780337, "grad_norm": 0.22246390581130981, "learning_rate": 1e-05, "loss": 0.9065, "step": 100830 }, { "epoch": 89.31355181576616, "grad_norm": 0.2207382321357727, "learning_rate": 1e-05, "loss": 0.9763, "step": 100835 }, { "epoch": 89.31798051372897, "grad_norm": 0.20225244760513306, "learning_rate": 1e-05, "loss": 0.9245, "step": 100840 }, { "epoch": 89.32240921169176, "grad_norm": 0.23379398882389069, "learning_rate": 1e-05, "loss": 0.9908, "step": 100845 }, { "epoch": 89.32683790965456, "grad_norm": 0.24309130012989044, "learning_rate": 1e-05, "loss": 0.9475, "step": 100850 }, { "epoch": 89.33126660761737, "grad_norm": 0.22226004302501678, "learning_rate": 1e-05, "loss": 0.9364, "step": 100855 }, { "epoch": 89.33569530558016, "grad_norm": 0.2278764247894287, "learning_rate": 1e-05, "loss": 0.9046, "step": 100860 }, { "epoch": 89.34012400354295, "grad_norm": 0.2293863594532013, "learning_rate": 1e-05, "loss": 0.9944, "step": 100865 }, { "epoch": 89.34455270150576, "grad_norm": 0.22405551373958588, "learning_rate": 1e-05, "loss": 0.9268, "step": 100870 }, { "epoch": 89.34898139946856, "grad_norm": 0.25798124074935913, "learning_rate": 1e-05, "loss": 0.9548, "step": 100875 }, { "epoch": 89.35341009743135, "grad_norm": 0.21349021792411804, "learning_rate": 1e-05, "loss": 0.9668, "step": 100880 }, { "epoch": 89.35783879539416, "grad_norm": 0.26878902316093445, "learning_rate": 1e-05, "loss": 0.9386, "step": 100885 }, { "epoch": 89.36226749335695, "grad_norm": 0.2652953267097473, "learning_rate": 1e-05, "loss": 0.9649, "step": 100890 }, { "epoch": 89.36669619131975, "grad_norm": 0.30907854437828064, "learning_rate": 1e-05, "loss": 0.9373, "step": 100895 }, { "epoch": 89.37112488928256, "grad_norm": 0.2491472363471985, "learning_rate": 1e-05, "loss": 0.9765, "step": 100900 }, { "epoch": 89.37555358724535, "grad_norm": 0.22202803194522858, "learning_rate": 1e-05, "loss": 0.9343, "step": 100905 }, { "epoch": 89.37998228520814, "grad_norm": 0.2564280033111572, "learning_rate": 1e-05, "loss": 1.0078, "step": 100910 }, { "epoch": 89.38441098317095, "grad_norm": 0.22271765768527985, "learning_rate": 1e-05, "loss": 0.932, "step": 100915 }, { "epoch": 89.38883968113375, "grad_norm": 0.2571192979812622, "learning_rate": 1e-05, "loss": 0.9194, "step": 100920 }, { "epoch": 89.39326837909654, "grad_norm": 0.2326420545578003, "learning_rate": 1e-05, "loss": 0.9698, "step": 100925 }, { "epoch": 89.39769707705935, "grad_norm": 0.21721535921096802, "learning_rate": 1e-05, "loss": 0.9553, "step": 100930 }, { "epoch": 89.40212577502214, "grad_norm": 0.19836705923080444, "learning_rate": 1e-05, "loss": 0.9861, "step": 100935 }, { "epoch": 89.40655447298494, "grad_norm": 0.24120672047138214, "learning_rate": 1e-05, "loss": 0.9285, "step": 100940 }, { "epoch": 89.41098317094774, "grad_norm": 0.21410706639289856, "learning_rate": 1e-05, "loss": 0.9315, "step": 100945 }, { "epoch": 89.41541186891054, "grad_norm": 0.2139817774295807, "learning_rate": 1e-05, "loss": 0.9614, "step": 100950 }, { "epoch": 89.41984056687333, "grad_norm": 0.2646258771419525, "learning_rate": 1e-05, "loss": 0.9472, "step": 100955 }, { "epoch": 89.42426926483614, "grad_norm": 0.2344912439584732, "learning_rate": 1e-05, "loss": 0.9853, "step": 100960 }, { "epoch": 89.42869796279894, "grad_norm": 0.2454075962305069, "learning_rate": 1e-05, "loss": 0.9803, "step": 100965 }, { "epoch": 89.43312666076173, "grad_norm": 0.24018558859825134, "learning_rate": 1e-05, "loss": 0.9892, "step": 100970 }, { "epoch": 89.43755535872454, "grad_norm": 0.2437729835510254, "learning_rate": 1e-05, "loss": 1.0073, "step": 100975 }, { "epoch": 89.44198405668733, "grad_norm": 0.24139168858528137, "learning_rate": 1e-05, "loss": 0.9701, "step": 100980 }, { "epoch": 89.44641275465013, "grad_norm": 0.3099621832370758, "learning_rate": 1e-05, "loss": 0.9877, "step": 100985 }, { "epoch": 89.45084145261293, "grad_norm": 0.2625819742679596, "learning_rate": 1e-05, "loss": 0.9261, "step": 100990 }, { "epoch": 89.45527015057573, "grad_norm": 0.24205012619495392, "learning_rate": 1e-05, "loss": 0.9463, "step": 100995 }, { "epoch": 89.45969884853854, "grad_norm": 0.29166045784950256, "learning_rate": 1e-05, "loss": 0.9464, "step": 101000 }, { "epoch": 89.46412754650133, "grad_norm": 0.23231516778469086, "learning_rate": 1e-05, "loss": 0.9088, "step": 101005 }, { "epoch": 89.46855624446412, "grad_norm": 0.22558072209358215, "learning_rate": 1e-05, "loss": 0.9286, "step": 101010 }, { "epoch": 89.47298494242693, "grad_norm": 0.30114856362342834, "learning_rate": 1e-05, "loss": 0.9268, "step": 101015 }, { "epoch": 89.47741364038973, "grad_norm": 0.24628520011901855, "learning_rate": 1e-05, "loss": 0.937, "step": 101020 }, { "epoch": 89.48184233835252, "grad_norm": 0.21452312171459198, "learning_rate": 1e-05, "loss": 0.918, "step": 101025 }, { "epoch": 89.48627103631533, "grad_norm": 0.25621384382247925, "learning_rate": 1e-05, "loss": 0.9401, "step": 101030 }, { "epoch": 89.49069973427812, "grad_norm": 0.20334744453430176, "learning_rate": 1e-05, "loss": 0.9702, "step": 101035 }, { "epoch": 89.49512843224092, "grad_norm": 0.219874769449234, "learning_rate": 1e-05, "loss": 0.9798, "step": 101040 }, { "epoch": 89.49955713020373, "grad_norm": 0.21250200271606445, "learning_rate": 1e-05, "loss": 0.9288, "step": 101045 }, { "epoch": 89.50398582816652, "grad_norm": 0.2596737742424011, "learning_rate": 1e-05, "loss": 1.0099, "step": 101050 }, { "epoch": 89.50841452612931, "grad_norm": 0.27819231152534485, "learning_rate": 1e-05, "loss": 0.9187, "step": 101055 }, { "epoch": 89.51284322409212, "grad_norm": 0.2287324219942093, "learning_rate": 1e-05, "loss": 0.8767, "step": 101060 }, { "epoch": 89.51727192205492, "grad_norm": 0.24213194847106934, "learning_rate": 1e-05, "loss": 1.0104, "step": 101065 }, { "epoch": 89.52170062001771, "grad_norm": 0.2717299163341522, "learning_rate": 1e-05, "loss": 0.9455, "step": 101070 }, { "epoch": 89.52612931798052, "grad_norm": 0.23695769906044006, "learning_rate": 1e-05, "loss": 0.9618, "step": 101075 }, { "epoch": 89.53055801594331, "grad_norm": 0.24120908975601196, "learning_rate": 1e-05, "loss": 0.944, "step": 101080 }, { "epoch": 89.53498671390611, "grad_norm": 0.25059378147125244, "learning_rate": 1e-05, "loss": 0.994, "step": 101085 }, { "epoch": 89.53941541186892, "grad_norm": 0.25819337368011475, "learning_rate": 1e-05, "loss": 0.9268, "step": 101090 }, { "epoch": 89.54384410983171, "grad_norm": 0.2337779700756073, "learning_rate": 1e-05, "loss": 0.9328, "step": 101095 }, { "epoch": 89.5482728077945, "grad_norm": 0.22248628735542297, "learning_rate": 1e-05, "loss": 1.0027, "step": 101100 }, { "epoch": 89.55270150575731, "grad_norm": 0.29555508494377136, "learning_rate": 1e-05, "loss": 0.9367, "step": 101105 }, { "epoch": 89.5571302037201, "grad_norm": 0.26678186655044556, "learning_rate": 1e-05, "loss": 0.9485, "step": 101110 }, { "epoch": 89.5615589016829, "grad_norm": 0.20290780067443848, "learning_rate": 1e-05, "loss": 0.9438, "step": 101115 }, { "epoch": 89.56598759964571, "grad_norm": 0.2417866438627243, "learning_rate": 1e-05, "loss": 0.9611, "step": 101120 }, { "epoch": 89.5704162976085, "grad_norm": 0.25751861929893494, "learning_rate": 1e-05, "loss": 0.9297, "step": 101125 }, { "epoch": 89.5748449955713, "grad_norm": 0.22269615530967712, "learning_rate": 1e-05, "loss": 0.9668, "step": 101130 }, { "epoch": 89.5792736935341, "grad_norm": 0.23602698743343353, "learning_rate": 1e-05, "loss": 0.9104, "step": 101135 }, { "epoch": 89.5837023914969, "grad_norm": 0.27046141028404236, "learning_rate": 1e-05, "loss": 0.9257, "step": 101140 }, { "epoch": 89.5881310894597, "grad_norm": 0.2539267838001251, "learning_rate": 1e-05, "loss": 0.9758, "step": 101145 }, { "epoch": 89.5925597874225, "grad_norm": 0.28185129165649414, "learning_rate": 1e-05, "loss": 0.9597, "step": 101150 }, { "epoch": 89.5969884853853, "grad_norm": 0.2545854449272156, "learning_rate": 1e-05, "loss": 0.9792, "step": 101155 }, { "epoch": 89.60141718334809, "grad_norm": 0.23088335990905762, "learning_rate": 1e-05, "loss": 0.9472, "step": 101160 }, { "epoch": 89.6058458813109, "grad_norm": 0.23168882727622986, "learning_rate": 1e-05, "loss": 0.9971, "step": 101165 }, { "epoch": 89.61027457927369, "grad_norm": 0.23449388146400452, "learning_rate": 1e-05, "loss": 0.9677, "step": 101170 }, { "epoch": 89.61470327723649, "grad_norm": 0.26414287090301514, "learning_rate": 1e-05, "loss": 0.9456, "step": 101175 }, { "epoch": 89.6191319751993, "grad_norm": 0.31195399165153503, "learning_rate": 1e-05, "loss": 0.9889, "step": 101180 }, { "epoch": 89.62356067316209, "grad_norm": 0.22076132893562317, "learning_rate": 1e-05, "loss": 0.9578, "step": 101185 }, { "epoch": 89.62798937112488, "grad_norm": 0.23083002865314484, "learning_rate": 1e-05, "loss": 0.9915, "step": 101190 }, { "epoch": 89.63241806908769, "grad_norm": 0.20712360739707947, "learning_rate": 1e-05, "loss": 0.9201, "step": 101195 }, { "epoch": 89.63684676705049, "grad_norm": 0.21525146067142487, "learning_rate": 1e-05, "loss": 0.9693, "step": 101200 }, { "epoch": 89.64127546501328, "grad_norm": 0.2378852814435959, "learning_rate": 1e-05, "loss": 0.9352, "step": 101205 }, { "epoch": 89.64570416297609, "grad_norm": 0.24321012198925018, "learning_rate": 1e-05, "loss": 0.9338, "step": 101210 }, { "epoch": 89.65013286093888, "grad_norm": 0.2689436972141266, "learning_rate": 1e-05, "loss": 1.0165, "step": 101215 }, { "epoch": 89.65456155890168, "grad_norm": 0.21890833973884583, "learning_rate": 1e-05, "loss": 0.9169, "step": 101220 }, { "epoch": 89.65899025686448, "grad_norm": 0.23341307044029236, "learning_rate": 1e-05, "loss": 0.9667, "step": 101225 }, { "epoch": 89.66341895482728, "grad_norm": 0.2196146845817566, "learning_rate": 1e-05, "loss": 0.9754, "step": 101230 }, { "epoch": 89.66784765279007, "grad_norm": 0.30614590644836426, "learning_rate": 1e-05, "loss": 0.9534, "step": 101235 }, { "epoch": 89.67227635075288, "grad_norm": 0.31138527393341064, "learning_rate": 1e-05, "loss": 0.9678, "step": 101240 }, { "epoch": 89.67670504871568, "grad_norm": 0.28225719928741455, "learning_rate": 1e-05, "loss": 0.998, "step": 101245 }, { "epoch": 89.68113374667848, "grad_norm": 0.2552628815174103, "learning_rate": 1e-05, "loss": 0.9577, "step": 101250 }, { "epoch": 89.68556244464128, "grad_norm": 0.21399152278900146, "learning_rate": 1e-05, "loss": 0.9959, "step": 101255 }, { "epoch": 89.68999114260407, "grad_norm": 0.2505783438682556, "learning_rate": 1e-05, "loss": 0.9517, "step": 101260 }, { "epoch": 89.69441984056688, "grad_norm": 0.2451978623867035, "learning_rate": 1e-05, "loss": 0.8823, "step": 101265 }, { "epoch": 89.69884853852967, "grad_norm": 0.2342648208141327, "learning_rate": 1e-05, "loss": 0.969, "step": 101270 }, { "epoch": 89.70327723649247, "grad_norm": 0.2624475359916687, "learning_rate": 1e-05, "loss": 0.9627, "step": 101275 }, { "epoch": 89.70770593445528, "grad_norm": 0.23838311433792114, "learning_rate": 1e-05, "loss": 0.9557, "step": 101280 }, { "epoch": 89.71213463241807, "grad_norm": 0.3192882835865021, "learning_rate": 1e-05, "loss": 0.9804, "step": 101285 }, { "epoch": 89.71656333038086, "grad_norm": 0.26912227272987366, "learning_rate": 1e-05, "loss": 1.0317, "step": 101290 }, { "epoch": 89.72099202834367, "grad_norm": 0.21121801435947418, "learning_rate": 1e-05, "loss": 0.9502, "step": 101295 }, { "epoch": 89.72542072630647, "grad_norm": 0.2362804114818573, "learning_rate": 1e-05, "loss": 0.9161, "step": 101300 }, { "epoch": 89.72984942426926, "grad_norm": 0.22550930082798004, "learning_rate": 1e-05, "loss": 0.9674, "step": 101305 }, { "epoch": 89.73427812223207, "grad_norm": 0.3003152906894684, "learning_rate": 1e-05, "loss": 0.958, "step": 101310 }, { "epoch": 89.73870682019486, "grad_norm": 0.21749617159366608, "learning_rate": 1e-05, "loss": 0.934, "step": 101315 }, { "epoch": 89.74313551815766, "grad_norm": 0.2600935101509094, "learning_rate": 1e-05, "loss": 0.9903, "step": 101320 }, { "epoch": 89.74756421612047, "grad_norm": 0.27593857049942017, "learning_rate": 1e-05, "loss": 0.9469, "step": 101325 }, { "epoch": 89.75199291408326, "grad_norm": 0.25915616750717163, "learning_rate": 1e-05, "loss": 0.9477, "step": 101330 }, { "epoch": 89.75642161204605, "grad_norm": 0.2346881777048111, "learning_rate": 1e-05, "loss": 0.9631, "step": 101335 }, { "epoch": 89.76085031000886, "grad_norm": 0.20665375888347626, "learning_rate": 1e-05, "loss": 0.9754, "step": 101340 }, { "epoch": 89.76527900797166, "grad_norm": 0.23834674060344696, "learning_rate": 1e-05, "loss": 0.9516, "step": 101345 }, { "epoch": 89.76970770593445, "grad_norm": 0.2480219155550003, "learning_rate": 1e-05, "loss": 0.9806, "step": 101350 }, { "epoch": 89.77413640389726, "grad_norm": 0.22663390636444092, "learning_rate": 1e-05, "loss": 0.9829, "step": 101355 }, { "epoch": 89.77856510186005, "grad_norm": 0.2249283343553543, "learning_rate": 1e-05, "loss": 0.932, "step": 101360 }, { "epoch": 89.78299379982285, "grad_norm": 0.27349337935447693, "learning_rate": 1e-05, "loss": 0.9401, "step": 101365 }, { "epoch": 89.78742249778566, "grad_norm": 0.2681344449520111, "learning_rate": 1e-05, "loss": 0.914, "step": 101370 }, { "epoch": 89.79185119574845, "grad_norm": 0.2435312569141388, "learning_rate": 1e-05, "loss": 0.9333, "step": 101375 }, { "epoch": 89.79627989371124, "grad_norm": 0.22463159263134003, "learning_rate": 1e-05, "loss": 0.9237, "step": 101380 }, { "epoch": 89.80070859167405, "grad_norm": 0.22619637846946716, "learning_rate": 1e-05, "loss": 0.946, "step": 101385 }, { "epoch": 89.80513728963685, "grad_norm": 0.2531736493110657, "learning_rate": 1e-05, "loss": 0.9633, "step": 101390 }, { "epoch": 89.80956598759964, "grad_norm": 0.26272615790367126, "learning_rate": 1e-05, "loss": 0.9531, "step": 101395 }, { "epoch": 89.81399468556245, "grad_norm": 0.21618276834487915, "learning_rate": 1e-05, "loss": 0.9045, "step": 101400 }, { "epoch": 89.81842338352524, "grad_norm": 0.2515418231487274, "learning_rate": 1e-05, "loss": 0.9358, "step": 101405 }, { "epoch": 89.82285208148804, "grad_norm": 0.25076019763946533, "learning_rate": 1e-05, "loss": 0.9866, "step": 101410 }, { "epoch": 89.82728077945085, "grad_norm": 0.2517538368701935, "learning_rate": 1e-05, "loss": 0.9265, "step": 101415 }, { "epoch": 89.83170947741364, "grad_norm": 0.20044861733913422, "learning_rate": 1e-05, "loss": 0.9733, "step": 101420 }, { "epoch": 89.83613817537643, "grad_norm": 0.23109887540340424, "learning_rate": 1e-05, "loss": 0.984, "step": 101425 }, { "epoch": 89.84056687333924, "grad_norm": 0.24378788471221924, "learning_rate": 1e-05, "loss": 0.9593, "step": 101430 }, { "epoch": 89.84499557130204, "grad_norm": 0.20190908014774323, "learning_rate": 1e-05, "loss": 0.9226, "step": 101435 }, { "epoch": 89.84942426926483, "grad_norm": 0.24551331996917725, "learning_rate": 1e-05, "loss": 0.9262, "step": 101440 }, { "epoch": 89.85385296722764, "grad_norm": 0.2267090380191803, "learning_rate": 1e-05, "loss": 0.8884, "step": 101445 }, { "epoch": 89.85828166519043, "grad_norm": 0.2133764922618866, "learning_rate": 1e-05, "loss": 0.9845, "step": 101450 }, { "epoch": 89.86271036315323, "grad_norm": 0.21808011829853058, "learning_rate": 1e-05, "loss": 1.0096, "step": 101455 }, { "epoch": 89.86713906111603, "grad_norm": 0.23564226925373077, "learning_rate": 1e-05, "loss": 0.9752, "step": 101460 }, { "epoch": 89.87156775907883, "grad_norm": 0.2676661014556885, "learning_rate": 1e-05, "loss": 0.9575, "step": 101465 }, { "epoch": 89.87599645704162, "grad_norm": 0.2114715725183487, "learning_rate": 1e-05, "loss": 0.9715, "step": 101470 }, { "epoch": 89.88042515500443, "grad_norm": 0.22617237269878387, "learning_rate": 1e-05, "loss": 0.939, "step": 101475 }, { "epoch": 89.88485385296723, "grad_norm": 0.22413133084774017, "learning_rate": 1e-05, "loss": 0.9932, "step": 101480 }, { "epoch": 89.88928255093003, "grad_norm": 0.28835514187812805, "learning_rate": 1e-05, "loss": 0.9621, "step": 101485 }, { "epoch": 89.89371124889283, "grad_norm": 0.26613354682922363, "learning_rate": 1e-05, "loss": 0.9658, "step": 101490 }, { "epoch": 89.89813994685562, "grad_norm": 0.2634512782096863, "learning_rate": 1e-05, "loss": 0.964, "step": 101495 }, { "epoch": 89.90256864481843, "grad_norm": 0.24423576891422272, "learning_rate": 1e-05, "loss": 1.0027, "step": 101500 }, { "epoch": 89.90699734278122, "grad_norm": 0.2354712188243866, "learning_rate": 1e-05, "loss": 0.9243, "step": 101505 }, { "epoch": 89.91142604074402, "grad_norm": 0.2415490448474884, "learning_rate": 1e-05, "loss": 0.9353, "step": 101510 }, { "epoch": 89.91585473870683, "grad_norm": 0.21499145030975342, "learning_rate": 1e-05, "loss": 0.995, "step": 101515 }, { "epoch": 89.92028343666962, "grad_norm": 0.21010562777519226, "learning_rate": 1e-05, "loss": 0.9115, "step": 101520 }, { "epoch": 89.92471213463241, "grad_norm": 0.21463963389396667, "learning_rate": 1e-05, "loss": 0.9165, "step": 101525 }, { "epoch": 89.92914083259522, "grad_norm": 0.2589956223964691, "learning_rate": 1e-05, "loss": 0.8772, "step": 101530 }, { "epoch": 89.93356953055802, "grad_norm": 0.23022598028182983, "learning_rate": 1e-05, "loss": 0.9619, "step": 101535 }, { "epoch": 89.93799822852081, "grad_norm": 0.24445129930973053, "learning_rate": 1e-05, "loss": 0.953, "step": 101540 }, { "epoch": 89.94242692648362, "grad_norm": 0.2590886950492859, "learning_rate": 1e-05, "loss": 0.9039, "step": 101545 }, { "epoch": 89.94685562444641, "grad_norm": 0.2533506453037262, "learning_rate": 1e-05, "loss": 0.9011, "step": 101550 }, { "epoch": 89.95128432240921, "grad_norm": 0.24357374012470245, "learning_rate": 1e-05, "loss": 0.9339, "step": 101555 }, { "epoch": 89.95571302037202, "grad_norm": 0.23195527493953705, "learning_rate": 1e-05, "loss": 0.9594, "step": 101560 }, { "epoch": 89.96014171833481, "grad_norm": 0.2404184639453888, "learning_rate": 1e-05, "loss": 0.9536, "step": 101565 }, { "epoch": 89.9645704162976, "grad_norm": 0.21428319811820984, "learning_rate": 1e-05, "loss": 0.959, "step": 101570 }, { "epoch": 89.96899911426041, "grad_norm": 0.2308656871318817, "learning_rate": 1e-05, "loss": 0.8916, "step": 101575 }, { "epoch": 89.9734278122232, "grad_norm": 0.2555184066295624, "learning_rate": 1e-05, "loss": 1.0202, "step": 101580 }, { "epoch": 89.977856510186, "grad_norm": 0.2408762276172638, "learning_rate": 1e-05, "loss": 0.9182, "step": 101585 }, { "epoch": 89.98228520814881, "grad_norm": 0.23161953687667847, "learning_rate": 1e-05, "loss": 0.9095, "step": 101590 }, { "epoch": 89.9867139061116, "grad_norm": 0.2480221390724182, "learning_rate": 1e-05, "loss": 0.9747, "step": 101595 }, { "epoch": 89.9911426040744, "grad_norm": 0.23990701138973236, "learning_rate": 1e-05, "loss": 0.9489, "step": 101600 }, { "epoch": 89.9955713020372, "grad_norm": 0.27477511763572693, "learning_rate": 1e-05, "loss": 0.933, "step": 101605 }, { "epoch": 90.0, "grad_norm": 0.2638186514377594, "learning_rate": 1e-05, "loss": 0.9944, "step": 101610 }, { "epoch": 90.0044286979628, "grad_norm": 0.21731531620025635, "learning_rate": 1e-05, "loss": 0.9111, "step": 101615 }, { "epoch": 90.0088573959256, "grad_norm": 0.22683370113372803, "learning_rate": 1e-05, "loss": 0.9709, "step": 101620 }, { "epoch": 90.0132860938884, "grad_norm": 0.21489375829696655, "learning_rate": 1e-05, "loss": 0.985, "step": 101625 }, { "epoch": 90.01771479185119, "grad_norm": 0.21699175238609314, "learning_rate": 1e-05, "loss": 0.9572, "step": 101630 }, { "epoch": 90.022143489814, "grad_norm": 0.2774849832057953, "learning_rate": 1e-05, "loss": 0.9579, "step": 101635 }, { "epoch": 90.0265721877768, "grad_norm": 0.23622387647628784, "learning_rate": 1e-05, "loss": 1.0085, "step": 101640 }, { "epoch": 90.03100088573959, "grad_norm": 0.2810623049736023, "learning_rate": 1e-05, "loss": 0.991, "step": 101645 }, { "epoch": 90.0354295837024, "grad_norm": 0.27120575308799744, "learning_rate": 1e-05, "loss": 0.9153, "step": 101650 }, { "epoch": 90.03985828166519, "grad_norm": 0.22164461016654968, "learning_rate": 1e-05, "loss": 0.9705, "step": 101655 }, { "epoch": 90.04428697962798, "grad_norm": 0.2398461103439331, "learning_rate": 1e-05, "loss": 0.984, "step": 101660 }, { "epoch": 90.04871567759079, "grad_norm": 0.2734074294567108, "learning_rate": 1e-05, "loss": 0.8878, "step": 101665 }, { "epoch": 90.05314437555359, "grad_norm": 0.2700138986110687, "learning_rate": 1e-05, "loss": 0.9974, "step": 101670 }, { "epoch": 90.05757307351638, "grad_norm": 0.29804757237434387, "learning_rate": 1e-05, "loss": 0.9271, "step": 101675 }, { "epoch": 90.06200177147919, "grad_norm": 0.2095632404088974, "learning_rate": 1e-05, "loss": 0.8943, "step": 101680 }, { "epoch": 90.06643046944198, "grad_norm": 0.22926340997219086, "learning_rate": 1e-05, "loss": 0.971, "step": 101685 }, { "epoch": 90.07085916740478, "grad_norm": 0.23530203104019165, "learning_rate": 1e-05, "loss": 0.9995, "step": 101690 }, { "epoch": 90.07528786536759, "grad_norm": 0.21834242343902588, "learning_rate": 1e-05, "loss": 0.9731, "step": 101695 }, { "epoch": 90.07971656333038, "grad_norm": 0.25071537494659424, "learning_rate": 1e-05, "loss": 0.9326, "step": 101700 }, { "epoch": 90.08414526129317, "grad_norm": 0.32564565539360046, "learning_rate": 1e-05, "loss": 0.9487, "step": 101705 }, { "epoch": 90.08857395925598, "grad_norm": 0.2299775630235672, "learning_rate": 1e-05, "loss": 1.0234, "step": 101710 }, { "epoch": 90.09300265721878, "grad_norm": 0.23191912472248077, "learning_rate": 1e-05, "loss": 0.9342, "step": 101715 }, { "epoch": 90.09743135518157, "grad_norm": 0.22682884335517883, "learning_rate": 1e-05, "loss": 0.9756, "step": 101720 }, { "epoch": 90.10186005314438, "grad_norm": 0.2016848772764206, "learning_rate": 1e-05, "loss": 0.961, "step": 101725 }, { "epoch": 90.10628875110717, "grad_norm": 0.22069627046585083, "learning_rate": 1e-05, "loss": 0.9793, "step": 101730 }, { "epoch": 90.11071744906998, "grad_norm": 0.23564662039279938, "learning_rate": 1e-05, "loss": 0.97, "step": 101735 }, { "epoch": 90.11514614703277, "grad_norm": 0.2687149941921234, "learning_rate": 1e-05, "loss": 1.0093, "step": 101740 }, { "epoch": 90.11957484499557, "grad_norm": 0.27545130252838135, "learning_rate": 1e-05, "loss": 0.9399, "step": 101745 }, { "epoch": 90.12400354295838, "grad_norm": 0.2593679428100586, "learning_rate": 1e-05, "loss": 0.9327, "step": 101750 }, { "epoch": 90.12843224092117, "grad_norm": 0.2439153641462326, "learning_rate": 1e-05, "loss": 0.9679, "step": 101755 }, { "epoch": 90.13286093888397, "grad_norm": 0.22765451669692993, "learning_rate": 1e-05, "loss": 1.0048, "step": 101760 }, { "epoch": 90.13728963684677, "grad_norm": 0.2597343921661377, "learning_rate": 1e-05, "loss": 0.9477, "step": 101765 }, { "epoch": 90.14171833480957, "grad_norm": 0.24106836318969727, "learning_rate": 1e-05, "loss": 0.963, "step": 101770 }, { "epoch": 90.14614703277236, "grad_norm": 0.2886425256729126, "learning_rate": 1e-05, "loss": 0.9554, "step": 101775 }, { "epoch": 90.15057573073517, "grad_norm": 0.2757726013660431, "learning_rate": 1e-05, "loss": 0.9902, "step": 101780 }, { "epoch": 90.15500442869796, "grad_norm": 0.2336672991514206, "learning_rate": 1e-05, "loss": 0.9419, "step": 101785 }, { "epoch": 90.15943312666076, "grad_norm": 0.24112758040428162, "learning_rate": 1e-05, "loss": 0.9604, "step": 101790 }, { "epoch": 90.16386182462357, "grad_norm": 0.22675307095050812, "learning_rate": 1e-05, "loss": 0.9349, "step": 101795 }, { "epoch": 90.16829052258636, "grad_norm": 0.29166585206985474, "learning_rate": 1e-05, "loss": 0.947, "step": 101800 }, { "epoch": 90.17271922054915, "grad_norm": 0.2473752200603485, "learning_rate": 1e-05, "loss": 0.9796, "step": 101805 }, { "epoch": 90.17714791851196, "grad_norm": 0.25430354475975037, "learning_rate": 1e-05, "loss": 0.9648, "step": 101810 }, { "epoch": 90.18157661647476, "grad_norm": 0.2628345489501953, "learning_rate": 1e-05, "loss": 0.9522, "step": 101815 }, { "epoch": 90.18600531443755, "grad_norm": 0.2560077905654907, "learning_rate": 1e-05, "loss": 0.9028, "step": 101820 }, { "epoch": 90.19043401240036, "grad_norm": 0.2639038562774658, "learning_rate": 1e-05, "loss": 0.9118, "step": 101825 }, { "epoch": 90.19486271036315, "grad_norm": 0.22030985355377197, "learning_rate": 1e-05, "loss": 0.9123, "step": 101830 }, { "epoch": 90.19929140832595, "grad_norm": 0.24887403845787048, "learning_rate": 1e-05, "loss": 0.9596, "step": 101835 }, { "epoch": 90.20372010628876, "grad_norm": 0.22400911152362823, "learning_rate": 1e-05, "loss": 0.8926, "step": 101840 }, { "epoch": 90.20814880425155, "grad_norm": 0.2544662356376648, "learning_rate": 1e-05, "loss": 0.9169, "step": 101845 }, { "epoch": 90.21257750221434, "grad_norm": 0.25758492946624756, "learning_rate": 1e-05, "loss": 0.9538, "step": 101850 }, { "epoch": 90.21700620017715, "grad_norm": 0.22494804859161377, "learning_rate": 1e-05, "loss": 0.993, "step": 101855 }, { "epoch": 90.22143489813995, "grad_norm": 0.3007393479347229, "learning_rate": 1e-05, "loss": 0.9514, "step": 101860 }, { "epoch": 90.22586359610274, "grad_norm": 0.2298215627670288, "learning_rate": 1e-05, "loss": 0.954, "step": 101865 }, { "epoch": 90.23029229406555, "grad_norm": 0.22728070616722107, "learning_rate": 1e-05, "loss": 0.9144, "step": 101870 }, { "epoch": 90.23472099202834, "grad_norm": 0.21045230329036713, "learning_rate": 1e-05, "loss": 0.9209, "step": 101875 }, { "epoch": 90.23914968999114, "grad_norm": 0.22615285217761993, "learning_rate": 1e-05, "loss": 0.9666, "step": 101880 }, { "epoch": 90.24357838795395, "grad_norm": 0.2574142515659332, "learning_rate": 1e-05, "loss": 1.0011, "step": 101885 }, { "epoch": 90.24800708591674, "grad_norm": 0.24605898559093475, "learning_rate": 1e-05, "loss": 0.9924, "step": 101890 }, { "epoch": 90.25243578387953, "grad_norm": 0.23315763473510742, "learning_rate": 1e-05, "loss": 0.9565, "step": 101895 }, { "epoch": 90.25686448184234, "grad_norm": 0.24179145693778992, "learning_rate": 1e-05, "loss": 0.9951, "step": 101900 }, { "epoch": 90.26129317980514, "grad_norm": 0.2404242306947708, "learning_rate": 1e-05, "loss": 0.9189, "step": 101905 }, { "epoch": 90.26572187776793, "grad_norm": 0.26178833842277527, "learning_rate": 1e-05, "loss": 0.9402, "step": 101910 }, { "epoch": 90.27015057573074, "grad_norm": 0.24088487029075623, "learning_rate": 1e-05, "loss": 0.9617, "step": 101915 }, { "epoch": 90.27457927369353, "grad_norm": 0.24002672731876373, "learning_rate": 1e-05, "loss": 0.963, "step": 101920 }, { "epoch": 90.27900797165633, "grad_norm": 0.2565065920352936, "learning_rate": 1e-05, "loss": 0.9227, "step": 101925 }, { "epoch": 90.28343666961914, "grad_norm": 0.24138912558555603, "learning_rate": 1e-05, "loss": 0.9647, "step": 101930 }, { "epoch": 90.28786536758193, "grad_norm": 0.2719273567199707, "learning_rate": 1e-05, "loss": 1.0016, "step": 101935 }, { "epoch": 90.29229406554472, "grad_norm": 0.20821243524551392, "learning_rate": 1e-05, "loss": 0.9483, "step": 101940 }, { "epoch": 90.29672276350753, "grad_norm": 0.22810816764831543, "learning_rate": 1e-05, "loss": 0.9592, "step": 101945 }, { "epoch": 90.30115146147033, "grad_norm": 0.23754476010799408, "learning_rate": 1e-05, "loss": 0.9445, "step": 101950 }, { "epoch": 90.30558015943312, "grad_norm": 0.21767400205135345, "learning_rate": 1e-05, "loss": 0.9383, "step": 101955 }, { "epoch": 90.31000885739593, "grad_norm": 0.2229224145412445, "learning_rate": 1e-05, "loss": 0.9733, "step": 101960 }, { "epoch": 90.31443755535872, "grad_norm": 0.24212396144866943, "learning_rate": 1e-05, "loss": 0.9265, "step": 101965 }, { "epoch": 90.31886625332152, "grad_norm": 0.26202520728111267, "learning_rate": 1e-05, "loss": 0.9184, "step": 101970 }, { "epoch": 90.32329495128432, "grad_norm": 0.2544514536857605, "learning_rate": 1e-05, "loss": 0.9863, "step": 101975 }, { "epoch": 90.32772364924712, "grad_norm": 0.24189475178718567, "learning_rate": 1e-05, "loss": 0.9957, "step": 101980 }, { "epoch": 90.33215234720993, "grad_norm": 0.22791410982608795, "learning_rate": 1e-05, "loss": 0.9166, "step": 101985 }, { "epoch": 90.33658104517272, "grad_norm": 0.25980496406555176, "learning_rate": 1e-05, "loss": 0.9404, "step": 101990 }, { "epoch": 90.34100974313552, "grad_norm": 0.2330830991268158, "learning_rate": 1e-05, "loss": 1.0184, "step": 101995 }, { "epoch": 90.34543844109832, "grad_norm": 0.25266340374946594, "learning_rate": 1e-05, "loss": 0.9682, "step": 102000 }, { "epoch": 90.34986713906112, "grad_norm": 0.22988548874855042, "learning_rate": 1e-05, "loss": 0.988, "step": 102005 }, { "epoch": 90.35429583702391, "grad_norm": 0.2298308163881302, "learning_rate": 1e-05, "loss": 0.9663, "step": 102010 }, { "epoch": 90.35872453498672, "grad_norm": 0.25408226251602173, "learning_rate": 1e-05, "loss": 0.953, "step": 102015 }, { "epoch": 90.36315323294951, "grad_norm": 0.24033193290233612, "learning_rate": 1e-05, "loss": 1.0222, "step": 102020 }, { "epoch": 90.36758193091231, "grad_norm": 0.22981905937194824, "learning_rate": 1e-05, "loss": 0.9658, "step": 102025 }, { "epoch": 90.37201062887512, "grad_norm": 0.22549563646316528, "learning_rate": 1e-05, "loss": 1.0316, "step": 102030 }, { "epoch": 90.37643932683791, "grad_norm": 0.2371857762336731, "learning_rate": 1e-05, "loss": 0.9717, "step": 102035 }, { "epoch": 90.3808680248007, "grad_norm": 0.2649405002593994, "learning_rate": 1e-05, "loss": 0.992, "step": 102040 }, { "epoch": 90.38529672276351, "grad_norm": 0.25831612944602966, "learning_rate": 1e-05, "loss": 0.9348, "step": 102045 }, { "epoch": 90.38972542072631, "grad_norm": 0.2512674331665039, "learning_rate": 1e-05, "loss": 0.9246, "step": 102050 }, { "epoch": 90.3941541186891, "grad_norm": 0.24431277811527252, "learning_rate": 1e-05, "loss": 0.9693, "step": 102055 }, { "epoch": 90.39858281665191, "grad_norm": 0.20237493515014648, "learning_rate": 1e-05, "loss": 0.9653, "step": 102060 }, { "epoch": 90.4030115146147, "grad_norm": 0.24072594940662384, "learning_rate": 1e-05, "loss": 0.9385, "step": 102065 }, { "epoch": 90.4074402125775, "grad_norm": 0.22452762722969055, "learning_rate": 1e-05, "loss": 0.9744, "step": 102070 }, { "epoch": 90.4118689105403, "grad_norm": 0.2582663893699646, "learning_rate": 1e-05, "loss": 0.9247, "step": 102075 }, { "epoch": 90.4162976085031, "grad_norm": 0.2441079318523407, "learning_rate": 1e-05, "loss": 0.9571, "step": 102080 }, { "epoch": 90.4207263064659, "grad_norm": 0.22792191803455353, "learning_rate": 1e-05, "loss": 0.9917, "step": 102085 }, { "epoch": 90.4251550044287, "grad_norm": 0.29045307636260986, "learning_rate": 1e-05, "loss": 0.9601, "step": 102090 }, { "epoch": 90.4295837023915, "grad_norm": 0.23156116902828217, "learning_rate": 1e-05, "loss": 0.9637, "step": 102095 }, { "epoch": 90.43401240035429, "grad_norm": 0.21465878188610077, "learning_rate": 1e-05, "loss": 0.9365, "step": 102100 }, { "epoch": 90.4384410983171, "grad_norm": 0.22731177508831024, "learning_rate": 1e-05, "loss": 0.9225, "step": 102105 }, { "epoch": 90.4428697962799, "grad_norm": 0.2856611907482147, "learning_rate": 1e-05, "loss": 1.0158, "step": 102110 }, { "epoch": 90.44729849424269, "grad_norm": 0.2671787142753601, "learning_rate": 1e-05, "loss": 0.9965, "step": 102115 }, { "epoch": 90.4517271922055, "grad_norm": 0.2776314616203308, "learning_rate": 1e-05, "loss": 0.9983, "step": 102120 }, { "epoch": 90.45615589016829, "grad_norm": 0.22591732442378998, "learning_rate": 1e-05, "loss": 0.9568, "step": 102125 }, { "epoch": 90.46058458813108, "grad_norm": 0.22665360569953918, "learning_rate": 1e-05, "loss": 0.9751, "step": 102130 }, { "epoch": 90.46501328609389, "grad_norm": 0.21546079218387604, "learning_rate": 1e-05, "loss": 0.9688, "step": 102135 }, { "epoch": 90.46944198405669, "grad_norm": 0.2616395056247711, "learning_rate": 1e-05, "loss": 0.947, "step": 102140 }, { "epoch": 90.47387068201948, "grad_norm": 0.27524682879447937, "learning_rate": 1e-05, "loss": 0.9867, "step": 102145 }, { "epoch": 90.47829937998229, "grad_norm": 0.25979822874069214, "learning_rate": 1e-05, "loss": 0.935, "step": 102150 }, { "epoch": 90.48272807794508, "grad_norm": 0.23312847316265106, "learning_rate": 1e-05, "loss": 0.9842, "step": 102155 }, { "epoch": 90.48715677590788, "grad_norm": 0.2386724352836609, "learning_rate": 1e-05, "loss": 0.971, "step": 102160 }, { "epoch": 90.49158547387069, "grad_norm": 0.20194567739963531, "learning_rate": 1e-05, "loss": 0.9589, "step": 102165 }, { "epoch": 90.49601417183348, "grad_norm": 0.2279941290616989, "learning_rate": 1e-05, "loss": 0.9765, "step": 102170 }, { "epoch": 90.50044286979627, "grad_norm": 0.24980227649211884, "learning_rate": 1e-05, "loss": 0.9357, "step": 102175 }, { "epoch": 90.50487156775908, "grad_norm": 0.26709261536598206, "learning_rate": 1e-05, "loss": 0.9695, "step": 102180 }, { "epoch": 90.50930026572188, "grad_norm": 0.23109747469425201, "learning_rate": 1e-05, "loss": 0.9693, "step": 102185 }, { "epoch": 90.51372896368467, "grad_norm": 0.2044699341058731, "learning_rate": 1e-05, "loss": 0.9545, "step": 102190 }, { "epoch": 90.51815766164748, "grad_norm": 0.23382556438446045, "learning_rate": 1e-05, "loss": 0.9493, "step": 102195 }, { "epoch": 90.52258635961027, "grad_norm": 0.2366105318069458, "learning_rate": 1e-05, "loss": 0.9729, "step": 102200 }, { "epoch": 90.52701505757307, "grad_norm": 0.23021741211414337, "learning_rate": 1e-05, "loss": 1.0134, "step": 102205 }, { "epoch": 90.53144375553588, "grad_norm": 0.2401573657989502, "learning_rate": 1e-05, "loss": 0.9891, "step": 102210 }, { "epoch": 90.53587245349867, "grad_norm": 0.21014755964279175, "learning_rate": 1e-05, "loss": 1.0152, "step": 102215 }, { "epoch": 90.54030115146146, "grad_norm": 0.2495400309562683, "learning_rate": 1e-05, "loss": 1.0276, "step": 102220 }, { "epoch": 90.54472984942427, "grad_norm": 0.24934224784374237, "learning_rate": 1e-05, "loss": 0.9505, "step": 102225 }, { "epoch": 90.54915854738707, "grad_norm": 0.21330714225769043, "learning_rate": 1e-05, "loss": 0.9939, "step": 102230 }, { "epoch": 90.55358724534987, "grad_norm": 0.2508140206336975, "learning_rate": 1e-05, "loss": 1.0099, "step": 102235 }, { "epoch": 90.55801594331267, "grad_norm": 0.22362473607063293, "learning_rate": 1e-05, "loss": 0.9522, "step": 102240 }, { "epoch": 90.56244464127546, "grad_norm": 0.3272494971752167, "learning_rate": 1e-05, "loss": 0.9747, "step": 102245 }, { "epoch": 90.56687333923827, "grad_norm": 0.23910930752754211, "learning_rate": 1e-05, "loss": 0.9939, "step": 102250 }, { "epoch": 90.57130203720106, "grad_norm": 0.2771513760089874, "learning_rate": 1e-05, "loss": 0.9653, "step": 102255 }, { "epoch": 90.57573073516386, "grad_norm": 0.2514110505580902, "learning_rate": 1e-05, "loss": 0.9685, "step": 102260 }, { "epoch": 90.58015943312667, "grad_norm": 0.22452853620052338, "learning_rate": 1e-05, "loss": 0.9759, "step": 102265 }, { "epoch": 90.58458813108946, "grad_norm": 0.22370469570159912, "learning_rate": 1e-05, "loss": 1.0365, "step": 102270 }, { "epoch": 90.58901682905226, "grad_norm": 0.2513195872306824, "learning_rate": 1e-05, "loss": 0.9891, "step": 102275 }, { "epoch": 90.59344552701506, "grad_norm": 0.24970132112503052, "learning_rate": 1e-05, "loss": 0.9202, "step": 102280 }, { "epoch": 90.59787422497786, "grad_norm": 0.24576792120933533, "learning_rate": 1e-05, "loss": 0.9977, "step": 102285 }, { "epoch": 90.60230292294065, "grad_norm": 0.23058372735977173, "learning_rate": 1e-05, "loss": 0.9655, "step": 102290 }, { "epoch": 90.60673162090346, "grad_norm": 0.22476179897785187, "learning_rate": 1e-05, "loss": 0.987, "step": 102295 }, { "epoch": 90.61116031886625, "grad_norm": 0.2078927457332611, "learning_rate": 1e-05, "loss": 0.9759, "step": 102300 }, { "epoch": 90.61558901682905, "grad_norm": 0.2763863503932953, "learning_rate": 1e-05, "loss": 0.9829, "step": 102305 }, { "epoch": 90.62001771479186, "grad_norm": 0.20340348780155182, "learning_rate": 1e-05, "loss": 0.965, "step": 102310 }, { "epoch": 90.62444641275465, "grad_norm": 0.2515465021133423, "learning_rate": 1e-05, "loss": 0.924, "step": 102315 }, { "epoch": 90.62887511071744, "grad_norm": 0.23996025323867798, "learning_rate": 1e-05, "loss": 0.9613, "step": 102320 }, { "epoch": 90.63330380868025, "grad_norm": 0.242412731051445, "learning_rate": 1e-05, "loss": 0.9876, "step": 102325 }, { "epoch": 90.63773250664305, "grad_norm": 0.27623438835144043, "learning_rate": 1e-05, "loss": 0.9135, "step": 102330 }, { "epoch": 90.64216120460584, "grad_norm": 0.288758248090744, "learning_rate": 1e-05, "loss": 0.9439, "step": 102335 }, { "epoch": 90.64658990256865, "grad_norm": 0.27220550179481506, "learning_rate": 1e-05, "loss": 0.963, "step": 102340 }, { "epoch": 90.65101860053144, "grad_norm": 0.2263362556695938, "learning_rate": 1e-05, "loss": 0.9806, "step": 102345 }, { "epoch": 90.65544729849424, "grad_norm": 0.24110384285449982, "learning_rate": 1e-05, "loss": 0.967, "step": 102350 }, { "epoch": 90.65987599645705, "grad_norm": 0.2227737307548523, "learning_rate": 1e-05, "loss": 0.9875, "step": 102355 }, { "epoch": 90.66430469441984, "grad_norm": 0.23778122663497925, "learning_rate": 1e-05, "loss": 0.9416, "step": 102360 }, { "epoch": 90.66873339238263, "grad_norm": 0.263927161693573, "learning_rate": 1e-05, "loss": 0.9422, "step": 102365 }, { "epoch": 90.67316209034544, "grad_norm": 0.2805740535259247, "learning_rate": 1e-05, "loss": 0.9794, "step": 102370 }, { "epoch": 90.67759078830824, "grad_norm": 0.22144073247909546, "learning_rate": 1e-05, "loss": 0.9481, "step": 102375 }, { "epoch": 90.68201948627103, "grad_norm": 0.2424430102109909, "learning_rate": 1e-05, "loss": 0.9541, "step": 102380 }, { "epoch": 90.68644818423384, "grad_norm": 0.22817647457122803, "learning_rate": 1e-05, "loss": 0.9734, "step": 102385 }, { "epoch": 90.69087688219663, "grad_norm": 0.21668201684951782, "learning_rate": 1e-05, "loss": 0.9701, "step": 102390 }, { "epoch": 90.69530558015943, "grad_norm": 0.26588642597198486, "learning_rate": 1e-05, "loss": 0.9482, "step": 102395 }, { "epoch": 90.69973427812224, "grad_norm": 0.27106720209121704, "learning_rate": 1e-05, "loss": 0.8924, "step": 102400 }, { "epoch": 90.70416297608503, "grad_norm": 0.2208392322063446, "learning_rate": 1e-05, "loss": 0.929, "step": 102405 }, { "epoch": 90.70859167404782, "grad_norm": 0.24484282732009888, "learning_rate": 1e-05, "loss": 0.9604, "step": 102410 }, { "epoch": 90.71302037201063, "grad_norm": 0.21779116988182068, "learning_rate": 1e-05, "loss": 0.942, "step": 102415 }, { "epoch": 90.71744906997343, "grad_norm": 0.1969458907842636, "learning_rate": 1e-05, "loss": 0.9646, "step": 102420 }, { "epoch": 90.72187776793622, "grad_norm": 0.23428089916706085, "learning_rate": 1e-05, "loss": 0.9936, "step": 102425 }, { "epoch": 90.72630646589903, "grad_norm": 0.2588891386985779, "learning_rate": 1e-05, "loss": 0.9777, "step": 102430 }, { "epoch": 90.73073516386182, "grad_norm": 0.27162402868270874, "learning_rate": 1e-05, "loss": 0.96, "step": 102435 }, { "epoch": 90.73516386182462, "grad_norm": 0.3209723234176636, "learning_rate": 1e-05, "loss": 0.9831, "step": 102440 }, { "epoch": 90.73959255978743, "grad_norm": 0.22253888845443726, "learning_rate": 1e-05, "loss": 0.9991, "step": 102445 }, { "epoch": 90.74402125775022, "grad_norm": 0.21653956174850464, "learning_rate": 1e-05, "loss": 0.9397, "step": 102450 }, { "epoch": 90.74844995571301, "grad_norm": 0.23073752224445343, "learning_rate": 1e-05, "loss": 0.9268, "step": 102455 }, { "epoch": 90.75287865367582, "grad_norm": 0.2643356919288635, "learning_rate": 1e-05, "loss": 0.9458, "step": 102460 }, { "epoch": 90.75730735163862, "grad_norm": 0.2735397517681122, "learning_rate": 1e-05, "loss": 1.0013, "step": 102465 }, { "epoch": 90.76173604960141, "grad_norm": 0.2693506181240082, "learning_rate": 1e-05, "loss": 0.9785, "step": 102470 }, { "epoch": 90.76616474756422, "grad_norm": 0.22383420169353485, "learning_rate": 1e-05, "loss": 0.9341, "step": 102475 }, { "epoch": 90.77059344552701, "grad_norm": 0.29040536284446716, "learning_rate": 1e-05, "loss": 0.9452, "step": 102480 }, { "epoch": 90.77502214348982, "grad_norm": 0.2927846610546112, "learning_rate": 1e-05, "loss": 0.9171, "step": 102485 }, { "epoch": 90.77945084145261, "grad_norm": 0.24086327850818634, "learning_rate": 1e-05, "loss": 0.9063, "step": 102490 }, { "epoch": 90.78387953941541, "grad_norm": 0.2180607169866562, "learning_rate": 1e-05, "loss": 0.952, "step": 102495 }, { "epoch": 90.78830823737822, "grad_norm": 0.19970956444740295, "learning_rate": 1e-05, "loss": 1.0209, "step": 102500 }, { "epoch": 90.79273693534101, "grad_norm": 0.20911867916584015, "learning_rate": 1e-05, "loss": 0.9653, "step": 102505 }, { "epoch": 90.7971656333038, "grad_norm": 0.22589075565338135, "learning_rate": 1e-05, "loss": 1.0589, "step": 102510 }, { "epoch": 90.80159433126661, "grad_norm": 0.2891215980052948, "learning_rate": 1e-05, "loss": 0.9613, "step": 102515 }, { "epoch": 90.80602302922941, "grad_norm": 0.25613537430763245, "learning_rate": 1e-05, "loss": 0.9122, "step": 102520 }, { "epoch": 90.8104517271922, "grad_norm": 0.237107053399086, "learning_rate": 1e-05, "loss": 0.9184, "step": 102525 }, { "epoch": 90.81488042515501, "grad_norm": 0.2320411503314972, "learning_rate": 1e-05, "loss": 0.9539, "step": 102530 }, { "epoch": 90.8193091231178, "grad_norm": 0.2441605031490326, "learning_rate": 1e-05, "loss": 0.967, "step": 102535 }, { "epoch": 90.8237378210806, "grad_norm": 0.23499517142772675, "learning_rate": 1e-05, "loss": 0.9674, "step": 102540 }, { "epoch": 90.8281665190434, "grad_norm": 0.2542986571788788, "learning_rate": 1e-05, "loss": 0.9725, "step": 102545 }, { "epoch": 90.8325952170062, "grad_norm": 0.2669646739959717, "learning_rate": 1e-05, "loss": 0.9338, "step": 102550 }, { "epoch": 90.837023914969, "grad_norm": 0.2450842261314392, "learning_rate": 1e-05, "loss": 0.9595, "step": 102555 }, { "epoch": 90.8414526129318, "grad_norm": 0.2577531635761261, "learning_rate": 1e-05, "loss": 0.9895, "step": 102560 }, { "epoch": 90.8458813108946, "grad_norm": 0.237412229180336, "learning_rate": 1e-05, "loss": 0.9366, "step": 102565 }, { "epoch": 90.85031000885739, "grad_norm": 0.25039517879486084, "learning_rate": 1e-05, "loss": 0.994, "step": 102570 }, { "epoch": 90.8547387068202, "grad_norm": 0.19879670441150665, "learning_rate": 1e-05, "loss": 0.9303, "step": 102575 }, { "epoch": 90.859167404783, "grad_norm": 0.2328568994998932, "learning_rate": 1e-05, "loss": 0.9911, "step": 102580 }, { "epoch": 90.86359610274579, "grad_norm": 0.22340106964111328, "learning_rate": 1e-05, "loss": 0.9732, "step": 102585 }, { "epoch": 90.8680248007086, "grad_norm": 0.23357562720775604, "learning_rate": 1e-05, "loss": 0.925, "step": 102590 }, { "epoch": 90.87245349867139, "grad_norm": 0.2421547919511795, "learning_rate": 1e-05, "loss": 0.9356, "step": 102595 }, { "epoch": 90.87688219663418, "grad_norm": 0.2209460288286209, "learning_rate": 1e-05, "loss": 0.9898, "step": 102600 }, { "epoch": 90.881310894597, "grad_norm": 0.20997555553913116, "learning_rate": 1e-05, "loss": 0.9445, "step": 102605 }, { "epoch": 90.88573959255979, "grad_norm": 0.21754190325737, "learning_rate": 1e-05, "loss": 0.9379, "step": 102610 }, { "epoch": 90.89016829052258, "grad_norm": 0.2394261509180069, "learning_rate": 1e-05, "loss": 0.9887, "step": 102615 }, { "epoch": 90.89459698848539, "grad_norm": 0.27301013469696045, "learning_rate": 1e-05, "loss": 0.9529, "step": 102620 }, { "epoch": 90.89902568644818, "grad_norm": 0.2613600790500641, "learning_rate": 1e-05, "loss": 0.9805, "step": 102625 }, { "epoch": 90.90345438441098, "grad_norm": 0.24907121062278748, "learning_rate": 1e-05, "loss": 0.9516, "step": 102630 }, { "epoch": 90.90788308237379, "grad_norm": 0.2370993047952652, "learning_rate": 1e-05, "loss": 0.9734, "step": 102635 }, { "epoch": 90.91231178033658, "grad_norm": 0.24002397060394287, "learning_rate": 1e-05, "loss": 1.0187, "step": 102640 }, { "epoch": 90.91674047829937, "grad_norm": 0.2537846565246582, "learning_rate": 1e-05, "loss": 0.9964, "step": 102645 }, { "epoch": 90.92116917626218, "grad_norm": 0.249927818775177, "learning_rate": 1e-05, "loss": 0.9445, "step": 102650 }, { "epoch": 90.92559787422498, "grad_norm": 0.2988022267818451, "learning_rate": 1e-05, "loss": 0.9867, "step": 102655 }, { "epoch": 90.93002657218777, "grad_norm": 0.24389055371284485, "learning_rate": 1e-05, "loss": 0.9637, "step": 102660 }, { "epoch": 90.93445527015058, "grad_norm": 0.19886188209056854, "learning_rate": 1e-05, "loss": 0.947, "step": 102665 }, { "epoch": 90.93888396811337, "grad_norm": 0.23499402403831482, "learning_rate": 1e-05, "loss": 0.9163, "step": 102670 }, { "epoch": 90.94331266607617, "grad_norm": 0.2173004448413849, "learning_rate": 1e-05, "loss": 0.9693, "step": 102675 }, { "epoch": 90.94774136403898, "grad_norm": 0.26165562868118286, "learning_rate": 1e-05, "loss": 0.952, "step": 102680 }, { "epoch": 90.95217006200177, "grad_norm": 0.2698575556278229, "learning_rate": 1e-05, "loss": 0.9852, "step": 102685 }, { "epoch": 90.95659875996456, "grad_norm": 0.25355783104896545, "learning_rate": 1e-05, "loss": 0.9956, "step": 102690 }, { "epoch": 90.96102745792737, "grad_norm": 0.2638607919216156, "learning_rate": 1e-05, "loss": 0.9941, "step": 102695 }, { "epoch": 90.96545615589017, "grad_norm": 0.2440202832221985, "learning_rate": 1e-05, "loss": 0.9496, "step": 102700 }, { "epoch": 90.96988485385296, "grad_norm": 0.2394225150346756, "learning_rate": 1e-05, "loss": 0.9004, "step": 102705 }, { "epoch": 90.97431355181577, "grad_norm": 0.22733670473098755, "learning_rate": 1e-05, "loss": 0.9792, "step": 102710 }, { "epoch": 90.97874224977856, "grad_norm": 0.21324612200260162, "learning_rate": 1e-05, "loss": 0.9433, "step": 102715 }, { "epoch": 90.98317094774137, "grad_norm": 0.23697666823863983, "learning_rate": 1e-05, "loss": 0.9595, "step": 102720 }, { "epoch": 90.98759964570417, "grad_norm": 0.20261825621128082, "learning_rate": 1e-05, "loss": 0.9817, "step": 102725 }, { "epoch": 90.99202834366696, "grad_norm": 0.2591460943222046, "learning_rate": 1e-05, "loss": 0.9189, "step": 102730 }, { "epoch": 90.99645704162977, "grad_norm": 0.22383613884449005, "learning_rate": 1e-05, "loss": 0.9378, "step": 102735 }, { "epoch": 91.00088573959256, "grad_norm": 0.27633705735206604, "learning_rate": 1e-05, "loss": 0.9932, "step": 102740 }, { "epoch": 91.00531443755536, "grad_norm": 0.20688210427761078, "learning_rate": 1e-05, "loss": 0.9565, "step": 102745 }, { "epoch": 91.00974313551816, "grad_norm": 0.22417804598808289, "learning_rate": 1e-05, "loss": 1.0074, "step": 102750 }, { "epoch": 91.01417183348096, "grad_norm": 0.21591448783874512, "learning_rate": 1e-05, "loss": 0.9949, "step": 102755 }, { "epoch": 91.01860053144375, "grad_norm": 0.2580453157424927, "learning_rate": 1e-05, "loss": 0.951, "step": 102760 }, { "epoch": 91.02302922940656, "grad_norm": 0.24250510334968567, "learning_rate": 1e-05, "loss": 0.9705, "step": 102765 }, { "epoch": 91.02745792736935, "grad_norm": 0.25079652667045593, "learning_rate": 1e-05, "loss": 0.8957, "step": 102770 }, { "epoch": 91.03188662533215, "grad_norm": 0.3197646737098694, "learning_rate": 1e-05, "loss": 0.9824, "step": 102775 }, { "epoch": 91.03631532329496, "grad_norm": 0.26770463585853577, "learning_rate": 1e-05, "loss": 0.9354, "step": 102780 }, { "epoch": 91.04074402125775, "grad_norm": 0.2663167417049408, "learning_rate": 1e-05, "loss": 0.9345, "step": 102785 }, { "epoch": 91.04517271922055, "grad_norm": 0.24982136487960815, "learning_rate": 1e-05, "loss": 0.9683, "step": 102790 }, { "epoch": 91.04960141718335, "grad_norm": 0.22635824978351593, "learning_rate": 1e-05, "loss": 0.9971, "step": 102795 }, { "epoch": 91.05403011514615, "grad_norm": 0.2758108079433441, "learning_rate": 1e-05, "loss": 0.9851, "step": 102800 }, { "epoch": 91.05845881310894, "grad_norm": 0.26177066564559937, "learning_rate": 1e-05, "loss": 0.973, "step": 102805 }, { "epoch": 91.06288751107175, "grad_norm": 0.22039005160331726, "learning_rate": 1e-05, "loss": 0.9514, "step": 102810 }, { "epoch": 91.06731620903454, "grad_norm": 0.2507549226284027, "learning_rate": 1e-05, "loss": 0.9647, "step": 102815 }, { "epoch": 91.07174490699734, "grad_norm": 0.2560892701148987, "learning_rate": 1e-05, "loss": 0.9197, "step": 102820 }, { "epoch": 91.07617360496015, "grad_norm": 0.23397120833396912, "learning_rate": 1e-05, "loss": 0.9225, "step": 102825 }, { "epoch": 91.08060230292294, "grad_norm": 0.24060940742492676, "learning_rate": 1e-05, "loss": 0.9865, "step": 102830 }, { "epoch": 91.08503100088573, "grad_norm": 0.27812033891677856, "learning_rate": 1e-05, "loss": 1.01, "step": 102835 }, { "epoch": 91.08945969884854, "grad_norm": 0.2784745693206787, "learning_rate": 1e-05, "loss": 0.9651, "step": 102840 }, { "epoch": 91.09388839681134, "grad_norm": 0.22309961915016174, "learning_rate": 1e-05, "loss": 0.9649, "step": 102845 }, { "epoch": 91.09831709477413, "grad_norm": 0.23336555063724518, "learning_rate": 1e-05, "loss": 0.915, "step": 102850 }, { "epoch": 91.10274579273694, "grad_norm": 0.22197680175304413, "learning_rate": 1e-05, "loss": 0.9814, "step": 102855 }, { "epoch": 91.10717449069973, "grad_norm": 0.22243289649486542, "learning_rate": 1e-05, "loss": 0.9724, "step": 102860 }, { "epoch": 91.11160318866253, "grad_norm": 0.23436890542507172, "learning_rate": 1e-05, "loss": 0.9506, "step": 102865 }, { "epoch": 91.11603188662534, "grad_norm": 0.24831825494766235, "learning_rate": 1e-05, "loss": 0.9598, "step": 102870 }, { "epoch": 91.12046058458813, "grad_norm": 0.20246507227420807, "learning_rate": 1e-05, "loss": 0.9857, "step": 102875 }, { "epoch": 91.12488928255092, "grad_norm": 0.2302592545747757, "learning_rate": 1e-05, "loss": 0.9449, "step": 102880 }, { "epoch": 91.12931798051373, "grad_norm": 0.23203761875629425, "learning_rate": 1e-05, "loss": 0.9566, "step": 102885 }, { "epoch": 91.13374667847653, "grad_norm": 0.2576983869075775, "learning_rate": 1e-05, "loss": 0.9167, "step": 102890 }, { "epoch": 91.13817537643932, "grad_norm": 0.2207961231470108, "learning_rate": 1e-05, "loss": 0.9111, "step": 102895 }, { "epoch": 91.14260407440213, "grad_norm": 0.23966224491596222, "learning_rate": 1e-05, "loss": 0.9558, "step": 102900 }, { "epoch": 91.14703277236492, "grad_norm": 0.23793980479240417, "learning_rate": 1e-05, "loss": 0.9668, "step": 102905 }, { "epoch": 91.15146147032772, "grad_norm": 0.23741868138313293, "learning_rate": 1e-05, "loss": 1.025, "step": 102910 }, { "epoch": 91.15589016829053, "grad_norm": 0.2640037536621094, "learning_rate": 1e-05, "loss": 1.006, "step": 102915 }, { "epoch": 91.16031886625332, "grad_norm": 0.22840575873851776, "learning_rate": 1e-05, "loss": 0.9803, "step": 102920 }, { "epoch": 91.16474756421611, "grad_norm": 0.24525302648544312, "learning_rate": 1e-05, "loss": 0.931, "step": 102925 }, { "epoch": 91.16917626217892, "grad_norm": 0.26970311999320984, "learning_rate": 1e-05, "loss": 0.9351, "step": 102930 }, { "epoch": 91.17360496014172, "grad_norm": 0.25400611758232117, "learning_rate": 1e-05, "loss": 0.9684, "step": 102935 }, { "epoch": 91.17803365810451, "grad_norm": 0.296526700258255, "learning_rate": 1e-05, "loss": 0.9641, "step": 102940 }, { "epoch": 91.18246235606732, "grad_norm": 0.25308018922805786, "learning_rate": 1e-05, "loss": 0.9497, "step": 102945 }, { "epoch": 91.18689105403011, "grad_norm": 0.2912212908267975, "learning_rate": 1e-05, "loss": 1.0263, "step": 102950 }, { "epoch": 91.1913197519929, "grad_norm": 0.21558740735054016, "learning_rate": 1e-05, "loss": 0.9559, "step": 102955 }, { "epoch": 91.19574844995572, "grad_norm": 0.20958438515663147, "learning_rate": 1e-05, "loss": 0.9649, "step": 102960 }, { "epoch": 91.20017714791851, "grad_norm": 0.2251177579164505, "learning_rate": 1e-05, "loss": 0.9687, "step": 102965 }, { "epoch": 91.20460584588132, "grad_norm": 0.2489888072013855, "learning_rate": 1e-05, "loss": 0.9419, "step": 102970 }, { "epoch": 91.20903454384411, "grad_norm": 0.2482336163520813, "learning_rate": 1e-05, "loss": 0.9315, "step": 102975 }, { "epoch": 91.2134632418069, "grad_norm": 0.20183423161506653, "learning_rate": 1e-05, "loss": 0.9906, "step": 102980 }, { "epoch": 91.21789193976971, "grad_norm": 0.22801020741462708, "learning_rate": 1e-05, "loss": 0.9754, "step": 102985 }, { "epoch": 91.22232063773251, "grad_norm": 0.2383076697587967, "learning_rate": 1e-05, "loss": 0.9673, "step": 102990 }, { "epoch": 91.2267493356953, "grad_norm": 0.2368904948234558, "learning_rate": 1e-05, "loss": 0.9887, "step": 102995 }, { "epoch": 91.23117803365811, "grad_norm": 0.23279932141304016, "learning_rate": 1e-05, "loss": 0.96, "step": 103000 }, { "epoch": 91.2356067316209, "grad_norm": 0.23190240561962128, "learning_rate": 1e-05, "loss": 0.969, "step": 103005 }, { "epoch": 91.2400354295837, "grad_norm": 0.2907813489437103, "learning_rate": 1e-05, "loss": 0.984, "step": 103010 }, { "epoch": 91.24446412754651, "grad_norm": 0.24953655898571014, "learning_rate": 1e-05, "loss": 0.9772, "step": 103015 }, { "epoch": 91.2488928255093, "grad_norm": 0.22680160403251648, "learning_rate": 1e-05, "loss": 0.9656, "step": 103020 }, { "epoch": 91.2533215234721, "grad_norm": 0.21623431146144867, "learning_rate": 1e-05, "loss": 1.0282, "step": 103025 }, { "epoch": 91.2577502214349, "grad_norm": 0.2570224404335022, "learning_rate": 1e-05, "loss": 0.9344, "step": 103030 }, { "epoch": 91.2621789193977, "grad_norm": 0.24587789177894592, "learning_rate": 1e-05, "loss": 0.9401, "step": 103035 }, { "epoch": 91.26660761736049, "grad_norm": 0.2315250188112259, "learning_rate": 1e-05, "loss": 0.9264, "step": 103040 }, { "epoch": 91.2710363153233, "grad_norm": 0.20012202858924866, "learning_rate": 1e-05, "loss": 0.9315, "step": 103045 }, { "epoch": 91.2754650132861, "grad_norm": 0.2329656034708023, "learning_rate": 1e-05, "loss": 0.9402, "step": 103050 }, { "epoch": 91.27989371124889, "grad_norm": 0.22873006761074066, "learning_rate": 1e-05, "loss": 0.9109, "step": 103055 }, { "epoch": 91.2843224092117, "grad_norm": 0.2502482533454895, "learning_rate": 1e-05, "loss": 0.982, "step": 103060 }, { "epoch": 91.28875110717449, "grad_norm": 0.23776748776435852, "learning_rate": 1e-05, "loss": 0.9716, "step": 103065 }, { "epoch": 91.29317980513729, "grad_norm": 0.2268352508544922, "learning_rate": 1e-05, "loss": 0.9561, "step": 103070 }, { "epoch": 91.2976085031001, "grad_norm": 0.22304129600524902, "learning_rate": 1e-05, "loss": 0.9778, "step": 103075 }, { "epoch": 91.30203720106289, "grad_norm": 0.237280935049057, "learning_rate": 1e-05, "loss": 0.9208, "step": 103080 }, { "epoch": 91.30646589902568, "grad_norm": 0.21677251160144806, "learning_rate": 1e-05, "loss": 0.9831, "step": 103085 }, { "epoch": 91.31089459698849, "grad_norm": 0.25403955578804016, "learning_rate": 1e-05, "loss": 0.9437, "step": 103090 }, { "epoch": 91.31532329495128, "grad_norm": 0.219672292470932, "learning_rate": 1e-05, "loss": 0.9727, "step": 103095 }, { "epoch": 91.31975199291408, "grad_norm": 0.2725236117839813, "learning_rate": 1e-05, "loss": 0.9567, "step": 103100 }, { "epoch": 91.32418069087689, "grad_norm": 0.2217569649219513, "learning_rate": 1e-05, "loss": 0.991, "step": 103105 }, { "epoch": 91.32860938883968, "grad_norm": 0.1981177031993866, "learning_rate": 1e-05, "loss": 0.956, "step": 103110 }, { "epoch": 91.33303808680247, "grad_norm": 0.23574258387088776, "learning_rate": 1e-05, "loss": 0.9379, "step": 103115 }, { "epoch": 91.33746678476528, "grad_norm": 0.2810885012149811, "learning_rate": 1e-05, "loss": 0.9269, "step": 103120 }, { "epoch": 91.34189548272808, "grad_norm": 0.2744520306587219, "learning_rate": 1e-05, "loss": 0.9803, "step": 103125 }, { "epoch": 91.34632418069087, "grad_norm": 0.24710234999656677, "learning_rate": 1e-05, "loss": 0.9512, "step": 103130 }, { "epoch": 91.35075287865368, "grad_norm": 0.1955631971359253, "learning_rate": 1e-05, "loss": 0.9394, "step": 103135 }, { "epoch": 91.35518157661647, "grad_norm": 0.22885887324810028, "learning_rate": 1e-05, "loss": 0.9416, "step": 103140 }, { "epoch": 91.35961027457927, "grad_norm": 0.25404462218284607, "learning_rate": 1e-05, "loss": 0.9647, "step": 103145 }, { "epoch": 91.36403897254208, "grad_norm": 0.20895420014858246, "learning_rate": 1e-05, "loss": 1.0192, "step": 103150 }, { "epoch": 91.36846767050487, "grad_norm": 0.23463614284992218, "learning_rate": 1e-05, "loss": 0.9735, "step": 103155 }, { "epoch": 91.37289636846766, "grad_norm": 0.2608257830142975, "learning_rate": 1e-05, "loss": 0.9676, "step": 103160 }, { "epoch": 91.37732506643047, "grad_norm": 0.25690919160842896, "learning_rate": 1e-05, "loss": 0.9316, "step": 103165 }, { "epoch": 91.38175376439327, "grad_norm": 0.2564966678619385, "learning_rate": 1e-05, "loss": 0.9744, "step": 103170 }, { "epoch": 91.38618246235606, "grad_norm": 0.24132072925567627, "learning_rate": 1e-05, "loss": 0.911, "step": 103175 }, { "epoch": 91.39061116031887, "grad_norm": 0.24077676236629486, "learning_rate": 1e-05, "loss": 0.9215, "step": 103180 }, { "epoch": 91.39503985828166, "grad_norm": 0.23886670172214508, "learning_rate": 1e-05, "loss": 0.9508, "step": 103185 }, { "epoch": 91.39946855624446, "grad_norm": 0.2572554349899292, "learning_rate": 1e-05, "loss": 0.9568, "step": 103190 }, { "epoch": 91.40389725420727, "grad_norm": 0.2579529285430908, "learning_rate": 1e-05, "loss": 0.9702, "step": 103195 }, { "epoch": 91.40832595217006, "grad_norm": 0.23599202930927277, "learning_rate": 1e-05, "loss": 1.0052, "step": 103200 }, { "epoch": 91.41275465013285, "grad_norm": 0.2726353704929352, "learning_rate": 1e-05, "loss": 0.9612, "step": 103205 }, { "epoch": 91.41718334809566, "grad_norm": 0.2780470848083496, "learning_rate": 1e-05, "loss": 0.968, "step": 103210 }, { "epoch": 91.42161204605846, "grad_norm": 0.25613561272621155, "learning_rate": 1e-05, "loss": 0.9559, "step": 103215 }, { "epoch": 91.42604074402126, "grad_norm": 0.24017487466335297, "learning_rate": 1e-05, "loss": 0.9252, "step": 103220 }, { "epoch": 91.43046944198406, "grad_norm": 0.21221084892749786, "learning_rate": 1e-05, "loss": 0.949, "step": 103225 }, { "epoch": 91.43489813994685, "grad_norm": 0.24472211301326752, "learning_rate": 1e-05, "loss": 0.9629, "step": 103230 }, { "epoch": 91.43932683790966, "grad_norm": 0.2382994145154953, "learning_rate": 1e-05, "loss": 0.913, "step": 103235 }, { "epoch": 91.44375553587246, "grad_norm": 0.2526755928993225, "learning_rate": 1e-05, "loss": 0.9195, "step": 103240 }, { "epoch": 91.44818423383525, "grad_norm": 0.22146645188331604, "learning_rate": 1e-05, "loss": 0.9206, "step": 103245 }, { "epoch": 91.45261293179806, "grad_norm": 0.2645628750324249, "learning_rate": 1e-05, "loss": 0.9441, "step": 103250 }, { "epoch": 91.45704162976085, "grad_norm": 0.25690531730651855, "learning_rate": 1e-05, "loss": 0.9744, "step": 103255 }, { "epoch": 91.46147032772365, "grad_norm": 0.2326193004846573, "learning_rate": 1e-05, "loss": 0.9074, "step": 103260 }, { "epoch": 91.46589902568645, "grad_norm": 0.23041969537734985, "learning_rate": 1e-05, "loss": 0.8958, "step": 103265 }, { "epoch": 91.47032772364925, "grad_norm": 0.2243490368127823, "learning_rate": 1e-05, "loss": 0.9702, "step": 103270 }, { "epoch": 91.47475642161204, "grad_norm": 0.2456541359424591, "learning_rate": 1e-05, "loss": 0.9337, "step": 103275 }, { "epoch": 91.47918511957485, "grad_norm": 0.22051627933979034, "learning_rate": 1e-05, "loss": 0.9896, "step": 103280 }, { "epoch": 91.48361381753764, "grad_norm": 0.22589299082756042, "learning_rate": 1e-05, "loss": 0.9542, "step": 103285 }, { "epoch": 91.48804251550044, "grad_norm": 0.2465701550245285, "learning_rate": 1e-05, "loss": 0.9252, "step": 103290 }, { "epoch": 91.49247121346325, "grad_norm": 0.23816026747226715, "learning_rate": 1e-05, "loss": 1.0028, "step": 103295 }, { "epoch": 91.49689991142604, "grad_norm": 0.25310397148132324, "learning_rate": 1e-05, "loss": 0.8909, "step": 103300 }, { "epoch": 91.50132860938884, "grad_norm": 0.24437053501605988, "learning_rate": 1e-05, "loss": 0.9902, "step": 103305 }, { "epoch": 91.50575730735164, "grad_norm": 0.286076158285141, "learning_rate": 1e-05, "loss": 1.0064, "step": 103310 }, { "epoch": 91.51018600531444, "grad_norm": 0.2814585864543915, "learning_rate": 1e-05, "loss": 0.9783, "step": 103315 }, { "epoch": 91.51461470327723, "grad_norm": 0.2804497182369232, "learning_rate": 1e-05, "loss": 0.9429, "step": 103320 }, { "epoch": 91.51904340124004, "grad_norm": 0.2686281204223633, "learning_rate": 1e-05, "loss": 0.9485, "step": 103325 }, { "epoch": 91.52347209920283, "grad_norm": 0.253391295671463, "learning_rate": 1e-05, "loss": 0.9355, "step": 103330 }, { "epoch": 91.52790079716563, "grad_norm": 0.28184524178504944, "learning_rate": 1e-05, "loss": 1.0088, "step": 103335 }, { "epoch": 91.53232949512844, "grad_norm": 0.24850156903266907, "learning_rate": 1e-05, "loss": 0.9348, "step": 103340 }, { "epoch": 91.53675819309123, "grad_norm": 0.2278163731098175, "learning_rate": 1e-05, "loss": 0.9952, "step": 103345 }, { "epoch": 91.54118689105402, "grad_norm": 0.2612732946872711, "learning_rate": 1e-05, "loss": 0.9384, "step": 103350 }, { "epoch": 91.54561558901683, "grad_norm": 0.22379198670387268, "learning_rate": 1e-05, "loss": 0.9587, "step": 103355 }, { "epoch": 91.55004428697963, "grad_norm": 0.21157947182655334, "learning_rate": 1e-05, "loss": 0.9713, "step": 103360 }, { "epoch": 91.55447298494242, "grad_norm": 0.22921548783779144, "learning_rate": 1e-05, "loss": 1.0296, "step": 103365 }, { "epoch": 91.55890168290523, "grad_norm": 0.2334277629852295, "learning_rate": 1e-05, "loss": 0.9441, "step": 103370 }, { "epoch": 91.56333038086802, "grad_norm": 0.27004989981651306, "learning_rate": 1e-05, "loss": 0.9869, "step": 103375 }, { "epoch": 91.56775907883082, "grad_norm": 0.24896419048309326, "learning_rate": 1e-05, "loss": 0.9696, "step": 103380 }, { "epoch": 91.57218777679363, "grad_norm": 0.2145649939775467, "learning_rate": 1e-05, "loss": 0.9314, "step": 103385 }, { "epoch": 91.57661647475642, "grad_norm": 0.2485453188419342, "learning_rate": 1e-05, "loss": 0.9656, "step": 103390 }, { "epoch": 91.58104517271921, "grad_norm": 0.2488047480583191, "learning_rate": 1e-05, "loss": 0.936, "step": 103395 }, { "epoch": 91.58547387068202, "grad_norm": 0.22245216369628906, "learning_rate": 1e-05, "loss": 0.9187, "step": 103400 }, { "epoch": 91.58990256864482, "grad_norm": 0.21726730465888977, "learning_rate": 1e-05, "loss": 0.9927, "step": 103405 }, { "epoch": 91.59433126660761, "grad_norm": 0.2124769389629364, "learning_rate": 1e-05, "loss": 0.9984, "step": 103410 }, { "epoch": 91.59875996457042, "grad_norm": 0.22882641851902008, "learning_rate": 1e-05, "loss": 0.9372, "step": 103415 }, { "epoch": 91.60318866253321, "grad_norm": 0.21799640357494354, "learning_rate": 1e-05, "loss": 0.9949, "step": 103420 }, { "epoch": 91.60761736049601, "grad_norm": 0.21663933992385864, "learning_rate": 1e-05, "loss": 0.9659, "step": 103425 }, { "epoch": 91.61204605845882, "grad_norm": 0.2153746485710144, "learning_rate": 1e-05, "loss": 0.9677, "step": 103430 }, { "epoch": 91.61647475642161, "grad_norm": 0.227844700217247, "learning_rate": 1e-05, "loss": 0.9785, "step": 103435 }, { "epoch": 91.6209034543844, "grad_norm": 0.1955043524503708, "learning_rate": 1e-05, "loss": 0.9749, "step": 103440 }, { "epoch": 91.62533215234721, "grad_norm": 0.23712210357189178, "learning_rate": 1e-05, "loss": 0.9719, "step": 103445 }, { "epoch": 91.62976085031, "grad_norm": 0.22838705778121948, "learning_rate": 1e-05, "loss": 0.9297, "step": 103450 }, { "epoch": 91.63418954827281, "grad_norm": 0.265532910823822, "learning_rate": 1e-05, "loss": 0.957, "step": 103455 }, { "epoch": 91.63861824623561, "grad_norm": 0.259996235370636, "learning_rate": 1e-05, "loss": 0.9859, "step": 103460 }, { "epoch": 91.6430469441984, "grad_norm": 0.25401797890663147, "learning_rate": 1e-05, "loss": 0.9758, "step": 103465 }, { "epoch": 91.64747564216121, "grad_norm": 0.2826327681541443, "learning_rate": 1e-05, "loss": 0.9856, "step": 103470 }, { "epoch": 91.651904340124, "grad_norm": 0.21522057056427002, "learning_rate": 1e-05, "loss": 0.9699, "step": 103475 }, { "epoch": 91.6563330380868, "grad_norm": 0.2449258714914322, "learning_rate": 1e-05, "loss": 0.9972, "step": 103480 }, { "epoch": 91.66076173604961, "grad_norm": 0.2270217090845108, "learning_rate": 1e-05, "loss": 0.914, "step": 103485 }, { "epoch": 91.6651904340124, "grad_norm": 0.23899859189987183, "learning_rate": 1e-05, "loss": 0.9193, "step": 103490 }, { "epoch": 91.6696191319752, "grad_norm": 0.26138898730278015, "learning_rate": 1e-05, "loss": 0.9661, "step": 103495 }, { "epoch": 91.674047829938, "grad_norm": 0.2310044914484024, "learning_rate": 1e-05, "loss": 0.9162, "step": 103500 }, { "epoch": 91.6784765279008, "grad_norm": 0.24325542151927948, "learning_rate": 1e-05, "loss": 0.9016, "step": 103505 }, { "epoch": 91.68290522586359, "grad_norm": 0.2700601816177368, "learning_rate": 1e-05, "loss": 0.9657, "step": 103510 }, { "epoch": 91.6873339238264, "grad_norm": 0.2320859283208847, "learning_rate": 1e-05, "loss": 0.9421, "step": 103515 }, { "epoch": 91.6917626217892, "grad_norm": 0.21339771151542664, "learning_rate": 1e-05, "loss": 0.9579, "step": 103520 }, { "epoch": 91.69619131975199, "grad_norm": 0.2141464501619339, "learning_rate": 1e-05, "loss": 0.9615, "step": 103525 }, { "epoch": 91.7006200177148, "grad_norm": 0.2622971832752228, "learning_rate": 1e-05, "loss": 0.9409, "step": 103530 }, { "epoch": 91.70504871567759, "grad_norm": 0.2728383243083954, "learning_rate": 1e-05, "loss": 0.9785, "step": 103535 }, { "epoch": 91.70947741364039, "grad_norm": 0.27427875995635986, "learning_rate": 1e-05, "loss": 0.9449, "step": 103540 }, { "epoch": 91.7139061116032, "grad_norm": 0.19493287801742554, "learning_rate": 1e-05, "loss": 0.9703, "step": 103545 }, { "epoch": 91.71833480956599, "grad_norm": 0.23417465388774872, "learning_rate": 1e-05, "loss": 0.9571, "step": 103550 }, { "epoch": 91.72276350752878, "grad_norm": 0.2390751987695694, "learning_rate": 1e-05, "loss": 0.9689, "step": 103555 }, { "epoch": 91.72719220549159, "grad_norm": 0.2476937621831894, "learning_rate": 1e-05, "loss": 0.9995, "step": 103560 }, { "epoch": 91.73162090345438, "grad_norm": 0.22002314031124115, "learning_rate": 1e-05, "loss": 0.9496, "step": 103565 }, { "epoch": 91.73604960141718, "grad_norm": 0.23944716155529022, "learning_rate": 1e-05, "loss": 0.9612, "step": 103570 }, { "epoch": 91.74047829937999, "grad_norm": 0.24306169152259827, "learning_rate": 1e-05, "loss": 0.9487, "step": 103575 }, { "epoch": 91.74490699734278, "grad_norm": 0.25580257177352905, "learning_rate": 1e-05, "loss": 0.9136, "step": 103580 }, { "epoch": 91.74933569530558, "grad_norm": 0.22761861979961395, "learning_rate": 1e-05, "loss": 0.9382, "step": 103585 }, { "epoch": 91.75376439326838, "grad_norm": 0.23008909821510315, "learning_rate": 1e-05, "loss": 0.9672, "step": 103590 }, { "epoch": 91.75819309123118, "grad_norm": 0.22540812194347382, "learning_rate": 1e-05, "loss": 0.972, "step": 103595 }, { "epoch": 91.76262178919397, "grad_norm": 0.2673172056674957, "learning_rate": 1e-05, "loss": 0.9972, "step": 103600 }, { "epoch": 91.76705048715678, "grad_norm": 0.2583169937133789, "learning_rate": 1e-05, "loss": 0.9666, "step": 103605 }, { "epoch": 91.77147918511957, "grad_norm": 0.2722247838973999, "learning_rate": 1e-05, "loss": 0.9429, "step": 103610 }, { "epoch": 91.77590788308237, "grad_norm": 0.22237414121627808, "learning_rate": 1e-05, "loss": 1.0068, "step": 103615 }, { "epoch": 91.78033658104518, "grad_norm": 0.28582963347435, "learning_rate": 1e-05, "loss": 0.9446, "step": 103620 }, { "epoch": 91.78476527900797, "grad_norm": 0.26985061168670654, "learning_rate": 1e-05, "loss": 0.9357, "step": 103625 }, { "epoch": 91.78919397697076, "grad_norm": 0.26526930928230286, "learning_rate": 1e-05, "loss": 1.0225, "step": 103630 }, { "epoch": 91.79362267493357, "grad_norm": 0.24199633300304413, "learning_rate": 1e-05, "loss": 0.9949, "step": 103635 }, { "epoch": 91.79805137289637, "grad_norm": 0.2448570430278778, "learning_rate": 1e-05, "loss": 0.9761, "step": 103640 }, { "epoch": 91.80248007085916, "grad_norm": 0.22262941300868988, "learning_rate": 1e-05, "loss": 0.9676, "step": 103645 }, { "epoch": 91.80690876882197, "grad_norm": 0.20818395912647247, "learning_rate": 1e-05, "loss": 0.9157, "step": 103650 }, { "epoch": 91.81133746678476, "grad_norm": 0.28213945031166077, "learning_rate": 1e-05, "loss": 0.921, "step": 103655 }, { "epoch": 91.81576616474756, "grad_norm": 0.24637234210968018, "learning_rate": 1e-05, "loss": 0.9128, "step": 103660 }, { "epoch": 91.82019486271037, "grad_norm": 0.2656781077384949, "learning_rate": 1e-05, "loss": 0.9492, "step": 103665 }, { "epoch": 91.82462356067316, "grad_norm": 0.24238321185112, "learning_rate": 1e-05, "loss": 0.9868, "step": 103670 }, { "epoch": 91.82905225863595, "grad_norm": 0.25143301486968994, "learning_rate": 1e-05, "loss": 0.9555, "step": 103675 }, { "epoch": 91.83348095659876, "grad_norm": 0.2636754512786865, "learning_rate": 1e-05, "loss": 0.9754, "step": 103680 }, { "epoch": 91.83790965456156, "grad_norm": 0.27992525696754456, "learning_rate": 1e-05, "loss": 0.9658, "step": 103685 }, { "epoch": 91.84233835252435, "grad_norm": 0.21452893316745758, "learning_rate": 1e-05, "loss": 0.9879, "step": 103690 }, { "epoch": 91.84676705048716, "grad_norm": 0.23102730512619019, "learning_rate": 1e-05, "loss": 0.9692, "step": 103695 }, { "epoch": 91.85119574844995, "grad_norm": 0.2500423789024353, "learning_rate": 1e-05, "loss": 0.9481, "step": 103700 }, { "epoch": 91.85562444641276, "grad_norm": 0.21993277966976166, "learning_rate": 1e-05, "loss": 0.9203, "step": 103705 }, { "epoch": 91.86005314437556, "grad_norm": 0.2363894283771515, "learning_rate": 1e-05, "loss": 0.9825, "step": 103710 }, { "epoch": 91.86448184233835, "grad_norm": 0.2259645015001297, "learning_rate": 1e-05, "loss": 0.9921, "step": 103715 }, { "epoch": 91.86891054030116, "grad_norm": 0.22022132575511932, "learning_rate": 1e-05, "loss": 0.9805, "step": 103720 }, { "epoch": 91.87333923826395, "grad_norm": 0.2187359780073166, "learning_rate": 1e-05, "loss": 0.986, "step": 103725 }, { "epoch": 91.87776793622675, "grad_norm": 0.23153722286224365, "learning_rate": 1e-05, "loss": 0.9774, "step": 103730 }, { "epoch": 91.88219663418955, "grad_norm": 0.26593056321144104, "learning_rate": 1e-05, "loss": 0.9583, "step": 103735 }, { "epoch": 91.88662533215235, "grad_norm": 0.24761411547660828, "learning_rate": 1e-05, "loss": 0.9157, "step": 103740 }, { "epoch": 91.89105403011514, "grad_norm": 0.19420082867145538, "learning_rate": 1e-05, "loss": 0.9711, "step": 103745 }, { "epoch": 91.89548272807795, "grad_norm": 0.23348109424114227, "learning_rate": 1e-05, "loss": 1.0093, "step": 103750 }, { "epoch": 91.89991142604075, "grad_norm": 0.23166079819202423, "learning_rate": 1e-05, "loss": 0.9468, "step": 103755 }, { "epoch": 91.90434012400354, "grad_norm": 0.23135671019554138, "learning_rate": 1e-05, "loss": 0.912, "step": 103760 }, { "epoch": 91.90876882196635, "grad_norm": 0.2203824818134308, "learning_rate": 1e-05, "loss": 0.9474, "step": 103765 }, { "epoch": 91.91319751992914, "grad_norm": 0.23650586605072021, "learning_rate": 1e-05, "loss": 0.9542, "step": 103770 }, { "epoch": 91.91762621789194, "grad_norm": 0.19482700526714325, "learning_rate": 1e-05, "loss": 0.9633, "step": 103775 }, { "epoch": 91.92205491585474, "grad_norm": 0.21504123508930206, "learning_rate": 1e-05, "loss": 1.0577, "step": 103780 }, { "epoch": 91.92648361381754, "grad_norm": 0.21181507408618927, "learning_rate": 1e-05, "loss": 0.9767, "step": 103785 }, { "epoch": 91.93091231178033, "grad_norm": 0.20825058221817017, "learning_rate": 1e-05, "loss": 0.9533, "step": 103790 }, { "epoch": 91.93534100974314, "grad_norm": 0.2213062196969986, "learning_rate": 1e-05, "loss": 0.9218, "step": 103795 }, { "epoch": 91.93976970770593, "grad_norm": 0.22234788537025452, "learning_rate": 1e-05, "loss": 0.9771, "step": 103800 }, { "epoch": 91.94419840566873, "grad_norm": 0.21452941000461578, "learning_rate": 1e-05, "loss": 0.9097, "step": 103805 }, { "epoch": 91.94862710363154, "grad_norm": 0.22836118936538696, "learning_rate": 1e-05, "loss": 0.9859, "step": 103810 }, { "epoch": 91.95305580159433, "grad_norm": 0.22469556331634521, "learning_rate": 1e-05, "loss": 0.9501, "step": 103815 }, { "epoch": 91.95748449955713, "grad_norm": 0.23338736593723297, "learning_rate": 1e-05, "loss": 0.9441, "step": 103820 }, { "epoch": 91.96191319751993, "grad_norm": 0.24340717494487762, "learning_rate": 1e-05, "loss": 0.9377, "step": 103825 }, { "epoch": 91.96634189548273, "grad_norm": 0.22474537789821625, "learning_rate": 1e-05, "loss": 0.9551, "step": 103830 }, { "epoch": 91.97077059344552, "grad_norm": 0.27064454555511475, "learning_rate": 1e-05, "loss": 1.008, "step": 103835 }, { "epoch": 91.97519929140833, "grad_norm": 0.30823981761932373, "learning_rate": 1e-05, "loss": 0.9704, "step": 103840 }, { "epoch": 91.97962798937112, "grad_norm": 0.24128656089305878, "learning_rate": 1e-05, "loss": 0.9613, "step": 103845 }, { "epoch": 91.98405668733392, "grad_norm": 0.2586424648761749, "learning_rate": 1e-05, "loss": 0.8864, "step": 103850 }, { "epoch": 91.98848538529673, "grad_norm": 0.2608751654624939, "learning_rate": 1e-05, "loss": 0.9365, "step": 103855 }, { "epoch": 91.99291408325952, "grad_norm": 0.26205989718437195, "learning_rate": 1e-05, "loss": 0.9626, "step": 103860 }, { "epoch": 91.99734278122232, "grad_norm": 0.22395013272762299, "learning_rate": 1e-05, "loss": 0.9374, "step": 103865 }, { "epoch": 92.00177147918512, "grad_norm": 0.23495995998382568, "learning_rate": 1e-05, "loss": 0.9755, "step": 103870 }, { "epoch": 92.00620017714792, "grad_norm": 0.24605832993984222, "learning_rate": 1e-05, "loss": 0.9256, "step": 103875 }, { "epoch": 92.01062887511071, "grad_norm": 0.24388107657432556, "learning_rate": 1e-05, "loss": 0.9763, "step": 103880 }, { "epoch": 92.01505757307352, "grad_norm": 0.22551771998405457, "learning_rate": 1e-05, "loss": 0.9773, "step": 103885 }, { "epoch": 92.01948627103631, "grad_norm": 0.20739533007144928, "learning_rate": 1e-05, "loss": 0.9462, "step": 103890 }, { "epoch": 92.02391496899911, "grad_norm": 0.208740696310997, "learning_rate": 1e-05, "loss": 0.9592, "step": 103895 }, { "epoch": 92.02834366696192, "grad_norm": 0.24853219091892242, "learning_rate": 1e-05, "loss": 0.8905, "step": 103900 }, { "epoch": 92.03277236492471, "grad_norm": 0.23374173045158386, "learning_rate": 1e-05, "loss": 0.9662, "step": 103905 }, { "epoch": 92.0372010628875, "grad_norm": 0.25845223665237427, "learning_rate": 1e-05, "loss": 0.942, "step": 103910 }, { "epoch": 92.04162976085031, "grad_norm": 0.242400661110878, "learning_rate": 1e-05, "loss": 0.9494, "step": 103915 }, { "epoch": 92.0460584588131, "grad_norm": 0.2653234899044037, "learning_rate": 1e-05, "loss": 0.8951, "step": 103920 }, { "epoch": 92.0504871567759, "grad_norm": 0.23645661771297455, "learning_rate": 1e-05, "loss": 1.0128, "step": 103925 }, { "epoch": 92.05491585473871, "grad_norm": 0.2464582622051239, "learning_rate": 1e-05, "loss": 0.9539, "step": 103930 }, { "epoch": 92.0593445527015, "grad_norm": 0.23376061022281647, "learning_rate": 1e-05, "loss": 0.9811, "step": 103935 }, { "epoch": 92.0637732506643, "grad_norm": 0.26240673661231995, "learning_rate": 1e-05, "loss": 0.919, "step": 103940 }, { "epoch": 92.0682019486271, "grad_norm": 0.26038098335266113, "learning_rate": 1e-05, "loss": 0.9553, "step": 103945 }, { "epoch": 92.0726306465899, "grad_norm": 0.21223558485507965, "learning_rate": 1e-05, "loss": 0.9302, "step": 103950 }, { "epoch": 92.07705934455271, "grad_norm": 0.23779959976673126, "learning_rate": 1e-05, "loss": 0.9981, "step": 103955 }, { "epoch": 92.0814880425155, "grad_norm": 0.27635443210601807, "learning_rate": 1e-05, "loss": 0.9162, "step": 103960 }, { "epoch": 92.0859167404783, "grad_norm": 0.2691574990749359, "learning_rate": 1e-05, "loss": 1.0295, "step": 103965 }, { "epoch": 92.0903454384411, "grad_norm": 0.24721093475818634, "learning_rate": 1e-05, "loss": 0.9039, "step": 103970 }, { "epoch": 92.0947741364039, "grad_norm": 0.28207874298095703, "learning_rate": 1e-05, "loss": 0.9988, "step": 103975 }, { "epoch": 92.0992028343667, "grad_norm": 0.2782837152481079, "learning_rate": 1e-05, "loss": 0.9741, "step": 103980 }, { "epoch": 92.1036315323295, "grad_norm": 0.25969675183296204, "learning_rate": 1e-05, "loss": 0.9766, "step": 103985 }, { "epoch": 92.1080602302923, "grad_norm": 0.2584705054759979, "learning_rate": 1e-05, "loss": 0.932, "step": 103990 }, { "epoch": 92.11248892825509, "grad_norm": 0.20821960270404816, "learning_rate": 1e-05, "loss": 0.9322, "step": 103995 }, { "epoch": 92.1169176262179, "grad_norm": 0.2077736109495163, "learning_rate": 1e-05, "loss": 0.9406, "step": 104000 }, { "epoch": 92.12134632418069, "grad_norm": 0.23634015023708344, "learning_rate": 1e-05, "loss": 1.0244, "step": 104005 }, { "epoch": 92.12577502214349, "grad_norm": 0.23706485331058502, "learning_rate": 1e-05, "loss": 0.9369, "step": 104010 }, { "epoch": 92.1302037201063, "grad_norm": 0.25284987688064575, "learning_rate": 1e-05, "loss": 0.9888, "step": 104015 }, { "epoch": 92.13463241806909, "grad_norm": 0.237601637840271, "learning_rate": 1e-05, "loss": 0.9696, "step": 104020 }, { "epoch": 92.13906111603188, "grad_norm": 0.23123103380203247, "learning_rate": 1e-05, "loss": 0.9749, "step": 104025 }, { "epoch": 92.14348981399469, "grad_norm": 0.2782042920589447, "learning_rate": 1e-05, "loss": 0.925, "step": 104030 }, { "epoch": 92.14791851195749, "grad_norm": 0.2409246861934662, "learning_rate": 1e-05, "loss": 0.9822, "step": 104035 }, { "epoch": 92.15234720992028, "grad_norm": 0.2444685846567154, "learning_rate": 1e-05, "loss": 0.9336, "step": 104040 }, { "epoch": 92.15677590788309, "grad_norm": 0.24705785512924194, "learning_rate": 1e-05, "loss": 0.9748, "step": 104045 }, { "epoch": 92.16120460584588, "grad_norm": 0.24171793460845947, "learning_rate": 1e-05, "loss": 0.8968, "step": 104050 }, { "epoch": 92.16563330380868, "grad_norm": 0.2305198609828949, "learning_rate": 1e-05, "loss": 0.9433, "step": 104055 }, { "epoch": 92.17006200177148, "grad_norm": 0.26496249437332153, "learning_rate": 1e-05, "loss": 0.9613, "step": 104060 }, { "epoch": 92.17449069973428, "grad_norm": 0.2260299175977707, "learning_rate": 1e-05, "loss": 0.9701, "step": 104065 }, { "epoch": 92.17891939769707, "grad_norm": 0.22638362646102905, "learning_rate": 1e-05, "loss": 0.9816, "step": 104070 }, { "epoch": 92.18334809565988, "grad_norm": 0.2787269353866577, "learning_rate": 1e-05, "loss": 0.9505, "step": 104075 }, { "epoch": 92.18777679362267, "grad_norm": 0.22965730726718903, "learning_rate": 1e-05, "loss": 0.9902, "step": 104080 }, { "epoch": 92.19220549158547, "grad_norm": 0.24454183876514435, "learning_rate": 1e-05, "loss": 0.9691, "step": 104085 }, { "epoch": 92.19663418954828, "grad_norm": 0.21229122579097748, "learning_rate": 1e-05, "loss": 0.9501, "step": 104090 }, { "epoch": 92.20106288751107, "grad_norm": 0.24226590991020203, "learning_rate": 1e-05, "loss": 0.9482, "step": 104095 }, { "epoch": 92.20549158547387, "grad_norm": 0.21497997641563416, "learning_rate": 1e-05, "loss": 0.9526, "step": 104100 }, { "epoch": 92.20992028343667, "grad_norm": 0.20268036425113678, "learning_rate": 1e-05, "loss": 0.9449, "step": 104105 }, { "epoch": 92.21434898139947, "grad_norm": 0.28298458456993103, "learning_rate": 1e-05, "loss": 0.9509, "step": 104110 }, { "epoch": 92.21877767936226, "grad_norm": 0.28500646352767944, "learning_rate": 1e-05, "loss": 0.9689, "step": 104115 }, { "epoch": 92.22320637732507, "grad_norm": 0.23234178125858307, "learning_rate": 1e-05, "loss": 0.9196, "step": 104120 }, { "epoch": 92.22763507528786, "grad_norm": 0.23796726763248444, "learning_rate": 1e-05, "loss": 0.9465, "step": 104125 }, { "epoch": 92.23206377325066, "grad_norm": 0.25222048163414, "learning_rate": 1e-05, "loss": 0.9443, "step": 104130 }, { "epoch": 92.23649247121347, "grad_norm": 0.3183692395687103, "learning_rate": 1e-05, "loss": 0.9542, "step": 104135 }, { "epoch": 92.24092116917626, "grad_norm": 0.27457526326179504, "learning_rate": 1e-05, "loss": 0.956, "step": 104140 }, { "epoch": 92.24534986713905, "grad_norm": 0.23743785917758942, "learning_rate": 1e-05, "loss": 0.9333, "step": 104145 }, { "epoch": 92.24977856510186, "grad_norm": 0.2591595947742462, "learning_rate": 1e-05, "loss": 0.9337, "step": 104150 }, { "epoch": 92.25420726306466, "grad_norm": 0.24182827770709991, "learning_rate": 1e-05, "loss": 0.9306, "step": 104155 }, { "epoch": 92.25863596102745, "grad_norm": 0.25132352113723755, "learning_rate": 1e-05, "loss": 0.972, "step": 104160 }, { "epoch": 92.26306465899026, "grad_norm": 0.264423131942749, "learning_rate": 1e-05, "loss": 0.9707, "step": 104165 }, { "epoch": 92.26749335695305, "grad_norm": 0.24574121832847595, "learning_rate": 1e-05, "loss": 1.0029, "step": 104170 }, { "epoch": 92.27192205491585, "grad_norm": 0.20907630026340485, "learning_rate": 1e-05, "loss": 0.9443, "step": 104175 }, { "epoch": 92.27635075287866, "grad_norm": 0.23272904753684998, "learning_rate": 1e-05, "loss": 0.9395, "step": 104180 }, { "epoch": 92.28077945084145, "grad_norm": 0.22407276928424835, "learning_rate": 1e-05, "loss": 0.9541, "step": 104185 }, { "epoch": 92.28520814880426, "grad_norm": 0.23466186225414276, "learning_rate": 1e-05, "loss": 0.9596, "step": 104190 }, { "epoch": 92.28963684676705, "grad_norm": 0.24560891091823578, "learning_rate": 1e-05, "loss": 0.9722, "step": 104195 }, { "epoch": 92.29406554472985, "grad_norm": 0.22800418734550476, "learning_rate": 1e-05, "loss": 0.9765, "step": 104200 }, { "epoch": 92.29849424269266, "grad_norm": 0.22940276563167572, "learning_rate": 1e-05, "loss": 0.9699, "step": 104205 }, { "epoch": 92.30292294065545, "grad_norm": 0.19750966131687164, "learning_rate": 1e-05, "loss": 0.951, "step": 104210 }, { "epoch": 92.30735163861824, "grad_norm": 0.24489665031433105, "learning_rate": 1e-05, "loss": 0.9922, "step": 104215 }, { "epoch": 92.31178033658105, "grad_norm": 0.2556707561016083, "learning_rate": 1e-05, "loss": 0.9666, "step": 104220 }, { "epoch": 92.31620903454385, "grad_norm": 0.250402569770813, "learning_rate": 1e-05, "loss": 0.9499, "step": 104225 }, { "epoch": 92.32063773250664, "grad_norm": 0.23689934611320496, "learning_rate": 1e-05, "loss": 0.9476, "step": 104230 }, { "epoch": 92.32506643046945, "grad_norm": 0.24054846167564392, "learning_rate": 1e-05, "loss": 0.9899, "step": 104235 }, { "epoch": 92.32949512843224, "grad_norm": 0.2402598112821579, "learning_rate": 1e-05, "loss": 0.9695, "step": 104240 }, { "epoch": 92.33392382639504, "grad_norm": 0.22055096924304962, "learning_rate": 1e-05, "loss": 0.9982, "step": 104245 }, { "epoch": 92.33835252435784, "grad_norm": 0.24660301208496094, "learning_rate": 1e-05, "loss": 0.9989, "step": 104250 }, { "epoch": 92.34278122232064, "grad_norm": 0.2818809747695923, "learning_rate": 1e-05, "loss": 0.9886, "step": 104255 }, { "epoch": 92.34720992028343, "grad_norm": 0.21271498501300812, "learning_rate": 1e-05, "loss": 0.9904, "step": 104260 }, { "epoch": 92.35163861824624, "grad_norm": 0.22835443913936615, "learning_rate": 1e-05, "loss": 0.9533, "step": 104265 }, { "epoch": 92.35606731620904, "grad_norm": 0.25345247983932495, "learning_rate": 1e-05, "loss": 0.9174, "step": 104270 }, { "epoch": 92.36049601417183, "grad_norm": 0.21460489928722382, "learning_rate": 1e-05, "loss": 0.9401, "step": 104275 }, { "epoch": 92.36492471213464, "grad_norm": 0.2641546130180359, "learning_rate": 1e-05, "loss": 0.9494, "step": 104280 }, { "epoch": 92.36935341009743, "grad_norm": 0.23895984888076782, "learning_rate": 1e-05, "loss": 0.9868, "step": 104285 }, { "epoch": 92.37378210806023, "grad_norm": 0.23548202216625214, "learning_rate": 1e-05, "loss": 0.9542, "step": 104290 }, { "epoch": 92.37821080602303, "grad_norm": 0.2439844310283661, "learning_rate": 1e-05, "loss": 0.9226, "step": 104295 }, { "epoch": 92.38263950398583, "grad_norm": 0.21296456456184387, "learning_rate": 1e-05, "loss": 0.913, "step": 104300 }, { "epoch": 92.38706820194862, "grad_norm": 0.22155778110027313, "learning_rate": 1e-05, "loss": 0.9672, "step": 104305 }, { "epoch": 92.39149689991143, "grad_norm": 0.3749118745326996, "learning_rate": 1e-05, "loss": 0.9525, "step": 104310 }, { "epoch": 92.39592559787422, "grad_norm": 0.27496710419654846, "learning_rate": 1e-05, "loss": 0.9736, "step": 104315 }, { "epoch": 92.40035429583702, "grad_norm": 0.22657912969589233, "learning_rate": 1e-05, "loss": 0.9478, "step": 104320 }, { "epoch": 92.40478299379983, "grad_norm": 0.23495355248451233, "learning_rate": 1e-05, "loss": 0.8912, "step": 104325 }, { "epoch": 92.40921169176262, "grad_norm": 0.25046372413635254, "learning_rate": 1e-05, "loss": 0.9639, "step": 104330 }, { "epoch": 92.41364038972542, "grad_norm": 0.20248231291770935, "learning_rate": 1e-05, "loss": 0.9558, "step": 104335 }, { "epoch": 92.41806908768822, "grad_norm": 0.2215609848499298, "learning_rate": 1e-05, "loss": 0.9854, "step": 104340 }, { "epoch": 92.42249778565102, "grad_norm": 0.20338699221611023, "learning_rate": 1e-05, "loss": 0.9927, "step": 104345 }, { "epoch": 92.42692648361381, "grad_norm": 0.23027391731739044, "learning_rate": 1e-05, "loss": 0.9075, "step": 104350 }, { "epoch": 92.43135518157662, "grad_norm": 0.2456299215555191, "learning_rate": 1e-05, "loss": 0.8935, "step": 104355 }, { "epoch": 92.43578387953941, "grad_norm": 0.234702467918396, "learning_rate": 1e-05, "loss": 0.918, "step": 104360 }, { "epoch": 92.44021257750221, "grad_norm": 0.23205651342868805, "learning_rate": 1e-05, "loss": 0.8771, "step": 104365 }, { "epoch": 92.44464127546502, "grad_norm": 0.25196555256843567, "learning_rate": 1e-05, "loss": 0.9523, "step": 104370 }, { "epoch": 92.44906997342781, "grad_norm": 0.22299695014953613, "learning_rate": 1e-05, "loss": 0.9619, "step": 104375 }, { "epoch": 92.4534986713906, "grad_norm": 0.2515668272972107, "learning_rate": 1e-05, "loss": 0.9692, "step": 104380 }, { "epoch": 92.45792736935341, "grad_norm": 0.22013632953166962, "learning_rate": 1e-05, "loss": 0.9315, "step": 104385 }, { "epoch": 92.46235606731621, "grad_norm": 0.2658098042011261, "learning_rate": 1e-05, "loss": 0.906, "step": 104390 }, { "epoch": 92.466784765279, "grad_norm": 0.23746754229068756, "learning_rate": 1e-05, "loss": 0.9506, "step": 104395 }, { "epoch": 92.47121346324181, "grad_norm": 0.22035254538059235, "learning_rate": 1e-05, "loss": 0.9205, "step": 104400 }, { "epoch": 92.4756421612046, "grad_norm": 0.23228588700294495, "learning_rate": 1e-05, "loss": 0.9822, "step": 104405 }, { "epoch": 92.4800708591674, "grad_norm": 0.22178740799427032, "learning_rate": 1e-05, "loss": 0.8872, "step": 104410 }, { "epoch": 92.4844995571302, "grad_norm": 0.23507189750671387, "learning_rate": 1e-05, "loss": 0.952, "step": 104415 }, { "epoch": 92.488928255093, "grad_norm": 0.29131391644477844, "learning_rate": 1e-05, "loss": 0.9656, "step": 104420 }, { "epoch": 92.4933569530558, "grad_norm": 0.26209887862205505, "learning_rate": 1e-05, "loss": 1.0061, "step": 104425 }, { "epoch": 92.4977856510186, "grad_norm": 0.2290084809064865, "learning_rate": 1e-05, "loss": 0.958, "step": 104430 }, { "epoch": 92.5022143489814, "grad_norm": 0.25928211212158203, "learning_rate": 1e-05, "loss": 0.9851, "step": 104435 }, { "epoch": 92.5066430469442, "grad_norm": 0.24726904928684235, "learning_rate": 1e-05, "loss": 0.9738, "step": 104440 }, { "epoch": 92.511071744907, "grad_norm": 0.2822262644767761, "learning_rate": 1e-05, "loss": 0.928, "step": 104445 }, { "epoch": 92.5155004428698, "grad_norm": 0.22445835173130035, "learning_rate": 1e-05, "loss": 0.9345, "step": 104450 }, { "epoch": 92.5199291408326, "grad_norm": 0.22521314024925232, "learning_rate": 1e-05, "loss": 0.9617, "step": 104455 }, { "epoch": 92.5243578387954, "grad_norm": 0.24269069731235504, "learning_rate": 1e-05, "loss": 0.9575, "step": 104460 }, { "epoch": 92.52878653675819, "grad_norm": 0.23524372279644012, "learning_rate": 1e-05, "loss": 0.9318, "step": 104465 }, { "epoch": 92.533215234721, "grad_norm": 0.26258307695388794, "learning_rate": 1e-05, "loss": 0.9063, "step": 104470 }, { "epoch": 92.53764393268379, "grad_norm": 0.22780494391918182, "learning_rate": 1e-05, "loss": 0.9871, "step": 104475 }, { "epoch": 92.54207263064659, "grad_norm": 0.23488205671310425, "learning_rate": 1e-05, "loss": 0.94, "step": 104480 }, { "epoch": 92.5465013286094, "grad_norm": 0.24419263005256653, "learning_rate": 1e-05, "loss": 0.9452, "step": 104485 }, { "epoch": 92.55093002657219, "grad_norm": 0.22354215383529663, "learning_rate": 1e-05, "loss": 0.9868, "step": 104490 }, { "epoch": 92.55535872453498, "grad_norm": 0.27550143003463745, "learning_rate": 1e-05, "loss": 0.9752, "step": 104495 }, { "epoch": 92.55978742249779, "grad_norm": 0.2394014596939087, "learning_rate": 1e-05, "loss": 0.9462, "step": 104500 }, { "epoch": 92.56421612046059, "grad_norm": 0.25666239857673645, "learning_rate": 1e-05, "loss": 1.0033, "step": 104505 }, { "epoch": 92.56864481842338, "grad_norm": 0.22383306920528412, "learning_rate": 1e-05, "loss": 0.9823, "step": 104510 }, { "epoch": 92.57307351638619, "grad_norm": 0.2127399742603302, "learning_rate": 1e-05, "loss": 0.9566, "step": 104515 }, { "epoch": 92.57750221434898, "grad_norm": 0.2735644578933716, "learning_rate": 1e-05, "loss": 0.9033, "step": 104520 }, { "epoch": 92.58193091231178, "grad_norm": 0.2298261821269989, "learning_rate": 1e-05, "loss": 0.9761, "step": 104525 }, { "epoch": 92.58635961027458, "grad_norm": 0.2576485574245453, "learning_rate": 1e-05, "loss": 1.0044, "step": 104530 }, { "epoch": 92.59078830823738, "grad_norm": 0.2730349600315094, "learning_rate": 1e-05, "loss": 0.9531, "step": 104535 }, { "epoch": 92.59521700620017, "grad_norm": 0.20015253126621246, "learning_rate": 1e-05, "loss": 0.9338, "step": 104540 }, { "epoch": 92.59964570416298, "grad_norm": 0.2402438223361969, "learning_rate": 1e-05, "loss": 0.927, "step": 104545 }, { "epoch": 92.60407440212578, "grad_norm": 0.2422506958246231, "learning_rate": 1e-05, "loss": 0.9222, "step": 104550 }, { "epoch": 92.60850310008857, "grad_norm": 0.23408442735671997, "learning_rate": 1e-05, "loss": 0.989, "step": 104555 }, { "epoch": 92.61293179805138, "grad_norm": 0.25045037269592285, "learning_rate": 1e-05, "loss": 0.9664, "step": 104560 }, { "epoch": 92.61736049601417, "grad_norm": 0.28587305545806885, "learning_rate": 1e-05, "loss": 0.9864, "step": 104565 }, { "epoch": 92.62178919397697, "grad_norm": 0.2303403615951538, "learning_rate": 1e-05, "loss": 0.9631, "step": 104570 }, { "epoch": 92.62621789193977, "grad_norm": 0.22380352020263672, "learning_rate": 1e-05, "loss": 0.9925, "step": 104575 }, { "epoch": 92.63064658990257, "grad_norm": 0.25911083817481995, "learning_rate": 1e-05, "loss": 0.97, "step": 104580 }, { "epoch": 92.63507528786536, "grad_norm": 0.2241116464138031, "learning_rate": 1e-05, "loss": 0.9804, "step": 104585 }, { "epoch": 92.63950398582817, "grad_norm": 0.2572423815727234, "learning_rate": 1e-05, "loss": 0.9535, "step": 104590 }, { "epoch": 92.64393268379096, "grad_norm": 0.26465338468551636, "learning_rate": 1e-05, "loss": 0.9548, "step": 104595 }, { "epoch": 92.64836138175376, "grad_norm": 0.24486985802650452, "learning_rate": 1e-05, "loss": 0.9796, "step": 104600 }, { "epoch": 92.65279007971657, "grad_norm": 0.20631392300128937, "learning_rate": 1e-05, "loss": 0.9405, "step": 104605 }, { "epoch": 92.65721877767936, "grad_norm": 0.19914941489696503, "learning_rate": 1e-05, "loss": 0.9466, "step": 104610 }, { "epoch": 92.66164747564216, "grad_norm": 0.22169391810894012, "learning_rate": 1e-05, "loss": 0.9626, "step": 104615 }, { "epoch": 92.66607617360496, "grad_norm": 0.24760858714580536, "learning_rate": 1e-05, "loss": 0.9445, "step": 104620 }, { "epoch": 92.67050487156776, "grad_norm": 0.2333918660879135, "learning_rate": 1e-05, "loss": 0.9678, "step": 104625 }, { "epoch": 92.67493356953055, "grad_norm": 0.24727186560630798, "learning_rate": 1e-05, "loss": 0.9452, "step": 104630 }, { "epoch": 92.67936226749336, "grad_norm": 0.2489270269870758, "learning_rate": 1e-05, "loss": 0.9886, "step": 104635 }, { "epoch": 92.68379096545615, "grad_norm": 0.2481164187192917, "learning_rate": 1e-05, "loss": 0.931, "step": 104640 }, { "epoch": 92.68821966341895, "grad_norm": 0.23372292518615723, "learning_rate": 1e-05, "loss": 0.9598, "step": 104645 }, { "epoch": 92.69264836138176, "grad_norm": 0.21750441193580627, "learning_rate": 1e-05, "loss": 0.9057, "step": 104650 }, { "epoch": 92.69707705934455, "grad_norm": 0.20124943554401398, "learning_rate": 1e-05, "loss": 0.9453, "step": 104655 }, { "epoch": 92.70150575730734, "grad_norm": 0.2387171983718872, "learning_rate": 1e-05, "loss": 0.9782, "step": 104660 }, { "epoch": 92.70593445527015, "grad_norm": 0.2116619199514389, "learning_rate": 1e-05, "loss": 0.9685, "step": 104665 }, { "epoch": 92.71036315323295, "grad_norm": 0.2503810226917267, "learning_rate": 1e-05, "loss": 1.0141, "step": 104670 }, { "epoch": 92.71479185119574, "grad_norm": 0.23251168429851532, "learning_rate": 1e-05, "loss": 1.005, "step": 104675 }, { "epoch": 92.71922054915855, "grad_norm": 0.21455849707126617, "learning_rate": 1e-05, "loss": 0.9884, "step": 104680 }, { "epoch": 92.72364924712134, "grad_norm": 0.23423728346824646, "learning_rate": 1e-05, "loss": 0.9852, "step": 104685 }, { "epoch": 92.72807794508415, "grad_norm": 0.19891810417175293, "learning_rate": 1e-05, "loss": 0.9668, "step": 104690 }, { "epoch": 92.73250664304695, "grad_norm": 0.27176955342292786, "learning_rate": 1e-05, "loss": 0.9703, "step": 104695 }, { "epoch": 92.73693534100974, "grad_norm": 0.2571922242641449, "learning_rate": 1e-05, "loss": 0.9446, "step": 104700 }, { "epoch": 92.74136403897255, "grad_norm": 0.27428558468818665, "learning_rate": 1e-05, "loss": 0.9636, "step": 104705 }, { "epoch": 92.74579273693534, "grad_norm": 0.20511743426322937, "learning_rate": 1e-05, "loss": 0.9602, "step": 104710 }, { "epoch": 92.75022143489814, "grad_norm": 0.22100993990898132, "learning_rate": 1e-05, "loss": 1.022, "step": 104715 }, { "epoch": 92.75465013286095, "grad_norm": 0.2564641237258911, "learning_rate": 1e-05, "loss": 0.888, "step": 104720 }, { "epoch": 92.75907883082374, "grad_norm": 0.23297759890556335, "learning_rate": 1e-05, "loss": 0.9436, "step": 104725 }, { "epoch": 92.76350752878653, "grad_norm": 0.2554158568382263, "learning_rate": 1e-05, "loss": 0.972, "step": 104730 }, { "epoch": 92.76793622674934, "grad_norm": 0.25017249584198, "learning_rate": 1e-05, "loss": 1.0114, "step": 104735 }, { "epoch": 92.77236492471214, "grad_norm": 0.22326292097568512, "learning_rate": 1e-05, "loss": 1.0144, "step": 104740 }, { "epoch": 92.77679362267493, "grad_norm": 0.24096561968326569, "learning_rate": 1e-05, "loss": 1.0013, "step": 104745 }, { "epoch": 92.78122232063774, "grad_norm": 0.23591643571853638, "learning_rate": 1e-05, "loss": 0.9367, "step": 104750 }, { "epoch": 92.78565101860053, "grad_norm": 0.20652945339679718, "learning_rate": 1e-05, "loss": 0.9519, "step": 104755 }, { "epoch": 92.79007971656333, "grad_norm": 0.2492186576128006, "learning_rate": 1e-05, "loss": 0.9822, "step": 104760 }, { "epoch": 92.79450841452613, "grad_norm": 0.21970874071121216, "learning_rate": 1e-05, "loss": 0.9963, "step": 104765 }, { "epoch": 92.79893711248893, "grad_norm": 0.24520668387413025, "learning_rate": 1e-05, "loss": 0.9379, "step": 104770 }, { "epoch": 92.80336581045172, "grad_norm": 0.22851429879665375, "learning_rate": 1e-05, "loss": 0.9281, "step": 104775 }, { "epoch": 92.80779450841453, "grad_norm": 0.28227218985557556, "learning_rate": 1e-05, "loss": 0.9828, "step": 104780 }, { "epoch": 92.81222320637733, "grad_norm": 0.24791565537452698, "learning_rate": 1e-05, "loss": 0.9757, "step": 104785 }, { "epoch": 92.81665190434012, "grad_norm": 0.40673360228538513, "learning_rate": 1e-05, "loss": 0.9555, "step": 104790 }, { "epoch": 92.82108060230293, "grad_norm": 0.24803990125656128, "learning_rate": 1e-05, "loss": 0.9512, "step": 104795 }, { "epoch": 92.82550930026572, "grad_norm": 0.22825345396995544, "learning_rate": 1e-05, "loss": 0.9578, "step": 104800 }, { "epoch": 92.82993799822852, "grad_norm": 0.26857370138168335, "learning_rate": 1e-05, "loss": 0.9829, "step": 104805 }, { "epoch": 92.83436669619132, "grad_norm": 0.21720892190933228, "learning_rate": 1e-05, "loss": 0.9679, "step": 104810 }, { "epoch": 92.83879539415412, "grad_norm": 0.22743439674377441, "learning_rate": 1e-05, "loss": 0.9408, "step": 104815 }, { "epoch": 92.84322409211691, "grad_norm": 0.21095409989356995, "learning_rate": 1e-05, "loss": 0.9665, "step": 104820 }, { "epoch": 92.84765279007972, "grad_norm": 0.2132745087146759, "learning_rate": 1e-05, "loss": 0.9697, "step": 104825 }, { "epoch": 92.85208148804251, "grad_norm": 0.23872998356819153, "learning_rate": 1e-05, "loss": 0.9505, "step": 104830 }, { "epoch": 92.85651018600531, "grad_norm": 0.2352243810892105, "learning_rate": 1e-05, "loss": 1.0017, "step": 104835 }, { "epoch": 92.86093888396812, "grad_norm": 0.22757011651992798, "learning_rate": 1e-05, "loss": 0.9535, "step": 104840 }, { "epoch": 92.86536758193091, "grad_norm": 0.2358512133359909, "learning_rate": 1e-05, "loss": 0.9058, "step": 104845 }, { "epoch": 92.8697962798937, "grad_norm": 0.2236064225435257, "learning_rate": 1e-05, "loss": 1.0125, "step": 104850 }, { "epoch": 92.87422497785651, "grad_norm": 0.2655746042728424, "learning_rate": 1e-05, "loss": 0.9726, "step": 104855 }, { "epoch": 92.87865367581931, "grad_norm": 0.2977094352245331, "learning_rate": 1e-05, "loss": 0.9496, "step": 104860 }, { "epoch": 92.8830823737821, "grad_norm": 0.2825170159339905, "learning_rate": 1e-05, "loss": 1.0069, "step": 104865 }, { "epoch": 92.88751107174491, "grad_norm": 0.23948055505752563, "learning_rate": 1e-05, "loss": 0.9296, "step": 104870 }, { "epoch": 92.8919397697077, "grad_norm": 0.2645384967327118, "learning_rate": 1e-05, "loss": 0.9211, "step": 104875 }, { "epoch": 92.8963684676705, "grad_norm": 0.27442029118537903, "learning_rate": 1e-05, "loss": 1.0144, "step": 104880 }, { "epoch": 92.9007971656333, "grad_norm": 0.24493233859539032, "learning_rate": 1e-05, "loss": 0.953, "step": 104885 }, { "epoch": 92.9052258635961, "grad_norm": 0.2219218909740448, "learning_rate": 1e-05, "loss": 0.8975, "step": 104890 }, { "epoch": 92.9096545615589, "grad_norm": 0.23851463198661804, "learning_rate": 1e-05, "loss": 0.9674, "step": 104895 }, { "epoch": 92.9140832595217, "grad_norm": 0.21424821019172668, "learning_rate": 1e-05, "loss": 0.9231, "step": 104900 }, { "epoch": 92.9185119574845, "grad_norm": 0.22765296697616577, "learning_rate": 1e-05, "loss": 1.0331, "step": 104905 }, { "epoch": 92.92294065544729, "grad_norm": 0.25646573305130005, "learning_rate": 1e-05, "loss": 0.9473, "step": 104910 }, { "epoch": 92.9273693534101, "grad_norm": 0.21691830456256866, "learning_rate": 1e-05, "loss": 0.8911, "step": 104915 }, { "epoch": 92.9317980513729, "grad_norm": 0.21906930208206177, "learning_rate": 1e-05, "loss": 0.9567, "step": 104920 }, { "epoch": 92.9362267493357, "grad_norm": 0.2170998603105545, "learning_rate": 1e-05, "loss": 0.9599, "step": 104925 }, { "epoch": 92.9406554472985, "grad_norm": 0.23521636426448822, "learning_rate": 1e-05, "loss": 0.9762, "step": 104930 }, { "epoch": 92.94508414526129, "grad_norm": 0.2122250497341156, "learning_rate": 1e-05, "loss": 0.9758, "step": 104935 }, { "epoch": 92.9495128432241, "grad_norm": 0.22160866856575012, "learning_rate": 1e-05, "loss": 0.9526, "step": 104940 }, { "epoch": 92.9539415411869, "grad_norm": 0.22128285467624664, "learning_rate": 1e-05, "loss": 0.9574, "step": 104945 }, { "epoch": 92.95837023914969, "grad_norm": 0.27809882164001465, "learning_rate": 1e-05, "loss": 0.9825, "step": 104950 }, { "epoch": 92.9627989371125, "grad_norm": 0.21179543435573578, "learning_rate": 1e-05, "loss": 0.968, "step": 104955 }, { "epoch": 92.96722763507529, "grad_norm": 0.27684488892555237, "learning_rate": 1e-05, "loss": 0.9937, "step": 104960 }, { "epoch": 92.97165633303808, "grad_norm": 0.2391420155763626, "learning_rate": 1e-05, "loss": 0.9744, "step": 104965 }, { "epoch": 92.97608503100089, "grad_norm": 0.3003121018409729, "learning_rate": 1e-05, "loss": 0.9357, "step": 104970 }, { "epoch": 92.98051372896369, "grad_norm": 0.22289247810840607, "learning_rate": 1e-05, "loss": 0.8927, "step": 104975 }, { "epoch": 92.98494242692648, "grad_norm": 0.19042231142520905, "learning_rate": 1e-05, "loss": 0.9388, "step": 104980 }, { "epoch": 92.98937112488929, "grad_norm": 0.22295452654361725, "learning_rate": 1e-05, "loss": 0.9485, "step": 104985 }, { "epoch": 92.99379982285208, "grad_norm": 0.2865106463432312, "learning_rate": 1e-05, "loss": 0.9732, "step": 104990 }, { "epoch": 92.99822852081488, "grad_norm": 0.22310854494571686, "learning_rate": 1e-05, "loss": 0.943, "step": 104995 }, { "epoch": 93.00265721877768, "grad_norm": 0.2363872081041336, "learning_rate": 1e-05, "loss": 0.9601, "step": 105000 }, { "epoch": 93.00708591674048, "grad_norm": 0.20139619708061218, "learning_rate": 1e-05, "loss": 0.9573, "step": 105005 }, { "epoch": 93.01151461470327, "grad_norm": 0.20604220032691956, "learning_rate": 1e-05, "loss": 0.9272, "step": 105010 }, { "epoch": 93.01594331266608, "grad_norm": 0.23860906064510345, "learning_rate": 1e-05, "loss": 0.9341, "step": 105015 }, { "epoch": 93.02037201062888, "grad_norm": 0.24045388400554657, "learning_rate": 1e-05, "loss": 0.9524, "step": 105020 }, { "epoch": 93.02480070859167, "grad_norm": 0.20502442121505737, "learning_rate": 1e-05, "loss": 0.9811, "step": 105025 }, { "epoch": 93.02922940655448, "grad_norm": 0.2512694001197815, "learning_rate": 1e-05, "loss": 0.9833, "step": 105030 }, { "epoch": 93.03365810451727, "grad_norm": 0.2656692862510681, "learning_rate": 1e-05, "loss": 0.9719, "step": 105035 }, { "epoch": 93.03808680248007, "grad_norm": 0.25519657135009766, "learning_rate": 1e-05, "loss": 0.9736, "step": 105040 }, { "epoch": 93.04251550044287, "grad_norm": 0.269862562417984, "learning_rate": 1e-05, "loss": 1.0164, "step": 105045 }, { "epoch": 93.04694419840567, "grad_norm": 0.24477478861808777, "learning_rate": 1e-05, "loss": 0.9429, "step": 105050 }, { "epoch": 93.05137289636846, "grad_norm": 0.25380614399909973, "learning_rate": 1e-05, "loss": 0.9349, "step": 105055 }, { "epoch": 93.05580159433127, "grad_norm": 0.2589641511440277, "learning_rate": 1e-05, "loss": 0.9433, "step": 105060 }, { "epoch": 93.06023029229407, "grad_norm": 0.22753207385540009, "learning_rate": 1e-05, "loss": 0.9567, "step": 105065 }, { "epoch": 93.06465899025686, "grad_norm": 0.24029399454593658, "learning_rate": 1e-05, "loss": 0.9503, "step": 105070 }, { "epoch": 93.06908768821967, "grad_norm": 0.2508775293827057, "learning_rate": 1e-05, "loss": 0.9369, "step": 105075 }, { "epoch": 93.07351638618246, "grad_norm": 0.26097723841667175, "learning_rate": 1e-05, "loss": 0.9108, "step": 105080 }, { "epoch": 93.07794508414526, "grad_norm": 0.24411065876483917, "learning_rate": 1e-05, "loss": 0.9048, "step": 105085 }, { "epoch": 93.08237378210806, "grad_norm": 0.2114243507385254, "learning_rate": 1e-05, "loss": 1.0098, "step": 105090 }, { "epoch": 93.08680248007086, "grad_norm": 0.24687489867210388, "learning_rate": 1e-05, "loss": 0.9103, "step": 105095 }, { "epoch": 93.09123117803365, "grad_norm": 0.2059982866048813, "learning_rate": 1e-05, "loss": 1.0134, "step": 105100 }, { "epoch": 93.09565987599646, "grad_norm": 0.23935602605342865, "learning_rate": 1e-05, "loss": 0.9905, "step": 105105 }, { "epoch": 93.10008857395925, "grad_norm": 0.24573317170143127, "learning_rate": 1e-05, "loss": 0.9515, "step": 105110 }, { "epoch": 93.10451727192205, "grad_norm": 0.2513274848461151, "learning_rate": 1e-05, "loss": 0.9293, "step": 105115 }, { "epoch": 93.10894596988486, "grad_norm": 0.24034105241298676, "learning_rate": 1e-05, "loss": 0.9701, "step": 105120 }, { "epoch": 93.11337466784765, "grad_norm": 0.24127021431922913, "learning_rate": 1e-05, "loss": 0.9176, "step": 105125 }, { "epoch": 93.11780336581045, "grad_norm": 0.2650061845779419, "learning_rate": 1e-05, "loss": 0.9825, "step": 105130 }, { "epoch": 93.12223206377325, "grad_norm": 0.25877442955970764, "learning_rate": 1e-05, "loss": 0.976, "step": 105135 }, { "epoch": 93.12666076173605, "grad_norm": 0.20887874066829681, "learning_rate": 1e-05, "loss": 0.9101, "step": 105140 }, { "epoch": 93.13108945969884, "grad_norm": 0.22704558074474335, "learning_rate": 1e-05, "loss": 1.0058, "step": 105145 }, { "epoch": 93.13551815766165, "grad_norm": 0.26891300082206726, "learning_rate": 1e-05, "loss": 0.9137, "step": 105150 }, { "epoch": 93.13994685562444, "grad_norm": 0.2265508770942688, "learning_rate": 1e-05, "loss": 0.9831, "step": 105155 }, { "epoch": 93.14437555358724, "grad_norm": 0.29639148712158203, "learning_rate": 1e-05, "loss": 0.9939, "step": 105160 }, { "epoch": 93.14880425155005, "grad_norm": 0.2827858030796051, "learning_rate": 1e-05, "loss": 0.9459, "step": 105165 }, { "epoch": 93.15323294951284, "grad_norm": 0.26621493697166443, "learning_rate": 1e-05, "loss": 0.9465, "step": 105170 }, { "epoch": 93.15766164747565, "grad_norm": 0.34894272685050964, "learning_rate": 1e-05, "loss": 0.9615, "step": 105175 }, { "epoch": 93.16209034543844, "grad_norm": 0.30075308680534363, "learning_rate": 1e-05, "loss": 0.9078, "step": 105180 }, { "epoch": 93.16651904340124, "grad_norm": 0.3316897451877594, "learning_rate": 1e-05, "loss": 0.9798, "step": 105185 }, { "epoch": 93.17094774136405, "grad_norm": 0.23709797859191895, "learning_rate": 1e-05, "loss": 0.909, "step": 105190 }, { "epoch": 93.17537643932684, "grad_norm": 0.2581119239330292, "learning_rate": 1e-05, "loss": 0.993, "step": 105195 }, { "epoch": 93.17980513728963, "grad_norm": 0.2426549792289734, "learning_rate": 1e-05, "loss": 0.9893, "step": 105200 }, { "epoch": 93.18423383525244, "grad_norm": 0.2955203950405121, "learning_rate": 1e-05, "loss": 0.9618, "step": 105205 }, { "epoch": 93.18866253321524, "grad_norm": 0.2558835744857788, "learning_rate": 1e-05, "loss": 0.9767, "step": 105210 }, { "epoch": 93.19309123117803, "grad_norm": 0.2346285581588745, "learning_rate": 1e-05, "loss": 0.9337, "step": 105215 }, { "epoch": 93.19751992914084, "grad_norm": 0.2578800618648529, "learning_rate": 1e-05, "loss": 0.9594, "step": 105220 }, { "epoch": 93.20194862710363, "grad_norm": 0.24137237668037415, "learning_rate": 1e-05, "loss": 0.9178, "step": 105225 }, { "epoch": 93.20637732506643, "grad_norm": 0.23343440890312195, "learning_rate": 1e-05, "loss": 0.9893, "step": 105230 }, { "epoch": 93.21080602302924, "grad_norm": 0.2665894329547882, "learning_rate": 1e-05, "loss": 1.0001, "step": 105235 }, { "epoch": 93.21523472099203, "grad_norm": 0.27070286870002747, "learning_rate": 1e-05, "loss": 0.9475, "step": 105240 }, { "epoch": 93.21966341895482, "grad_norm": 0.2480572909116745, "learning_rate": 1e-05, "loss": 0.9457, "step": 105245 }, { "epoch": 93.22409211691763, "grad_norm": 0.22742024064064026, "learning_rate": 1e-05, "loss": 0.9424, "step": 105250 }, { "epoch": 93.22852081488043, "grad_norm": 0.21622078120708466, "learning_rate": 1e-05, "loss": 0.9526, "step": 105255 }, { "epoch": 93.23294951284322, "grad_norm": 0.230107843875885, "learning_rate": 1e-05, "loss": 0.9855, "step": 105260 }, { "epoch": 93.23737821080603, "grad_norm": 0.2354736626148224, "learning_rate": 1e-05, "loss": 0.9827, "step": 105265 }, { "epoch": 93.24180690876882, "grad_norm": 0.2273092418909073, "learning_rate": 1e-05, "loss": 0.9928, "step": 105270 }, { "epoch": 93.24623560673162, "grad_norm": 0.22728396952152252, "learning_rate": 1e-05, "loss": 1.0176, "step": 105275 }, { "epoch": 93.25066430469442, "grad_norm": 0.2526763677597046, "learning_rate": 1e-05, "loss": 1.0011, "step": 105280 }, { "epoch": 93.25509300265722, "grad_norm": 0.22754980623722076, "learning_rate": 1e-05, "loss": 0.964, "step": 105285 }, { "epoch": 93.25952170062001, "grad_norm": 0.237117737531662, "learning_rate": 1e-05, "loss": 0.9674, "step": 105290 }, { "epoch": 93.26395039858282, "grad_norm": 0.25008267164230347, "learning_rate": 1e-05, "loss": 0.9783, "step": 105295 }, { "epoch": 93.26837909654562, "grad_norm": 0.2122572809457779, "learning_rate": 1e-05, "loss": 0.975, "step": 105300 }, { "epoch": 93.27280779450841, "grad_norm": 0.26850852370262146, "learning_rate": 1e-05, "loss": 1.0009, "step": 105305 }, { "epoch": 93.27723649247122, "grad_norm": 0.2170819491147995, "learning_rate": 1e-05, "loss": 0.9638, "step": 105310 }, { "epoch": 93.28166519043401, "grad_norm": 0.25903019309043884, "learning_rate": 1e-05, "loss": 0.9557, "step": 105315 }, { "epoch": 93.2860938883968, "grad_norm": 0.22077183425426483, "learning_rate": 1e-05, "loss": 0.9515, "step": 105320 }, { "epoch": 93.29052258635961, "grad_norm": 0.27361154556274414, "learning_rate": 1e-05, "loss": 0.9441, "step": 105325 }, { "epoch": 93.29495128432241, "grad_norm": 0.2323404997587204, "learning_rate": 1e-05, "loss": 0.9986, "step": 105330 }, { "epoch": 93.2993799822852, "grad_norm": 0.23912915587425232, "learning_rate": 1e-05, "loss": 0.9274, "step": 105335 }, { "epoch": 93.30380868024801, "grad_norm": 0.23637878894805908, "learning_rate": 1e-05, "loss": 0.9253, "step": 105340 }, { "epoch": 93.3082373782108, "grad_norm": 0.21239687502384186, "learning_rate": 1e-05, "loss": 0.9722, "step": 105345 }, { "epoch": 93.3126660761736, "grad_norm": 0.2532808780670166, "learning_rate": 1e-05, "loss": 0.9555, "step": 105350 }, { "epoch": 93.31709477413641, "grad_norm": 0.24801155924797058, "learning_rate": 1e-05, "loss": 0.9539, "step": 105355 }, { "epoch": 93.3215234720992, "grad_norm": 0.2455122470855713, "learning_rate": 1e-05, "loss": 0.9639, "step": 105360 }, { "epoch": 93.325952170062, "grad_norm": 0.2233913689851761, "learning_rate": 1e-05, "loss": 0.9561, "step": 105365 }, { "epoch": 93.3303808680248, "grad_norm": 0.22580552101135254, "learning_rate": 1e-05, "loss": 0.9667, "step": 105370 }, { "epoch": 93.3348095659876, "grad_norm": 0.2244798094034195, "learning_rate": 1e-05, "loss": 0.9676, "step": 105375 }, { "epoch": 93.33923826395039, "grad_norm": 0.2265198677778244, "learning_rate": 1e-05, "loss": 0.9611, "step": 105380 }, { "epoch": 93.3436669619132, "grad_norm": 0.26863136887550354, "learning_rate": 1e-05, "loss": 0.9605, "step": 105385 }, { "epoch": 93.348095659876, "grad_norm": 0.23423950374126434, "learning_rate": 1e-05, "loss": 0.9271, "step": 105390 }, { "epoch": 93.35252435783879, "grad_norm": 0.2464640587568283, "learning_rate": 1e-05, "loss": 0.9492, "step": 105395 }, { "epoch": 93.3569530558016, "grad_norm": 0.19595874845981598, "learning_rate": 1e-05, "loss": 0.9555, "step": 105400 }, { "epoch": 93.36138175376439, "grad_norm": 0.23236393928527832, "learning_rate": 1e-05, "loss": 0.9798, "step": 105405 }, { "epoch": 93.36581045172719, "grad_norm": 0.22964251041412354, "learning_rate": 1e-05, "loss": 0.9332, "step": 105410 }, { "epoch": 93.37023914969, "grad_norm": 0.22863388061523438, "learning_rate": 1e-05, "loss": 0.9513, "step": 105415 }, { "epoch": 93.37466784765279, "grad_norm": 0.24817189574241638, "learning_rate": 1e-05, "loss": 0.9713, "step": 105420 }, { "epoch": 93.3790965456156, "grad_norm": 0.2519422769546509, "learning_rate": 1e-05, "loss": 0.8984, "step": 105425 }, { "epoch": 93.38352524357839, "grad_norm": 0.24377983808517456, "learning_rate": 1e-05, "loss": 0.936, "step": 105430 }, { "epoch": 93.38795394154118, "grad_norm": 0.253544420003891, "learning_rate": 1e-05, "loss": 0.9521, "step": 105435 }, { "epoch": 93.39238263950399, "grad_norm": 0.24527321755886078, "learning_rate": 1e-05, "loss": 0.9583, "step": 105440 }, { "epoch": 93.39681133746679, "grad_norm": 0.23329472541809082, "learning_rate": 1e-05, "loss": 0.9605, "step": 105445 }, { "epoch": 93.40124003542958, "grad_norm": 0.23848466575145721, "learning_rate": 1e-05, "loss": 0.9503, "step": 105450 }, { "epoch": 93.40566873339239, "grad_norm": 0.23122332990169525, "learning_rate": 1e-05, "loss": 0.8782, "step": 105455 }, { "epoch": 93.41009743135518, "grad_norm": 0.2294558584690094, "learning_rate": 1e-05, "loss": 0.9701, "step": 105460 }, { "epoch": 93.41452612931798, "grad_norm": 0.24524912238121033, "learning_rate": 1e-05, "loss": 0.9649, "step": 105465 }, { "epoch": 93.41895482728079, "grad_norm": 0.23014891147613525, "learning_rate": 1e-05, "loss": 0.9876, "step": 105470 }, { "epoch": 93.42338352524358, "grad_norm": 0.1919795572757721, "learning_rate": 1e-05, "loss": 0.8927, "step": 105475 }, { "epoch": 93.42781222320637, "grad_norm": 0.25757303833961487, "learning_rate": 1e-05, "loss": 0.9516, "step": 105480 }, { "epoch": 93.43224092116918, "grad_norm": 0.3006926476955414, "learning_rate": 1e-05, "loss": 0.9243, "step": 105485 }, { "epoch": 93.43666961913198, "grad_norm": 0.23858633637428284, "learning_rate": 1e-05, "loss": 0.9384, "step": 105490 }, { "epoch": 93.44109831709477, "grad_norm": 0.2525339424610138, "learning_rate": 1e-05, "loss": 0.9757, "step": 105495 }, { "epoch": 93.44552701505758, "grad_norm": 0.19873204827308655, "learning_rate": 1e-05, "loss": 0.9053, "step": 105500 }, { "epoch": 93.44995571302037, "grad_norm": 0.2233034074306488, "learning_rate": 1e-05, "loss": 0.9877, "step": 105505 }, { "epoch": 93.45438441098317, "grad_norm": 0.2588280141353607, "learning_rate": 1e-05, "loss": 0.9368, "step": 105510 }, { "epoch": 93.45881310894598, "grad_norm": 0.19022412598133087, "learning_rate": 1e-05, "loss": 0.9704, "step": 105515 }, { "epoch": 93.46324180690877, "grad_norm": 0.24633122980594635, "learning_rate": 1e-05, "loss": 1.0333, "step": 105520 }, { "epoch": 93.46767050487156, "grad_norm": 0.24296921491622925, "learning_rate": 1e-05, "loss": 0.9391, "step": 105525 }, { "epoch": 93.47209920283437, "grad_norm": 0.2565198838710785, "learning_rate": 1e-05, "loss": 0.9666, "step": 105530 }, { "epoch": 93.47652790079717, "grad_norm": 0.23200194537639618, "learning_rate": 1e-05, "loss": 0.9549, "step": 105535 }, { "epoch": 93.48095659875996, "grad_norm": 0.2639910578727722, "learning_rate": 1e-05, "loss": 0.9593, "step": 105540 }, { "epoch": 93.48538529672277, "grad_norm": 0.1978563517332077, "learning_rate": 1e-05, "loss": 0.962, "step": 105545 }, { "epoch": 93.48981399468556, "grad_norm": 0.21637532114982605, "learning_rate": 1e-05, "loss": 0.9993, "step": 105550 }, { "epoch": 93.49424269264836, "grad_norm": 0.21952396631240845, "learning_rate": 1e-05, "loss": 0.9398, "step": 105555 }, { "epoch": 93.49867139061116, "grad_norm": 0.24132317304611206, "learning_rate": 1e-05, "loss": 1.0003, "step": 105560 }, { "epoch": 93.50310008857396, "grad_norm": 0.23598463833332062, "learning_rate": 1e-05, "loss": 0.9879, "step": 105565 }, { "epoch": 93.50752878653675, "grad_norm": 0.22310227155685425, "learning_rate": 1e-05, "loss": 0.9763, "step": 105570 }, { "epoch": 93.51195748449956, "grad_norm": 0.2703414261341095, "learning_rate": 1e-05, "loss": 0.8837, "step": 105575 }, { "epoch": 93.51638618246236, "grad_norm": 0.2524678409099579, "learning_rate": 1e-05, "loss": 0.9458, "step": 105580 }, { "epoch": 93.52081488042515, "grad_norm": 0.22786295413970947, "learning_rate": 1e-05, "loss": 0.9599, "step": 105585 }, { "epoch": 93.52524357838796, "grad_norm": 0.2373133897781372, "learning_rate": 1e-05, "loss": 1.0109, "step": 105590 }, { "epoch": 93.52967227635075, "grad_norm": 0.24242708086967468, "learning_rate": 1e-05, "loss": 0.9913, "step": 105595 }, { "epoch": 93.53410097431355, "grad_norm": 0.2709553837776184, "learning_rate": 1e-05, "loss": 0.9227, "step": 105600 }, { "epoch": 93.53852967227635, "grad_norm": 0.21862784028053284, "learning_rate": 1e-05, "loss": 0.9875, "step": 105605 }, { "epoch": 93.54295837023915, "grad_norm": 0.30443358421325684, "learning_rate": 1e-05, "loss": 0.9345, "step": 105610 }, { "epoch": 93.54738706820194, "grad_norm": 0.2625056207180023, "learning_rate": 1e-05, "loss": 0.9821, "step": 105615 }, { "epoch": 93.55181576616475, "grad_norm": 0.2663037180900574, "learning_rate": 1e-05, "loss": 0.9614, "step": 105620 }, { "epoch": 93.55624446412754, "grad_norm": 0.25701725482940674, "learning_rate": 1e-05, "loss": 0.9721, "step": 105625 }, { "epoch": 93.56067316209034, "grad_norm": 0.27689310908317566, "learning_rate": 1e-05, "loss": 0.9211, "step": 105630 }, { "epoch": 93.56510186005315, "grad_norm": 0.2511526942253113, "learning_rate": 1e-05, "loss": 0.9648, "step": 105635 }, { "epoch": 93.56953055801594, "grad_norm": 0.23363229632377625, "learning_rate": 1e-05, "loss": 0.9614, "step": 105640 }, { "epoch": 93.57395925597874, "grad_norm": 0.21774157881736755, "learning_rate": 1e-05, "loss": 0.9634, "step": 105645 }, { "epoch": 93.57838795394154, "grad_norm": 0.266116738319397, "learning_rate": 1e-05, "loss": 0.9655, "step": 105650 }, { "epoch": 93.58281665190434, "grad_norm": 0.26490187644958496, "learning_rate": 1e-05, "loss": 0.9246, "step": 105655 }, { "epoch": 93.58724534986715, "grad_norm": 0.24928782880306244, "learning_rate": 1e-05, "loss": 0.9488, "step": 105660 }, { "epoch": 93.59167404782994, "grad_norm": 0.26242780685424805, "learning_rate": 1e-05, "loss": 0.9034, "step": 105665 }, { "epoch": 93.59610274579273, "grad_norm": 0.22840522229671478, "learning_rate": 1e-05, "loss": 0.9725, "step": 105670 }, { "epoch": 93.60053144375554, "grad_norm": 0.3233090937137604, "learning_rate": 1e-05, "loss": 0.9167, "step": 105675 }, { "epoch": 93.60496014171834, "grad_norm": 0.28963232040405273, "learning_rate": 1e-05, "loss": 0.9238, "step": 105680 }, { "epoch": 93.60938883968113, "grad_norm": 0.21576011180877686, "learning_rate": 1e-05, "loss": 0.9347, "step": 105685 }, { "epoch": 93.61381753764394, "grad_norm": 0.2524646818637848, "learning_rate": 1e-05, "loss": 0.8925, "step": 105690 }, { "epoch": 93.61824623560673, "grad_norm": 0.23083454370498657, "learning_rate": 1e-05, "loss": 0.9865, "step": 105695 }, { "epoch": 93.62267493356953, "grad_norm": 0.2314504086971283, "learning_rate": 1e-05, "loss": 1.0087, "step": 105700 }, { "epoch": 93.62710363153234, "grad_norm": 0.2916317284107208, "learning_rate": 1e-05, "loss": 0.9561, "step": 105705 }, { "epoch": 93.63153232949513, "grad_norm": 0.24366340041160583, "learning_rate": 1e-05, "loss": 0.9512, "step": 105710 }, { "epoch": 93.63596102745792, "grad_norm": 0.2564623951911926, "learning_rate": 1e-05, "loss": 0.9985, "step": 105715 }, { "epoch": 93.64038972542073, "grad_norm": 0.23970985412597656, "learning_rate": 1e-05, "loss": 0.9398, "step": 105720 }, { "epoch": 93.64481842338353, "grad_norm": 0.25679343938827515, "learning_rate": 1e-05, "loss": 0.9591, "step": 105725 }, { "epoch": 93.64924712134632, "grad_norm": 0.24072374403476715, "learning_rate": 1e-05, "loss": 0.967, "step": 105730 }, { "epoch": 93.65367581930913, "grad_norm": 0.23277287185192108, "learning_rate": 1e-05, "loss": 0.9206, "step": 105735 }, { "epoch": 93.65810451727192, "grad_norm": 0.2334434539079666, "learning_rate": 1e-05, "loss": 0.9719, "step": 105740 }, { "epoch": 93.66253321523472, "grad_norm": 0.26355889439582825, "learning_rate": 1e-05, "loss": 1.0143, "step": 105745 }, { "epoch": 93.66696191319753, "grad_norm": 0.236064150929451, "learning_rate": 1e-05, "loss": 0.9357, "step": 105750 }, { "epoch": 93.67139061116032, "grad_norm": 0.21392065286636353, "learning_rate": 1e-05, "loss": 0.9288, "step": 105755 }, { "epoch": 93.67581930912311, "grad_norm": 0.2361832857131958, "learning_rate": 1e-05, "loss": 0.9398, "step": 105760 }, { "epoch": 93.68024800708592, "grad_norm": 0.23423390090465546, "learning_rate": 1e-05, "loss": 0.912, "step": 105765 }, { "epoch": 93.68467670504872, "grad_norm": 0.22610947489738464, "learning_rate": 1e-05, "loss": 0.9606, "step": 105770 }, { "epoch": 93.68910540301151, "grad_norm": 0.27814966440200806, "learning_rate": 1e-05, "loss": 0.9517, "step": 105775 }, { "epoch": 93.69353410097432, "grad_norm": 0.2127348929643631, "learning_rate": 1e-05, "loss": 0.9797, "step": 105780 }, { "epoch": 93.69796279893711, "grad_norm": 0.2242303192615509, "learning_rate": 1e-05, "loss": 0.9073, "step": 105785 }, { "epoch": 93.7023914968999, "grad_norm": 0.21777324378490448, "learning_rate": 1e-05, "loss": 0.9335, "step": 105790 }, { "epoch": 93.70682019486271, "grad_norm": 0.24466101825237274, "learning_rate": 1e-05, "loss": 0.9227, "step": 105795 }, { "epoch": 93.71124889282551, "grad_norm": 0.2629692256450653, "learning_rate": 1e-05, "loss": 0.9389, "step": 105800 }, { "epoch": 93.7156775907883, "grad_norm": 0.26391953229904175, "learning_rate": 1e-05, "loss": 0.9594, "step": 105805 }, { "epoch": 93.72010628875111, "grad_norm": 0.2962143123149872, "learning_rate": 1e-05, "loss": 0.9736, "step": 105810 }, { "epoch": 93.7245349867139, "grad_norm": 0.26831114292144775, "learning_rate": 1e-05, "loss": 0.9393, "step": 105815 }, { "epoch": 93.7289636846767, "grad_norm": 0.1945819854736328, "learning_rate": 1e-05, "loss": 0.9691, "step": 105820 }, { "epoch": 93.73339238263951, "grad_norm": 0.21918925642967224, "learning_rate": 1e-05, "loss": 0.9195, "step": 105825 }, { "epoch": 93.7378210806023, "grad_norm": 0.22030341625213623, "learning_rate": 1e-05, "loss": 0.9609, "step": 105830 }, { "epoch": 93.7422497785651, "grad_norm": 0.23286330699920654, "learning_rate": 1e-05, "loss": 0.972, "step": 105835 }, { "epoch": 93.7466784765279, "grad_norm": 0.22802424430847168, "learning_rate": 1e-05, "loss": 0.9728, "step": 105840 }, { "epoch": 93.7511071744907, "grad_norm": 0.232124462723732, "learning_rate": 1e-05, "loss": 1.0102, "step": 105845 }, { "epoch": 93.75553587245349, "grad_norm": 0.23087060451507568, "learning_rate": 1e-05, "loss": 0.9573, "step": 105850 }, { "epoch": 93.7599645704163, "grad_norm": 0.22931510210037231, "learning_rate": 1e-05, "loss": 1.0065, "step": 105855 }, { "epoch": 93.7643932683791, "grad_norm": 0.20915593206882477, "learning_rate": 1e-05, "loss": 0.9583, "step": 105860 }, { "epoch": 93.76882196634189, "grad_norm": 0.30223989486694336, "learning_rate": 1e-05, "loss": 1.0422, "step": 105865 }, { "epoch": 93.7732506643047, "grad_norm": 0.2437208592891693, "learning_rate": 1e-05, "loss": 0.9818, "step": 105870 }, { "epoch": 93.77767936226749, "grad_norm": 0.2636796236038208, "learning_rate": 1e-05, "loss": 0.9678, "step": 105875 }, { "epoch": 93.78210806023029, "grad_norm": 0.23928864300251007, "learning_rate": 1e-05, "loss": 1.0097, "step": 105880 }, { "epoch": 93.7865367581931, "grad_norm": 0.2815961241722107, "learning_rate": 1e-05, "loss": 0.9622, "step": 105885 }, { "epoch": 93.79096545615589, "grad_norm": 0.2229508012533188, "learning_rate": 1e-05, "loss": 0.9377, "step": 105890 }, { "epoch": 93.79539415411868, "grad_norm": 0.24366120994091034, "learning_rate": 1e-05, "loss": 0.9833, "step": 105895 }, { "epoch": 93.79982285208149, "grad_norm": 0.2436981350183487, "learning_rate": 1e-05, "loss": 0.9485, "step": 105900 }, { "epoch": 93.80425155004428, "grad_norm": 0.21499690413475037, "learning_rate": 1e-05, "loss": 0.953, "step": 105905 }, { "epoch": 93.8086802480071, "grad_norm": 0.23913350701332092, "learning_rate": 1e-05, "loss": 0.9148, "step": 105910 }, { "epoch": 93.81310894596989, "grad_norm": 0.23672091960906982, "learning_rate": 1e-05, "loss": 1.0014, "step": 105915 }, { "epoch": 93.81753764393268, "grad_norm": 0.25560393929481506, "learning_rate": 1e-05, "loss": 0.8931, "step": 105920 }, { "epoch": 93.82196634189549, "grad_norm": 0.23304376006126404, "learning_rate": 1e-05, "loss": 0.8914, "step": 105925 }, { "epoch": 93.82639503985828, "grad_norm": 0.2780101001262665, "learning_rate": 1e-05, "loss": 0.9455, "step": 105930 }, { "epoch": 93.83082373782108, "grad_norm": 0.20780281722545624, "learning_rate": 1e-05, "loss": 0.954, "step": 105935 }, { "epoch": 93.83525243578389, "grad_norm": 0.29038724303245544, "learning_rate": 1e-05, "loss": 0.9839, "step": 105940 }, { "epoch": 93.83968113374668, "grad_norm": 0.24847756326198578, "learning_rate": 1e-05, "loss": 0.9289, "step": 105945 }, { "epoch": 93.84410983170947, "grad_norm": 0.2558285593986511, "learning_rate": 1e-05, "loss": 0.9701, "step": 105950 }, { "epoch": 93.84853852967228, "grad_norm": 0.2624794542789459, "learning_rate": 1e-05, "loss": 0.9129, "step": 105955 }, { "epoch": 93.85296722763508, "grad_norm": 0.2644163966178894, "learning_rate": 1e-05, "loss": 1.0023, "step": 105960 }, { "epoch": 93.85739592559787, "grad_norm": 0.2528930604457855, "learning_rate": 1e-05, "loss": 0.9382, "step": 105965 }, { "epoch": 93.86182462356068, "grad_norm": 0.2019290030002594, "learning_rate": 1e-05, "loss": 0.9758, "step": 105970 }, { "epoch": 93.86625332152347, "grad_norm": 0.24584545195102692, "learning_rate": 1e-05, "loss": 0.9177, "step": 105975 }, { "epoch": 93.87068201948627, "grad_norm": 0.23608802258968353, "learning_rate": 1e-05, "loss": 0.9404, "step": 105980 }, { "epoch": 93.87511071744908, "grad_norm": 0.25936681032180786, "learning_rate": 1e-05, "loss": 0.9227, "step": 105985 }, { "epoch": 93.87953941541187, "grad_norm": 0.2223770022392273, "learning_rate": 1e-05, "loss": 0.9843, "step": 105990 }, { "epoch": 93.88396811337466, "grad_norm": 0.2358471006155014, "learning_rate": 1e-05, "loss": 0.9407, "step": 105995 }, { "epoch": 93.88839681133747, "grad_norm": 0.23934701085090637, "learning_rate": 1e-05, "loss": 0.9238, "step": 106000 }, { "epoch": 93.89282550930027, "grad_norm": 0.24968120455741882, "learning_rate": 1e-05, "loss": 0.935, "step": 106005 }, { "epoch": 93.89725420726306, "grad_norm": 0.23409950733184814, "learning_rate": 1e-05, "loss": 1.0067, "step": 106010 }, { "epoch": 93.90168290522587, "grad_norm": 0.2168843001127243, "learning_rate": 1e-05, "loss": 0.9663, "step": 106015 }, { "epoch": 93.90611160318866, "grad_norm": 0.2087348997592926, "learning_rate": 1e-05, "loss": 0.9268, "step": 106020 }, { "epoch": 93.91054030115146, "grad_norm": 0.23356500267982483, "learning_rate": 1e-05, "loss": 0.9475, "step": 106025 }, { "epoch": 93.91496899911427, "grad_norm": 0.220457524061203, "learning_rate": 1e-05, "loss": 0.978, "step": 106030 }, { "epoch": 93.91939769707706, "grad_norm": 0.21549856662750244, "learning_rate": 1e-05, "loss": 1.0328, "step": 106035 }, { "epoch": 93.92382639503985, "grad_norm": 0.21504388749599457, "learning_rate": 1e-05, "loss": 0.9082, "step": 106040 }, { "epoch": 93.92825509300266, "grad_norm": 0.23265616595745087, "learning_rate": 1e-05, "loss": 0.8788, "step": 106045 }, { "epoch": 93.93268379096546, "grad_norm": 0.28221890330314636, "learning_rate": 1e-05, "loss": 0.951, "step": 106050 }, { "epoch": 93.93711248892825, "grad_norm": 0.2596142888069153, "learning_rate": 1e-05, "loss": 0.9273, "step": 106055 }, { "epoch": 93.94154118689106, "grad_norm": 0.21485969424247742, "learning_rate": 1e-05, "loss": 0.9467, "step": 106060 }, { "epoch": 93.94596988485385, "grad_norm": 0.22859859466552734, "learning_rate": 1e-05, "loss": 0.9757, "step": 106065 }, { "epoch": 93.95039858281665, "grad_norm": 0.2548113465309143, "learning_rate": 1e-05, "loss": 0.9416, "step": 106070 }, { "epoch": 93.95482728077945, "grad_norm": 0.258844792842865, "learning_rate": 1e-05, "loss": 0.9334, "step": 106075 }, { "epoch": 93.95925597874225, "grad_norm": 0.22398611903190613, "learning_rate": 1e-05, "loss": 0.9123, "step": 106080 }, { "epoch": 93.96368467670504, "grad_norm": 0.24999378621578217, "learning_rate": 1e-05, "loss": 0.9719, "step": 106085 }, { "epoch": 93.96811337466785, "grad_norm": 0.22970613837242126, "learning_rate": 1e-05, "loss": 0.9062, "step": 106090 }, { "epoch": 93.97254207263065, "grad_norm": 0.24498316645622253, "learning_rate": 1e-05, "loss": 0.9754, "step": 106095 }, { "epoch": 93.97697077059344, "grad_norm": 0.2519926428794861, "learning_rate": 1e-05, "loss": 1.036, "step": 106100 }, { "epoch": 93.98139946855625, "grad_norm": 0.24293173849582672, "learning_rate": 1e-05, "loss": 0.948, "step": 106105 }, { "epoch": 93.98582816651904, "grad_norm": 0.24342729151248932, "learning_rate": 1e-05, "loss": 0.9414, "step": 106110 }, { "epoch": 93.99025686448184, "grad_norm": 0.23912575840950012, "learning_rate": 1e-05, "loss": 0.9539, "step": 106115 }, { "epoch": 93.99468556244464, "grad_norm": 0.25066015124320984, "learning_rate": 1e-05, "loss": 0.8752, "step": 106120 }, { "epoch": 93.99911426040744, "grad_norm": 0.24286554753780365, "learning_rate": 1e-05, "loss": 0.9517, "step": 106125 }, { "epoch": 94.00354295837023, "grad_norm": 0.32619595527648926, "learning_rate": 1e-05, "loss": 0.9093, "step": 106130 }, { "epoch": 94.00797165633304, "grad_norm": 0.2892949879169464, "learning_rate": 1e-05, "loss": 0.8868, "step": 106135 }, { "epoch": 94.01240035429583, "grad_norm": 0.2920706272125244, "learning_rate": 1e-05, "loss": 1.0165, "step": 106140 }, { "epoch": 94.01682905225863, "grad_norm": 0.22681845724582672, "learning_rate": 1e-05, "loss": 0.9482, "step": 106145 }, { "epoch": 94.02125775022144, "grad_norm": 0.2509005069732666, "learning_rate": 1e-05, "loss": 0.9494, "step": 106150 }, { "epoch": 94.02568644818423, "grad_norm": 0.24970906972885132, "learning_rate": 1e-05, "loss": 0.9761, "step": 106155 }, { "epoch": 94.03011514614704, "grad_norm": 0.2244836837053299, "learning_rate": 1e-05, "loss": 1.0076, "step": 106160 }, { "epoch": 94.03454384410983, "grad_norm": 0.28171905875205994, "learning_rate": 1e-05, "loss": 0.9938, "step": 106165 }, { "epoch": 94.03897254207263, "grad_norm": 0.26539576053619385, "learning_rate": 1e-05, "loss": 0.9574, "step": 106170 }, { "epoch": 94.04340124003544, "grad_norm": 0.29274851083755493, "learning_rate": 1e-05, "loss": 0.9495, "step": 106175 }, { "epoch": 94.04782993799823, "grad_norm": 0.22678150236606598, "learning_rate": 1e-05, "loss": 0.9836, "step": 106180 }, { "epoch": 94.05225863596102, "grad_norm": 0.24295900762081146, "learning_rate": 1e-05, "loss": 0.9742, "step": 106185 }, { "epoch": 94.05668733392383, "grad_norm": 0.2369834929704666, "learning_rate": 1e-05, "loss": 0.9956, "step": 106190 }, { "epoch": 94.06111603188663, "grad_norm": 0.2000340223312378, "learning_rate": 1e-05, "loss": 0.9662, "step": 106195 }, { "epoch": 94.06554472984942, "grad_norm": 0.20921172201633453, "learning_rate": 1e-05, "loss": 0.9718, "step": 106200 }, { "epoch": 94.06997342781223, "grad_norm": 0.24889475107192993, "learning_rate": 1e-05, "loss": 0.9784, "step": 106205 }, { "epoch": 94.07440212577502, "grad_norm": 0.2599664628505707, "learning_rate": 1e-05, "loss": 0.9395, "step": 106210 }, { "epoch": 94.07883082373782, "grad_norm": 0.251973420381546, "learning_rate": 1e-05, "loss": 1.0244, "step": 106215 }, { "epoch": 94.08325952170063, "grad_norm": 0.26440638303756714, "learning_rate": 1e-05, "loss": 0.9379, "step": 106220 }, { "epoch": 94.08768821966342, "grad_norm": 0.2862977087497711, "learning_rate": 1e-05, "loss": 0.9649, "step": 106225 }, { "epoch": 94.09211691762621, "grad_norm": 0.21448568999767303, "learning_rate": 1e-05, "loss": 0.9159, "step": 106230 }, { "epoch": 94.09654561558902, "grad_norm": 0.2629851698875427, "learning_rate": 1e-05, "loss": 0.9383, "step": 106235 }, { "epoch": 94.10097431355182, "grad_norm": 0.24043430387973785, "learning_rate": 1e-05, "loss": 0.9656, "step": 106240 }, { "epoch": 94.10540301151461, "grad_norm": 0.24246253073215485, "learning_rate": 1e-05, "loss": 0.9977, "step": 106245 }, { "epoch": 94.10983170947742, "grad_norm": 0.2435413897037506, "learning_rate": 1e-05, "loss": 0.9406, "step": 106250 }, { "epoch": 94.11426040744021, "grad_norm": 0.27490028738975525, "learning_rate": 1e-05, "loss": 0.9411, "step": 106255 }, { "epoch": 94.118689105403, "grad_norm": 0.22965127229690552, "learning_rate": 1e-05, "loss": 1.0131, "step": 106260 }, { "epoch": 94.12311780336582, "grad_norm": 0.22054623067378998, "learning_rate": 1e-05, "loss": 0.9713, "step": 106265 }, { "epoch": 94.12754650132861, "grad_norm": 0.23733578622341156, "learning_rate": 1e-05, "loss": 0.8998, "step": 106270 }, { "epoch": 94.1319751992914, "grad_norm": 0.24123911559581757, "learning_rate": 1e-05, "loss": 0.957, "step": 106275 }, { "epoch": 94.13640389725421, "grad_norm": 0.2411148101091385, "learning_rate": 1e-05, "loss": 0.9586, "step": 106280 }, { "epoch": 94.140832595217, "grad_norm": 0.23350247740745544, "learning_rate": 1e-05, "loss": 0.9729, "step": 106285 }, { "epoch": 94.1452612931798, "grad_norm": 0.2606596350669861, "learning_rate": 1e-05, "loss": 0.9471, "step": 106290 }, { "epoch": 94.14968999114261, "grad_norm": 0.24810384213924408, "learning_rate": 1e-05, "loss": 0.9765, "step": 106295 }, { "epoch": 94.1541186891054, "grad_norm": 0.2267456352710724, "learning_rate": 1e-05, "loss": 0.9558, "step": 106300 }, { "epoch": 94.1585473870682, "grad_norm": 0.25738993287086487, "learning_rate": 1e-05, "loss": 0.9394, "step": 106305 }, { "epoch": 94.162976085031, "grad_norm": 0.20813482999801636, "learning_rate": 1e-05, "loss": 0.9555, "step": 106310 }, { "epoch": 94.1674047829938, "grad_norm": 0.24512743949890137, "learning_rate": 1e-05, "loss": 0.9263, "step": 106315 }, { "epoch": 94.1718334809566, "grad_norm": 0.2616995871067047, "learning_rate": 1e-05, "loss": 0.9637, "step": 106320 }, { "epoch": 94.1762621789194, "grad_norm": 0.23294930160045624, "learning_rate": 1e-05, "loss": 0.9971, "step": 106325 }, { "epoch": 94.1806908768822, "grad_norm": 0.21492715179920197, "learning_rate": 1e-05, "loss": 0.9516, "step": 106330 }, { "epoch": 94.18511957484499, "grad_norm": 0.2580125629901886, "learning_rate": 1e-05, "loss": 0.9345, "step": 106335 }, { "epoch": 94.1895482728078, "grad_norm": 0.24805280566215515, "learning_rate": 1e-05, "loss": 0.9394, "step": 106340 }, { "epoch": 94.19397697077059, "grad_norm": 0.2524789571762085, "learning_rate": 1e-05, "loss": 0.9461, "step": 106345 }, { "epoch": 94.19840566873339, "grad_norm": 0.22692793607711792, "learning_rate": 1e-05, "loss": 0.9648, "step": 106350 }, { "epoch": 94.2028343666962, "grad_norm": 0.2800218462944031, "learning_rate": 1e-05, "loss": 1.0008, "step": 106355 }, { "epoch": 94.20726306465899, "grad_norm": 0.26655757427215576, "learning_rate": 1e-05, "loss": 1.0073, "step": 106360 }, { "epoch": 94.21169176262178, "grad_norm": 0.2496543526649475, "learning_rate": 1e-05, "loss": 0.9298, "step": 106365 }, { "epoch": 94.21612046058459, "grad_norm": 0.2739472985267639, "learning_rate": 1e-05, "loss": 0.9523, "step": 106370 }, { "epoch": 94.22054915854739, "grad_norm": 0.2369619607925415, "learning_rate": 1e-05, "loss": 0.9946, "step": 106375 }, { "epoch": 94.22497785651018, "grad_norm": 0.2599835693836212, "learning_rate": 1e-05, "loss": 0.9479, "step": 106380 }, { "epoch": 94.22940655447299, "grad_norm": 0.270287424325943, "learning_rate": 1e-05, "loss": 1.0174, "step": 106385 }, { "epoch": 94.23383525243578, "grad_norm": 0.24475695192813873, "learning_rate": 1e-05, "loss": 1.0063, "step": 106390 }, { "epoch": 94.23826395039858, "grad_norm": 0.2421625852584839, "learning_rate": 1e-05, "loss": 0.9418, "step": 106395 }, { "epoch": 94.24269264836138, "grad_norm": 0.2789970338344574, "learning_rate": 1e-05, "loss": 0.8944, "step": 106400 }, { "epoch": 94.24712134632418, "grad_norm": 0.24444790184497833, "learning_rate": 1e-05, "loss": 0.9534, "step": 106405 }, { "epoch": 94.25155004428699, "grad_norm": 0.28055405616760254, "learning_rate": 1e-05, "loss": 0.9747, "step": 106410 }, { "epoch": 94.25597874224978, "grad_norm": 0.2829570770263672, "learning_rate": 1e-05, "loss": 0.9377, "step": 106415 }, { "epoch": 94.26040744021257, "grad_norm": 0.20914830267429352, "learning_rate": 1e-05, "loss": 0.9965, "step": 106420 }, { "epoch": 94.26483613817538, "grad_norm": 0.25046849250793457, "learning_rate": 1e-05, "loss": 1.0129, "step": 106425 }, { "epoch": 94.26926483613818, "grad_norm": 0.25098326802253723, "learning_rate": 1e-05, "loss": 0.9433, "step": 106430 }, { "epoch": 94.27369353410097, "grad_norm": 0.23200610280036926, "learning_rate": 1e-05, "loss": 0.9559, "step": 106435 }, { "epoch": 94.27812223206378, "grad_norm": 0.24208246171474457, "learning_rate": 1e-05, "loss": 0.9244, "step": 106440 }, { "epoch": 94.28255093002657, "grad_norm": 0.25149649381637573, "learning_rate": 1e-05, "loss": 0.9693, "step": 106445 }, { "epoch": 94.28697962798937, "grad_norm": 0.22865618765354156, "learning_rate": 1e-05, "loss": 0.9148, "step": 106450 }, { "epoch": 94.29140832595218, "grad_norm": 0.21656499803066254, "learning_rate": 1e-05, "loss": 0.985, "step": 106455 }, { "epoch": 94.29583702391497, "grad_norm": 0.20786502957344055, "learning_rate": 1e-05, "loss": 0.9096, "step": 106460 }, { "epoch": 94.30026572187776, "grad_norm": 0.23585541546344757, "learning_rate": 1e-05, "loss": 0.9351, "step": 106465 }, { "epoch": 94.30469441984057, "grad_norm": 0.24068616330623627, "learning_rate": 1e-05, "loss": 0.9597, "step": 106470 }, { "epoch": 94.30912311780337, "grad_norm": 0.2645930349826813, "learning_rate": 1e-05, "loss": 0.8932, "step": 106475 }, { "epoch": 94.31355181576616, "grad_norm": 0.22868798673152924, "learning_rate": 1e-05, "loss": 0.931, "step": 106480 }, { "epoch": 94.31798051372897, "grad_norm": 0.22378195822238922, "learning_rate": 1e-05, "loss": 0.9773, "step": 106485 }, { "epoch": 94.32240921169176, "grad_norm": 0.2546638250350952, "learning_rate": 1e-05, "loss": 0.92, "step": 106490 }, { "epoch": 94.32683790965456, "grad_norm": 0.26925382018089294, "learning_rate": 1e-05, "loss": 0.9341, "step": 106495 }, { "epoch": 94.33126660761737, "grad_norm": 0.23870424926280975, "learning_rate": 1e-05, "loss": 0.9454, "step": 106500 }, { "epoch": 94.33569530558016, "grad_norm": 0.28356173634529114, "learning_rate": 1e-05, "loss": 0.9072, "step": 106505 }, { "epoch": 94.34012400354295, "grad_norm": 0.21108368039131165, "learning_rate": 1e-05, "loss": 0.972, "step": 106510 }, { "epoch": 94.34455270150576, "grad_norm": 0.2537795305252075, "learning_rate": 1e-05, "loss": 0.9517, "step": 106515 }, { "epoch": 94.34898139946856, "grad_norm": 0.25566232204437256, "learning_rate": 1e-05, "loss": 0.9637, "step": 106520 }, { "epoch": 94.35341009743135, "grad_norm": 0.24104613065719604, "learning_rate": 1e-05, "loss": 0.9971, "step": 106525 }, { "epoch": 94.35783879539416, "grad_norm": 0.2654995322227478, "learning_rate": 1e-05, "loss": 0.9586, "step": 106530 }, { "epoch": 94.36226749335695, "grad_norm": 0.23405615985393524, "learning_rate": 1e-05, "loss": 0.962, "step": 106535 }, { "epoch": 94.36669619131975, "grad_norm": 0.25690433382987976, "learning_rate": 1e-05, "loss": 0.9921, "step": 106540 }, { "epoch": 94.37112488928256, "grad_norm": 0.2330116629600525, "learning_rate": 1e-05, "loss": 0.917, "step": 106545 }, { "epoch": 94.37555358724535, "grad_norm": 0.2403959035873413, "learning_rate": 1e-05, "loss": 0.9586, "step": 106550 }, { "epoch": 94.37998228520814, "grad_norm": 0.24891816079616547, "learning_rate": 1e-05, "loss": 1.0033, "step": 106555 }, { "epoch": 94.38441098317095, "grad_norm": 0.27309247851371765, "learning_rate": 1e-05, "loss": 0.9406, "step": 106560 }, { "epoch": 94.38883968113375, "grad_norm": 0.23420576751232147, "learning_rate": 1e-05, "loss": 0.9871, "step": 106565 }, { "epoch": 94.39326837909654, "grad_norm": 0.22360877692699432, "learning_rate": 1e-05, "loss": 0.9771, "step": 106570 }, { "epoch": 94.39769707705935, "grad_norm": 0.21367734670639038, "learning_rate": 1e-05, "loss": 0.9502, "step": 106575 }, { "epoch": 94.40212577502214, "grad_norm": 0.2309485524892807, "learning_rate": 1e-05, "loss": 0.9566, "step": 106580 }, { "epoch": 94.40655447298494, "grad_norm": 0.2124474197626114, "learning_rate": 1e-05, "loss": 0.8815, "step": 106585 }, { "epoch": 94.41098317094774, "grad_norm": 0.2443016618490219, "learning_rate": 1e-05, "loss": 0.9478, "step": 106590 }, { "epoch": 94.41541186891054, "grad_norm": 0.2841915488243103, "learning_rate": 1e-05, "loss": 1.0113, "step": 106595 }, { "epoch": 94.41984056687333, "grad_norm": 0.23119981586933136, "learning_rate": 1e-05, "loss": 0.9825, "step": 106600 }, { "epoch": 94.42426926483614, "grad_norm": 0.2255610227584839, "learning_rate": 1e-05, "loss": 0.9591, "step": 106605 }, { "epoch": 94.42869796279894, "grad_norm": 0.25441014766693115, "learning_rate": 1e-05, "loss": 0.9836, "step": 106610 }, { "epoch": 94.43312666076173, "grad_norm": 0.24843120574951172, "learning_rate": 1e-05, "loss": 0.9641, "step": 106615 }, { "epoch": 94.43755535872454, "grad_norm": 0.20645496249198914, "learning_rate": 1e-05, "loss": 0.9121, "step": 106620 }, { "epoch": 94.44198405668733, "grad_norm": 0.2380525916814804, "learning_rate": 1e-05, "loss": 0.953, "step": 106625 }, { "epoch": 94.44641275465013, "grad_norm": 0.24944616854190826, "learning_rate": 1e-05, "loss": 0.9504, "step": 106630 }, { "epoch": 94.45084145261293, "grad_norm": 0.2314627766609192, "learning_rate": 1e-05, "loss": 0.9547, "step": 106635 }, { "epoch": 94.45527015057573, "grad_norm": 0.23358173668384552, "learning_rate": 1e-05, "loss": 0.9484, "step": 106640 }, { "epoch": 94.45969884853854, "grad_norm": 0.22349950671195984, "learning_rate": 1e-05, "loss": 0.919, "step": 106645 }, { "epoch": 94.46412754650133, "grad_norm": 0.2273334115743637, "learning_rate": 1e-05, "loss": 0.9902, "step": 106650 }, { "epoch": 94.46855624446412, "grad_norm": 0.2579975426197052, "learning_rate": 1e-05, "loss": 0.9775, "step": 106655 }, { "epoch": 94.47298494242693, "grad_norm": 0.2125396430492401, "learning_rate": 1e-05, "loss": 0.9255, "step": 106660 }, { "epoch": 94.47741364038973, "grad_norm": 0.24439695477485657, "learning_rate": 1e-05, "loss": 0.9744, "step": 106665 }, { "epoch": 94.48184233835252, "grad_norm": 0.22889472544193268, "learning_rate": 1e-05, "loss": 0.9267, "step": 106670 }, { "epoch": 94.48627103631533, "grad_norm": 0.21434274315834045, "learning_rate": 1e-05, "loss": 0.9754, "step": 106675 }, { "epoch": 94.49069973427812, "grad_norm": 0.23859034478664398, "learning_rate": 1e-05, "loss": 0.9646, "step": 106680 }, { "epoch": 94.49512843224092, "grad_norm": 0.2425820231437683, "learning_rate": 1e-05, "loss": 0.9365, "step": 106685 }, { "epoch": 94.49955713020373, "grad_norm": 0.24613337218761444, "learning_rate": 1e-05, "loss": 0.9768, "step": 106690 }, { "epoch": 94.50398582816652, "grad_norm": 0.25566110014915466, "learning_rate": 1e-05, "loss": 0.9792, "step": 106695 }, { "epoch": 94.50841452612931, "grad_norm": 0.23636366426944733, "learning_rate": 1e-05, "loss": 0.9608, "step": 106700 }, { "epoch": 94.51284322409212, "grad_norm": 0.27085795998573303, "learning_rate": 1e-05, "loss": 0.9418, "step": 106705 }, { "epoch": 94.51727192205492, "grad_norm": 0.2568426728248596, "learning_rate": 1e-05, "loss": 0.9003, "step": 106710 }, { "epoch": 94.52170062001771, "grad_norm": 0.22434687614440918, "learning_rate": 1e-05, "loss": 0.9249, "step": 106715 }, { "epoch": 94.52612931798052, "grad_norm": 0.2627421021461487, "learning_rate": 1e-05, "loss": 0.9065, "step": 106720 }, { "epoch": 94.53055801594331, "grad_norm": 0.25969594717025757, "learning_rate": 1e-05, "loss": 0.9905, "step": 106725 }, { "epoch": 94.53498671390611, "grad_norm": 0.2693764865398407, "learning_rate": 1e-05, "loss": 0.9444, "step": 106730 }, { "epoch": 94.53941541186892, "grad_norm": 0.24594071507453918, "learning_rate": 1e-05, "loss": 0.9423, "step": 106735 }, { "epoch": 94.54384410983171, "grad_norm": 0.2511425018310547, "learning_rate": 1e-05, "loss": 0.9971, "step": 106740 }, { "epoch": 94.5482728077945, "grad_norm": 0.23891757428646088, "learning_rate": 1e-05, "loss": 1.0044, "step": 106745 }, { "epoch": 94.55270150575731, "grad_norm": 0.23691214621067047, "learning_rate": 1e-05, "loss": 0.9686, "step": 106750 }, { "epoch": 94.5571302037201, "grad_norm": 0.21230220794677734, "learning_rate": 1e-05, "loss": 0.9377, "step": 106755 }, { "epoch": 94.5615589016829, "grad_norm": 0.20601458847522736, "learning_rate": 1e-05, "loss": 0.9561, "step": 106760 }, { "epoch": 94.56598759964571, "grad_norm": 0.228900745511055, "learning_rate": 1e-05, "loss": 0.9294, "step": 106765 }, { "epoch": 94.5704162976085, "grad_norm": 0.2278706282377243, "learning_rate": 1e-05, "loss": 1.001, "step": 106770 }, { "epoch": 94.5748449955713, "grad_norm": 0.21343880891799927, "learning_rate": 1e-05, "loss": 1.0164, "step": 106775 }, { "epoch": 94.5792736935341, "grad_norm": 0.26100000739097595, "learning_rate": 1e-05, "loss": 0.9327, "step": 106780 }, { "epoch": 94.5837023914969, "grad_norm": 0.2264852076768875, "learning_rate": 1e-05, "loss": 0.9327, "step": 106785 }, { "epoch": 94.5881310894597, "grad_norm": 0.27163198590278625, "learning_rate": 1e-05, "loss": 0.9255, "step": 106790 }, { "epoch": 94.5925597874225, "grad_norm": 0.2259998619556427, "learning_rate": 1e-05, "loss": 0.9793, "step": 106795 }, { "epoch": 94.5969884853853, "grad_norm": 0.23439352214336395, "learning_rate": 1e-05, "loss": 0.9661, "step": 106800 }, { "epoch": 94.60141718334809, "grad_norm": 0.2967948615550995, "learning_rate": 1e-05, "loss": 0.9522, "step": 106805 }, { "epoch": 94.6058458813109, "grad_norm": 0.22962042689323425, "learning_rate": 1e-05, "loss": 0.963, "step": 106810 }, { "epoch": 94.61027457927369, "grad_norm": 0.24918319284915924, "learning_rate": 1e-05, "loss": 0.9863, "step": 106815 }, { "epoch": 94.61470327723649, "grad_norm": 0.2544427216053009, "learning_rate": 1e-05, "loss": 0.9427, "step": 106820 }, { "epoch": 94.6191319751993, "grad_norm": 0.24607641994953156, "learning_rate": 1e-05, "loss": 0.9621, "step": 106825 }, { "epoch": 94.62356067316209, "grad_norm": 0.21463237702846527, "learning_rate": 1e-05, "loss": 0.9376, "step": 106830 }, { "epoch": 94.62798937112488, "grad_norm": 0.2559424042701721, "learning_rate": 1e-05, "loss": 0.9419, "step": 106835 }, { "epoch": 94.63241806908769, "grad_norm": 0.24596081674098969, "learning_rate": 1e-05, "loss": 0.9514, "step": 106840 }, { "epoch": 94.63684676705049, "grad_norm": 0.22070500254631042, "learning_rate": 1e-05, "loss": 0.944, "step": 106845 }, { "epoch": 94.64127546501328, "grad_norm": 0.21979346871376038, "learning_rate": 1e-05, "loss": 0.906, "step": 106850 }, { "epoch": 94.64570416297609, "grad_norm": 0.22034062445163727, "learning_rate": 1e-05, "loss": 0.9481, "step": 106855 }, { "epoch": 94.65013286093888, "grad_norm": 0.23303696513175964, "learning_rate": 1e-05, "loss": 0.9593, "step": 106860 }, { "epoch": 94.65456155890168, "grad_norm": 0.24786536395549774, "learning_rate": 1e-05, "loss": 0.9215, "step": 106865 }, { "epoch": 94.65899025686448, "grad_norm": 0.25682350993156433, "learning_rate": 1e-05, "loss": 0.975, "step": 106870 }, { "epoch": 94.66341895482728, "grad_norm": 0.20970618724822998, "learning_rate": 1e-05, "loss": 0.9085, "step": 106875 }, { "epoch": 94.66784765279007, "grad_norm": 0.24832530319690704, "learning_rate": 1e-05, "loss": 0.9316, "step": 106880 }, { "epoch": 94.67227635075288, "grad_norm": 0.2522828280925751, "learning_rate": 1e-05, "loss": 0.9189, "step": 106885 }, { "epoch": 94.67670504871568, "grad_norm": 0.2097586691379547, "learning_rate": 1e-05, "loss": 0.9591, "step": 106890 }, { "epoch": 94.68113374667848, "grad_norm": 0.2314344048500061, "learning_rate": 1e-05, "loss": 0.9676, "step": 106895 }, { "epoch": 94.68556244464128, "grad_norm": 0.2404661476612091, "learning_rate": 1e-05, "loss": 0.9452, "step": 106900 }, { "epoch": 94.68999114260407, "grad_norm": 0.21006403863430023, "learning_rate": 1e-05, "loss": 0.9951, "step": 106905 }, { "epoch": 94.69441984056688, "grad_norm": 0.2337801307439804, "learning_rate": 1e-05, "loss": 0.9207, "step": 106910 }, { "epoch": 94.69884853852967, "grad_norm": 0.24181610345840454, "learning_rate": 1e-05, "loss": 0.9822, "step": 106915 }, { "epoch": 94.70327723649247, "grad_norm": 0.24205538630485535, "learning_rate": 1e-05, "loss": 0.9562, "step": 106920 }, { "epoch": 94.70770593445528, "grad_norm": 0.23540495336055756, "learning_rate": 1e-05, "loss": 0.9399, "step": 106925 }, { "epoch": 94.71213463241807, "grad_norm": 0.23933860659599304, "learning_rate": 1e-05, "loss": 0.9545, "step": 106930 }, { "epoch": 94.71656333038086, "grad_norm": 0.21771498024463654, "learning_rate": 1e-05, "loss": 0.9741, "step": 106935 }, { "epoch": 94.72099202834367, "grad_norm": 0.2311660200357437, "learning_rate": 1e-05, "loss": 0.9633, "step": 106940 }, { "epoch": 94.72542072630647, "grad_norm": 0.257680743932724, "learning_rate": 1e-05, "loss": 0.9669, "step": 106945 }, { "epoch": 94.72984942426926, "grad_norm": 0.19089046120643616, "learning_rate": 1e-05, "loss": 0.955, "step": 106950 }, { "epoch": 94.73427812223207, "grad_norm": 0.23095518350601196, "learning_rate": 1e-05, "loss": 0.9248, "step": 106955 }, { "epoch": 94.73870682019486, "grad_norm": 0.21291907131671906, "learning_rate": 1e-05, "loss": 0.9816, "step": 106960 }, { "epoch": 94.74313551815766, "grad_norm": 0.28700071573257446, "learning_rate": 1e-05, "loss": 0.9321, "step": 106965 }, { "epoch": 94.74756421612047, "grad_norm": 0.2774430215358734, "learning_rate": 1e-05, "loss": 0.9425, "step": 106970 }, { "epoch": 94.75199291408326, "grad_norm": 0.22784988582134247, "learning_rate": 1e-05, "loss": 0.9443, "step": 106975 }, { "epoch": 94.75642161204605, "grad_norm": 0.25632473826408386, "learning_rate": 1e-05, "loss": 0.922, "step": 106980 }, { "epoch": 94.76085031000886, "grad_norm": 0.2301356941461563, "learning_rate": 1e-05, "loss": 0.9756, "step": 106985 }, { "epoch": 94.76527900797166, "grad_norm": 0.24693381786346436, "learning_rate": 1e-05, "loss": 0.9788, "step": 106990 }, { "epoch": 94.76970770593445, "grad_norm": 0.24424348771572113, "learning_rate": 1e-05, "loss": 0.9692, "step": 106995 }, { "epoch": 94.77413640389726, "grad_norm": 0.27526140213012695, "learning_rate": 1e-05, "loss": 0.9503, "step": 107000 }, { "epoch": 94.77856510186005, "grad_norm": 0.25445759296417236, "learning_rate": 1e-05, "loss": 1.0091, "step": 107005 }, { "epoch": 94.78299379982285, "grad_norm": 0.25785014033317566, "learning_rate": 1e-05, "loss": 0.9154, "step": 107010 }, { "epoch": 94.78742249778566, "grad_norm": 0.20772771537303925, "learning_rate": 1e-05, "loss": 0.9856, "step": 107015 }, { "epoch": 94.79185119574845, "grad_norm": 0.24703574180603027, "learning_rate": 1e-05, "loss": 1.0047, "step": 107020 }, { "epoch": 94.79627989371124, "grad_norm": 0.26185283064842224, "learning_rate": 1e-05, "loss": 1.0057, "step": 107025 }, { "epoch": 94.80070859167405, "grad_norm": 0.24025486409664154, "learning_rate": 1e-05, "loss": 0.9589, "step": 107030 }, { "epoch": 94.80513728963685, "grad_norm": 0.23892425000667572, "learning_rate": 1e-05, "loss": 0.9934, "step": 107035 }, { "epoch": 94.80956598759964, "grad_norm": 0.2291329950094223, "learning_rate": 1e-05, "loss": 0.923, "step": 107040 }, { "epoch": 94.81399468556245, "grad_norm": 0.23601289093494415, "learning_rate": 1e-05, "loss": 0.9333, "step": 107045 }, { "epoch": 94.81842338352524, "grad_norm": 0.27586621046066284, "learning_rate": 1e-05, "loss": 0.9919, "step": 107050 }, { "epoch": 94.82285208148804, "grad_norm": 0.294270783662796, "learning_rate": 1e-05, "loss": 0.9341, "step": 107055 }, { "epoch": 94.82728077945085, "grad_norm": 0.2527262568473816, "learning_rate": 1e-05, "loss": 0.98, "step": 107060 }, { "epoch": 94.83170947741364, "grad_norm": 0.26242318749427795, "learning_rate": 1e-05, "loss": 0.9651, "step": 107065 }, { "epoch": 94.83613817537643, "grad_norm": 0.22645775973796844, "learning_rate": 1e-05, "loss": 0.9532, "step": 107070 }, { "epoch": 94.84056687333924, "grad_norm": 0.19579118490219116, "learning_rate": 1e-05, "loss": 0.9684, "step": 107075 }, { "epoch": 94.84499557130204, "grad_norm": 0.23198047280311584, "learning_rate": 1e-05, "loss": 0.9229, "step": 107080 }, { "epoch": 94.84942426926483, "grad_norm": 0.2218148112297058, "learning_rate": 1e-05, "loss": 0.9468, "step": 107085 }, { "epoch": 94.85385296722764, "grad_norm": 0.2478971779346466, "learning_rate": 1e-05, "loss": 0.9188, "step": 107090 }, { "epoch": 94.85828166519043, "grad_norm": 0.20794397592544556, "learning_rate": 1e-05, "loss": 0.9862, "step": 107095 }, { "epoch": 94.86271036315323, "grad_norm": 0.21862350404262543, "learning_rate": 1e-05, "loss": 0.9915, "step": 107100 }, { "epoch": 94.86713906111603, "grad_norm": 0.20600490272045135, "learning_rate": 1e-05, "loss": 1.0267, "step": 107105 }, { "epoch": 94.87156775907883, "grad_norm": 0.2510509788990021, "learning_rate": 1e-05, "loss": 0.9477, "step": 107110 }, { "epoch": 94.87599645704162, "grad_norm": 0.23539741337299347, "learning_rate": 1e-05, "loss": 0.9295, "step": 107115 }, { "epoch": 94.88042515500443, "grad_norm": 0.21304330229759216, "learning_rate": 1e-05, "loss": 0.9732, "step": 107120 }, { "epoch": 94.88485385296723, "grad_norm": 0.24177055060863495, "learning_rate": 1e-05, "loss": 0.9376, "step": 107125 }, { "epoch": 94.88928255093003, "grad_norm": 0.2605949640274048, "learning_rate": 1e-05, "loss": 0.9431, "step": 107130 }, { "epoch": 94.89371124889283, "grad_norm": 0.21455734968185425, "learning_rate": 1e-05, "loss": 1.0063, "step": 107135 }, { "epoch": 94.89813994685562, "grad_norm": 0.24048122763633728, "learning_rate": 1e-05, "loss": 0.9518, "step": 107140 }, { "epoch": 94.90256864481843, "grad_norm": 0.2430018186569214, "learning_rate": 1e-05, "loss": 1.0224, "step": 107145 }, { "epoch": 94.90699734278122, "grad_norm": 0.26060742139816284, "learning_rate": 1e-05, "loss": 0.9353, "step": 107150 }, { "epoch": 94.91142604074402, "grad_norm": 0.2363378405570984, "learning_rate": 1e-05, "loss": 0.9573, "step": 107155 }, { "epoch": 94.91585473870683, "grad_norm": 0.2722887396812439, "learning_rate": 1e-05, "loss": 0.9256, "step": 107160 }, { "epoch": 94.92028343666962, "grad_norm": 0.2661963999271393, "learning_rate": 1e-05, "loss": 0.9535, "step": 107165 }, { "epoch": 94.92471213463241, "grad_norm": 0.2966618537902832, "learning_rate": 1e-05, "loss": 0.9396, "step": 107170 }, { "epoch": 94.92914083259522, "grad_norm": 0.25535956025123596, "learning_rate": 1e-05, "loss": 0.961, "step": 107175 }, { "epoch": 94.93356953055802, "grad_norm": 0.2727159559726715, "learning_rate": 1e-05, "loss": 0.9662, "step": 107180 }, { "epoch": 94.93799822852081, "grad_norm": 0.2500159442424774, "learning_rate": 1e-05, "loss": 0.8967, "step": 107185 }, { "epoch": 94.94242692648362, "grad_norm": 0.24730458855628967, "learning_rate": 1e-05, "loss": 0.9788, "step": 107190 }, { "epoch": 94.94685562444641, "grad_norm": 0.27276644110679626, "learning_rate": 1e-05, "loss": 0.972, "step": 107195 }, { "epoch": 94.95128432240921, "grad_norm": 0.24198442697525024, "learning_rate": 1e-05, "loss": 0.9289, "step": 107200 }, { "epoch": 94.95571302037202, "grad_norm": 0.25210312008857727, "learning_rate": 1e-05, "loss": 0.9557, "step": 107205 }, { "epoch": 94.96014171833481, "grad_norm": 0.21360740065574646, "learning_rate": 1e-05, "loss": 0.9704, "step": 107210 }, { "epoch": 94.9645704162976, "grad_norm": 0.2179337441921234, "learning_rate": 1e-05, "loss": 0.9223, "step": 107215 }, { "epoch": 94.96899911426041, "grad_norm": 0.27676576375961304, "learning_rate": 1e-05, "loss": 0.9579, "step": 107220 }, { "epoch": 94.9734278122232, "grad_norm": 0.2532136142253876, "learning_rate": 1e-05, "loss": 0.9522, "step": 107225 }, { "epoch": 94.977856510186, "grad_norm": 0.24171559512615204, "learning_rate": 1e-05, "loss": 0.9324, "step": 107230 }, { "epoch": 94.98228520814881, "grad_norm": 0.23634073138237, "learning_rate": 1e-05, "loss": 0.9639, "step": 107235 }, { "epoch": 94.9867139061116, "grad_norm": 0.2309700846672058, "learning_rate": 1e-05, "loss": 0.9825, "step": 107240 }, { "epoch": 94.9911426040744, "grad_norm": 0.25484704971313477, "learning_rate": 1e-05, "loss": 0.983, "step": 107245 }, { "epoch": 94.9955713020372, "grad_norm": 0.2663779556751251, "learning_rate": 1e-05, "loss": 0.9316, "step": 107250 }, { "epoch": 95.0, "grad_norm": 0.25913217663764954, "learning_rate": 1e-05, "loss": 0.8762, "step": 107255 }, { "epoch": 95.0044286979628, "grad_norm": 0.23530063033103943, "learning_rate": 1e-05, "loss": 0.9112, "step": 107260 }, { "epoch": 95.0088573959256, "grad_norm": 0.3243725895881653, "learning_rate": 1e-05, "loss": 0.9339, "step": 107265 }, { "epoch": 95.0132860938884, "grad_norm": 0.26743388175964355, "learning_rate": 1e-05, "loss": 0.9193, "step": 107270 }, { "epoch": 95.01771479185119, "grad_norm": 0.24291455745697021, "learning_rate": 1e-05, "loss": 0.8946, "step": 107275 }, { "epoch": 95.022143489814, "grad_norm": 0.27711138129234314, "learning_rate": 1e-05, "loss": 1.0305, "step": 107280 }, { "epoch": 95.0265721877768, "grad_norm": 0.2239644080400467, "learning_rate": 1e-05, "loss": 0.9679, "step": 107285 }, { "epoch": 95.03100088573959, "grad_norm": 0.21524082124233246, "learning_rate": 1e-05, "loss": 0.9087, "step": 107290 }, { "epoch": 95.0354295837024, "grad_norm": 0.22020091116428375, "learning_rate": 1e-05, "loss": 0.9123, "step": 107295 }, { "epoch": 95.03985828166519, "grad_norm": 0.28928342461586, "learning_rate": 1e-05, "loss": 0.9943, "step": 107300 }, { "epoch": 95.04428697962798, "grad_norm": 0.2070544809103012, "learning_rate": 1e-05, "loss": 0.9833, "step": 107305 }, { "epoch": 95.04871567759079, "grad_norm": 0.24191558361053467, "learning_rate": 1e-05, "loss": 0.9635, "step": 107310 }, { "epoch": 95.05314437555359, "grad_norm": 0.23150593042373657, "learning_rate": 1e-05, "loss": 0.9515, "step": 107315 }, { "epoch": 95.05757307351638, "grad_norm": 0.2508734166622162, "learning_rate": 1e-05, "loss": 1.004, "step": 107320 }, { "epoch": 95.06200177147919, "grad_norm": 0.2241988629102707, "learning_rate": 1e-05, "loss": 0.9684, "step": 107325 }, { "epoch": 95.06643046944198, "grad_norm": 0.24449783563613892, "learning_rate": 1e-05, "loss": 0.901, "step": 107330 }, { "epoch": 95.07085916740478, "grad_norm": 0.20451109111309052, "learning_rate": 1e-05, "loss": 0.9216, "step": 107335 }, { "epoch": 95.07528786536759, "grad_norm": 0.21989353001117706, "learning_rate": 1e-05, "loss": 0.9507, "step": 107340 }, { "epoch": 95.07971656333038, "grad_norm": 0.22003479301929474, "learning_rate": 1e-05, "loss": 0.9316, "step": 107345 }, { "epoch": 95.08414526129317, "grad_norm": 0.2669506072998047, "learning_rate": 1e-05, "loss": 0.9447, "step": 107350 }, { "epoch": 95.08857395925598, "grad_norm": 0.2667923867702484, "learning_rate": 1e-05, "loss": 0.9908, "step": 107355 }, { "epoch": 95.09300265721878, "grad_norm": 0.2533407509326935, "learning_rate": 1e-05, "loss": 0.9138, "step": 107360 }, { "epoch": 95.09743135518157, "grad_norm": 0.24007335305213928, "learning_rate": 1e-05, "loss": 0.9581, "step": 107365 }, { "epoch": 95.10186005314438, "grad_norm": 0.2502859830856323, "learning_rate": 1e-05, "loss": 1.0162, "step": 107370 }, { "epoch": 95.10628875110717, "grad_norm": 0.22984546422958374, "learning_rate": 1e-05, "loss": 0.9317, "step": 107375 }, { "epoch": 95.11071744906998, "grad_norm": 0.21428455412387848, "learning_rate": 1e-05, "loss": 0.9664, "step": 107380 }, { "epoch": 95.11514614703277, "grad_norm": 0.27482813596725464, "learning_rate": 1e-05, "loss": 0.9709, "step": 107385 }, { "epoch": 95.11957484499557, "grad_norm": 0.25785624980926514, "learning_rate": 1e-05, "loss": 0.9597, "step": 107390 }, { "epoch": 95.12400354295838, "grad_norm": 0.33070603013038635, "learning_rate": 1e-05, "loss": 0.9371, "step": 107395 }, { "epoch": 95.12843224092117, "grad_norm": 0.23416925966739655, "learning_rate": 1e-05, "loss": 0.948, "step": 107400 }, { "epoch": 95.13286093888397, "grad_norm": 0.24412092566490173, "learning_rate": 1e-05, "loss": 0.9792, "step": 107405 }, { "epoch": 95.13728963684677, "grad_norm": 0.23255768418312073, "learning_rate": 1e-05, "loss": 0.9573, "step": 107410 }, { "epoch": 95.14171833480957, "grad_norm": 0.21100811660289764, "learning_rate": 1e-05, "loss": 0.9389, "step": 107415 }, { "epoch": 95.14614703277236, "grad_norm": 0.22685614228248596, "learning_rate": 1e-05, "loss": 0.9866, "step": 107420 }, { "epoch": 95.15057573073517, "grad_norm": 0.23177024722099304, "learning_rate": 1e-05, "loss": 0.9223, "step": 107425 }, { "epoch": 95.15500442869796, "grad_norm": 0.23917986452579498, "learning_rate": 1e-05, "loss": 0.9876, "step": 107430 }, { "epoch": 95.15943312666076, "grad_norm": 0.24278783798217773, "learning_rate": 1e-05, "loss": 0.9396, "step": 107435 }, { "epoch": 95.16386182462357, "grad_norm": 0.21225441992282867, "learning_rate": 1e-05, "loss": 0.948, "step": 107440 }, { "epoch": 95.16829052258636, "grad_norm": 0.2684895098209381, "learning_rate": 1e-05, "loss": 0.938, "step": 107445 }, { "epoch": 95.17271922054915, "grad_norm": 0.21584945917129517, "learning_rate": 1e-05, "loss": 0.9512, "step": 107450 }, { "epoch": 95.17714791851196, "grad_norm": 0.25267452001571655, "learning_rate": 1e-05, "loss": 1.0081, "step": 107455 }, { "epoch": 95.18157661647476, "grad_norm": 0.23381614685058594, "learning_rate": 1e-05, "loss": 0.9216, "step": 107460 }, { "epoch": 95.18600531443755, "grad_norm": 0.2509293854236603, "learning_rate": 1e-05, "loss": 0.9354, "step": 107465 }, { "epoch": 95.19043401240036, "grad_norm": 0.288047194480896, "learning_rate": 1e-05, "loss": 0.965, "step": 107470 }, { "epoch": 95.19486271036315, "grad_norm": 0.3397163152694702, "learning_rate": 1e-05, "loss": 0.9213, "step": 107475 }, { "epoch": 95.19929140832595, "grad_norm": 0.297993004322052, "learning_rate": 1e-05, "loss": 1.0076, "step": 107480 }, { "epoch": 95.20372010628876, "grad_norm": 0.2647121548652649, "learning_rate": 1e-05, "loss": 0.9796, "step": 107485 }, { "epoch": 95.20814880425155, "grad_norm": 0.29293352365493774, "learning_rate": 1e-05, "loss": 0.9325, "step": 107490 }, { "epoch": 95.21257750221434, "grad_norm": 0.2722911238670349, "learning_rate": 1e-05, "loss": 0.9533, "step": 107495 }, { "epoch": 95.21700620017715, "grad_norm": 0.256132036447525, "learning_rate": 1e-05, "loss": 0.9474, "step": 107500 }, { "epoch": 95.22143489813995, "grad_norm": 0.2437218278646469, "learning_rate": 1e-05, "loss": 0.9642, "step": 107505 }, { "epoch": 95.22586359610274, "grad_norm": 0.2889432907104492, "learning_rate": 1e-05, "loss": 0.9672, "step": 107510 }, { "epoch": 95.23029229406555, "grad_norm": 0.26617172360420227, "learning_rate": 1e-05, "loss": 0.9858, "step": 107515 }, { "epoch": 95.23472099202834, "grad_norm": 0.2752954065799713, "learning_rate": 1e-05, "loss": 0.9123, "step": 107520 }, { "epoch": 95.23914968999114, "grad_norm": 0.2673304080963135, "learning_rate": 1e-05, "loss": 0.9757, "step": 107525 }, { "epoch": 95.24357838795395, "grad_norm": 0.2583892047405243, "learning_rate": 1e-05, "loss": 0.9431, "step": 107530 }, { "epoch": 95.24800708591674, "grad_norm": 0.21743465960025787, "learning_rate": 1e-05, "loss": 1.0182, "step": 107535 }, { "epoch": 95.25243578387953, "grad_norm": 0.23750510811805725, "learning_rate": 1e-05, "loss": 0.8856, "step": 107540 }, { "epoch": 95.25686448184234, "grad_norm": 0.22443650662899017, "learning_rate": 1e-05, "loss": 0.9288, "step": 107545 }, { "epoch": 95.26129317980514, "grad_norm": 0.22520148754119873, "learning_rate": 1e-05, "loss": 0.9256, "step": 107550 }, { "epoch": 95.26572187776793, "grad_norm": 0.23910659551620483, "learning_rate": 1e-05, "loss": 1.031, "step": 107555 }, { "epoch": 95.27015057573074, "grad_norm": 0.21996942162513733, "learning_rate": 1e-05, "loss": 0.9822, "step": 107560 }, { "epoch": 95.27457927369353, "grad_norm": 0.22658292949199677, "learning_rate": 1e-05, "loss": 0.9913, "step": 107565 }, { "epoch": 95.27900797165633, "grad_norm": 0.2718266546726227, "learning_rate": 1e-05, "loss": 0.9423, "step": 107570 }, { "epoch": 95.28343666961914, "grad_norm": 0.21327579021453857, "learning_rate": 1e-05, "loss": 0.9973, "step": 107575 }, { "epoch": 95.28786536758193, "grad_norm": 0.22494789958000183, "learning_rate": 1e-05, "loss": 0.9865, "step": 107580 }, { "epoch": 95.29229406554472, "grad_norm": 0.20625858008861542, "learning_rate": 1e-05, "loss": 1.0131, "step": 107585 }, { "epoch": 95.29672276350753, "grad_norm": 0.19615961611270905, "learning_rate": 1e-05, "loss": 0.9596, "step": 107590 }, { "epoch": 95.30115146147033, "grad_norm": 0.2181224524974823, "learning_rate": 1e-05, "loss": 0.9877, "step": 107595 }, { "epoch": 95.30558015943312, "grad_norm": 0.26227763295173645, "learning_rate": 1e-05, "loss": 0.9524, "step": 107600 }, { "epoch": 95.31000885739593, "grad_norm": 0.24930843710899353, "learning_rate": 1e-05, "loss": 0.9644, "step": 107605 }, { "epoch": 95.31443755535872, "grad_norm": 0.2209462672472, "learning_rate": 1e-05, "loss": 1.0201, "step": 107610 }, { "epoch": 95.31886625332152, "grad_norm": 0.2423831820487976, "learning_rate": 1e-05, "loss": 0.9908, "step": 107615 }, { "epoch": 95.32329495128432, "grad_norm": 0.27176910638809204, "learning_rate": 1e-05, "loss": 0.9171, "step": 107620 }, { "epoch": 95.32772364924712, "grad_norm": 0.227650985121727, "learning_rate": 1e-05, "loss": 0.9586, "step": 107625 }, { "epoch": 95.33215234720993, "grad_norm": 0.26221513748168945, "learning_rate": 1e-05, "loss": 0.9259, "step": 107630 }, { "epoch": 95.33658104517272, "grad_norm": 0.26837992668151855, "learning_rate": 1e-05, "loss": 0.9426, "step": 107635 }, { "epoch": 95.34100974313552, "grad_norm": 0.2633305490016937, "learning_rate": 1e-05, "loss": 0.9448, "step": 107640 }, { "epoch": 95.34543844109832, "grad_norm": 0.2688346803188324, "learning_rate": 1e-05, "loss": 0.9791, "step": 107645 }, { "epoch": 95.34986713906112, "grad_norm": 0.2908957004547119, "learning_rate": 1e-05, "loss": 0.9689, "step": 107650 }, { "epoch": 95.35429583702391, "grad_norm": 0.23492327332496643, "learning_rate": 1e-05, "loss": 0.9536, "step": 107655 }, { "epoch": 95.35872453498672, "grad_norm": 0.2329270839691162, "learning_rate": 1e-05, "loss": 0.9544, "step": 107660 }, { "epoch": 95.36315323294951, "grad_norm": 0.2525000274181366, "learning_rate": 1e-05, "loss": 0.9387, "step": 107665 }, { "epoch": 95.36758193091231, "grad_norm": 0.2484455555677414, "learning_rate": 1e-05, "loss": 0.959, "step": 107670 }, { "epoch": 95.37201062887512, "grad_norm": 0.2335076630115509, "learning_rate": 1e-05, "loss": 0.9731, "step": 107675 }, { "epoch": 95.37643932683791, "grad_norm": 0.2261621057987213, "learning_rate": 1e-05, "loss": 0.9696, "step": 107680 }, { "epoch": 95.3808680248007, "grad_norm": 0.24132534861564636, "learning_rate": 1e-05, "loss": 0.9353, "step": 107685 }, { "epoch": 95.38529672276351, "grad_norm": 0.2227826863527298, "learning_rate": 1e-05, "loss": 0.9808, "step": 107690 }, { "epoch": 95.38972542072631, "grad_norm": 0.2079281508922577, "learning_rate": 1e-05, "loss": 0.9691, "step": 107695 }, { "epoch": 95.3941541186891, "grad_norm": 0.25957751274108887, "learning_rate": 1e-05, "loss": 0.9596, "step": 107700 }, { "epoch": 95.39858281665191, "grad_norm": 0.21455520391464233, "learning_rate": 1e-05, "loss": 0.9516, "step": 107705 }, { "epoch": 95.4030115146147, "grad_norm": 0.21068847179412842, "learning_rate": 1e-05, "loss": 0.9967, "step": 107710 }, { "epoch": 95.4074402125775, "grad_norm": 0.2353878915309906, "learning_rate": 1e-05, "loss": 0.9281, "step": 107715 }, { "epoch": 95.4118689105403, "grad_norm": 0.2761279046535492, "learning_rate": 1e-05, "loss": 0.9186, "step": 107720 }, { "epoch": 95.4162976085031, "grad_norm": 0.27247875928878784, "learning_rate": 1e-05, "loss": 0.9855, "step": 107725 }, { "epoch": 95.4207263064659, "grad_norm": 0.2290761023759842, "learning_rate": 1e-05, "loss": 0.9845, "step": 107730 }, { "epoch": 95.4251550044287, "grad_norm": 0.24237966537475586, "learning_rate": 1e-05, "loss": 0.9415, "step": 107735 }, { "epoch": 95.4295837023915, "grad_norm": 0.23053774237632751, "learning_rate": 1e-05, "loss": 0.9548, "step": 107740 }, { "epoch": 95.43401240035429, "grad_norm": 0.23504970967769623, "learning_rate": 1e-05, "loss": 0.8922, "step": 107745 }, { "epoch": 95.4384410983171, "grad_norm": 0.22553934156894684, "learning_rate": 1e-05, "loss": 0.9138, "step": 107750 }, { "epoch": 95.4428697962799, "grad_norm": 0.2617259621620178, "learning_rate": 1e-05, "loss": 1.0025, "step": 107755 }, { "epoch": 95.44729849424269, "grad_norm": 0.2620776295661926, "learning_rate": 1e-05, "loss": 0.919, "step": 107760 }, { "epoch": 95.4517271922055, "grad_norm": 0.27812597155570984, "learning_rate": 1e-05, "loss": 0.9638, "step": 107765 }, { "epoch": 95.45615589016829, "grad_norm": 0.27106449007987976, "learning_rate": 1e-05, "loss": 0.9288, "step": 107770 }, { "epoch": 95.46058458813108, "grad_norm": 0.2544442415237427, "learning_rate": 1e-05, "loss": 0.9701, "step": 107775 }, { "epoch": 95.46501328609389, "grad_norm": 0.2651505768299103, "learning_rate": 1e-05, "loss": 0.9219, "step": 107780 }, { "epoch": 95.46944198405669, "grad_norm": 0.27600473165512085, "learning_rate": 1e-05, "loss": 0.9928, "step": 107785 }, { "epoch": 95.47387068201948, "grad_norm": 0.24292562901973724, "learning_rate": 1e-05, "loss": 0.9912, "step": 107790 }, { "epoch": 95.47829937998229, "grad_norm": 0.24555571377277374, "learning_rate": 1e-05, "loss": 0.9698, "step": 107795 }, { "epoch": 95.48272807794508, "grad_norm": 0.2511807978153229, "learning_rate": 1e-05, "loss": 0.8852, "step": 107800 }, { "epoch": 95.48715677590788, "grad_norm": 0.20451007783412933, "learning_rate": 1e-05, "loss": 0.9561, "step": 107805 }, { "epoch": 95.49158547387069, "grad_norm": 0.20194494724273682, "learning_rate": 1e-05, "loss": 0.9719, "step": 107810 }, { "epoch": 95.49601417183348, "grad_norm": 0.24525058269500732, "learning_rate": 1e-05, "loss": 0.9682, "step": 107815 }, { "epoch": 95.50044286979627, "grad_norm": 0.2273806780576706, "learning_rate": 1e-05, "loss": 0.9388, "step": 107820 }, { "epoch": 95.50487156775908, "grad_norm": 0.3020380437374115, "learning_rate": 1e-05, "loss": 0.949, "step": 107825 }, { "epoch": 95.50930026572188, "grad_norm": 0.2678582966327667, "learning_rate": 1e-05, "loss": 0.9901, "step": 107830 }, { "epoch": 95.51372896368467, "grad_norm": 0.2440209686756134, "learning_rate": 1e-05, "loss": 0.9783, "step": 107835 }, { "epoch": 95.51815766164748, "grad_norm": 0.22443720698356628, "learning_rate": 1e-05, "loss": 0.9216, "step": 107840 }, { "epoch": 95.52258635961027, "grad_norm": 0.21010196208953857, "learning_rate": 1e-05, "loss": 0.9417, "step": 107845 }, { "epoch": 95.52701505757307, "grad_norm": 0.23201937973499298, "learning_rate": 1e-05, "loss": 0.9839, "step": 107850 }, { "epoch": 95.53144375553588, "grad_norm": 0.2587655782699585, "learning_rate": 1e-05, "loss": 0.9845, "step": 107855 }, { "epoch": 95.53587245349867, "grad_norm": 0.23483608663082123, "learning_rate": 1e-05, "loss": 0.9571, "step": 107860 }, { "epoch": 95.54030115146146, "grad_norm": 0.2619555592536926, "learning_rate": 1e-05, "loss": 0.9971, "step": 107865 }, { "epoch": 95.54472984942427, "grad_norm": 0.21851742267608643, "learning_rate": 1e-05, "loss": 0.9398, "step": 107870 }, { "epoch": 95.54915854738707, "grad_norm": 0.23866872489452362, "learning_rate": 1e-05, "loss": 0.9107, "step": 107875 }, { "epoch": 95.55358724534987, "grad_norm": 0.2349606454372406, "learning_rate": 1e-05, "loss": 0.9908, "step": 107880 }, { "epoch": 95.55801594331267, "grad_norm": 0.24345038831233978, "learning_rate": 1e-05, "loss": 1.0209, "step": 107885 }, { "epoch": 95.56244464127546, "grad_norm": 0.23698939383029938, "learning_rate": 1e-05, "loss": 0.9595, "step": 107890 }, { "epoch": 95.56687333923827, "grad_norm": 0.2121095508337021, "learning_rate": 1e-05, "loss": 0.9571, "step": 107895 }, { "epoch": 95.57130203720106, "grad_norm": 0.2416294664144516, "learning_rate": 1e-05, "loss": 0.9191, "step": 107900 }, { "epoch": 95.57573073516386, "grad_norm": 0.23848575353622437, "learning_rate": 1e-05, "loss": 0.94, "step": 107905 }, { "epoch": 95.58015943312667, "grad_norm": 0.24208682775497437, "learning_rate": 1e-05, "loss": 0.9788, "step": 107910 }, { "epoch": 95.58458813108946, "grad_norm": 0.23229193687438965, "learning_rate": 1e-05, "loss": 0.9583, "step": 107915 }, { "epoch": 95.58901682905226, "grad_norm": 0.21544359624385834, "learning_rate": 1e-05, "loss": 0.9483, "step": 107920 }, { "epoch": 95.59344552701506, "grad_norm": 0.2173500657081604, "learning_rate": 1e-05, "loss": 0.9804, "step": 107925 }, { "epoch": 95.59787422497786, "grad_norm": 0.24962668120861053, "learning_rate": 1e-05, "loss": 0.9569, "step": 107930 }, { "epoch": 95.60230292294065, "grad_norm": 0.23697854578495026, "learning_rate": 1e-05, "loss": 0.9595, "step": 107935 }, { "epoch": 95.60673162090346, "grad_norm": 0.2456091195344925, "learning_rate": 1e-05, "loss": 0.999, "step": 107940 }, { "epoch": 95.61116031886625, "grad_norm": 0.2261052429676056, "learning_rate": 1e-05, "loss": 1.0016, "step": 107945 }, { "epoch": 95.61558901682905, "grad_norm": 0.21750393509864807, "learning_rate": 1e-05, "loss": 0.9734, "step": 107950 }, { "epoch": 95.62001771479186, "grad_norm": 0.2327343225479126, "learning_rate": 1e-05, "loss": 0.977, "step": 107955 }, { "epoch": 95.62444641275465, "grad_norm": 0.21743425726890564, "learning_rate": 1e-05, "loss": 0.9674, "step": 107960 }, { "epoch": 95.62887511071744, "grad_norm": 0.21500687301158905, "learning_rate": 1e-05, "loss": 0.9442, "step": 107965 }, { "epoch": 95.63330380868025, "grad_norm": 0.2827708125114441, "learning_rate": 1e-05, "loss": 0.9877, "step": 107970 }, { "epoch": 95.63773250664305, "grad_norm": 0.22060155868530273, "learning_rate": 1e-05, "loss": 0.9755, "step": 107975 }, { "epoch": 95.64216120460584, "grad_norm": 0.23911650478839874, "learning_rate": 1e-05, "loss": 0.9696, "step": 107980 }, { "epoch": 95.64658990256865, "grad_norm": 0.25105273723602295, "learning_rate": 1e-05, "loss": 1.0211, "step": 107985 }, { "epoch": 95.65101860053144, "grad_norm": 0.24852430820465088, "learning_rate": 1e-05, "loss": 0.9274, "step": 107990 }, { "epoch": 95.65544729849424, "grad_norm": 0.26660463213920593, "learning_rate": 1e-05, "loss": 0.9086, "step": 107995 }, { "epoch": 95.65987599645705, "grad_norm": 0.27217134833335876, "learning_rate": 1e-05, "loss": 0.9179, "step": 108000 }, { "epoch": 95.66430469441984, "grad_norm": 0.257432222366333, "learning_rate": 1e-05, "loss": 0.9696, "step": 108005 }, { "epoch": 95.66873339238263, "grad_norm": 0.2769724428653717, "learning_rate": 1e-05, "loss": 0.9497, "step": 108010 }, { "epoch": 95.67316209034544, "grad_norm": 0.24498045444488525, "learning_rate": 1e-05, "loss": 0.9832, "step": 108015 }, { "epoch": 95.67759078830824, "grad_norm": 0.21995790302753448, "learning_rate": 1e-05, "loss": 0.9239, "step": 108020 }, { "epoch": 95.68201948627103, "grad_norm": 0.2099144607782364, "learning_rate": 1e-05, "loss": 0.9099, "step": 108025 }, { "epoch": 95.68644818423384, "grad_norm": 0.22911645472049713, "learning_rate": 1e-05, "loss": 0.9346, "step": 108030 }, { "epoch": 95.69087688219663, "grad_norm": 0.2082708179950714, "learning_rate": 1e-05, "loss": 1.0215, "step": 108035 }, { "epoch": 95.69530558015943, "grad_norm": 0.22762998938560486, "learning_rate": 1e-05, "loss": 1.0118, "step": 108040 }, { "epoch": 95.69973427812224, "grad_norm": 0.2905156910419464, "learning_rate": 1e-05, "loss": 0.9076, "step": 108045 }, { "epoch": 95.70416297608503, "grad_norm": 0.2250833511352539, "learning_rate": 1e-05, "loss": 0.8849, "step": 108050 }, { "epoch": 95.70859167404782, "grad_norm": 0.23311945796012878, "learning_rate": 1e-05, "loss": 0.9737, "step": 108055 }, { "epoch": 95.71302037201063, "grad_norm": 0.24047549068927765, "learning_rate": 1e-05, "loss": 0.932, "step": 108060 }, { "epoch": 95.71744906997343, "grad_norm": 0.25582608580589294, "learning_rate": 1e-05, "loss": 0.9352, "step": 108065 }, { "epoch": 95.72187776793622, "grad_norm": 0.2521994411945343, "learning_rate": 1e-05, "loss": 0.9393, "step": 108070 }, { "epoch": 95.72630646589903, "grad_norm": 0.21439537405967712, "learning_rate": 1e-05, "loss": 0.9691, "step": 108075 }, { "epoch": 95.73073516386182, "grad_norm": 0.2501664161682129, "learning_rate": 1e-05, "loss": 0.968, "step": 108080 }, { "epoch": 95.73516386182462, "grad_norm": 0.19152836501598358, "learning_rate": 1e-05, "loss": 0.9463, "step": 108085 }, { "epoch": 95.73959255978743, "grad_norm": 0.22520753741264343, "learning_rate": 1e-05, "loss": 0.9308, "step": 108090 }, { "epoch": 95.74402125775022, "grad_norm": 0.23537737131118774, "learning_rate": 1e-05, "loss": 0.9874, "step": 108095 }, { "epoch": 95.74844995571301, "grad_norm": 0.22484339773654938, "learning_rate": 1e-05, "loss": 0.9251, "step": 108100 }, { "epoch": 95.75287865367582, "grad_norm": 0.23144637048244476, "learning_rate": 1e-05, "loss": 0.9623, "step": 108105 }, { "epoch": 95.75730735163862, "grad_norm": 0.25670066475868225, "learning_rate": 1e-05, "loss": 0.9646, "step": 108110 }, { "epoch": 95.76173604960141, "grad_norm": 0.24359601736068726, "learning_rate": 1e-05, "loss": 0.9398, "step": 108115 }, { "epoch": 95.76616474756422, "grad_norm": 0.2958217263221741, "learning_rate": 1e-05, "loss": 0.9882, "step": 108120 }, { "epoch": 95.77059344552701, "grad_norm": 0.2801280915737152, "learning_rate": 1e-05, "loss": 0.9573, "step": 108125 }, { "epoch": 95.77502214348982, "grad_norm": 0.34061455726623535, "learning_rate": 1e-05, "loss": 0.9895, "step": 108130 }, { "epoch": 95.77945084145261, "grad_norm": 0.2528132200241089, "learning_rate": 1e-05, "loss": 1.0364, "step": 108135 }, { "epoch": 95.78387953941541, "grad_norm": 0.2196105420589447, "learning_rate": 1e-05, "loss": 0.944, "step": 108140 }, { "epoch": 95.78830823737822, "grad_norm": 0.23865923285484314, "learning_rate": 1e-05, "loss": 0.9216, "step": 108145 }, { "epoch": 95.79273693534101, "grad_norm": 0.24523451924324036, "learning_rate": 1e-05, "loss": 0.9898, "step": 108150 }, { "epoch": 95.7971656333038, "grad_norm": 0.2544254958629608, "learning_rate": 1e-05, "loss": 0.9915, "step": 108155 }, { "epoch": 95.80159433126661, "grad_norm": 0.22589778900146484, "learning_rate": 1e-05, "loss": 0.9579, "step": 108160 }, { "epoch": 95.80602302922941, "grad_norm": 0.21877838671207428, "learning_rate": 1e-05, "loss": 1.0116, "step": 108165 }, { "epoch": 95.8104517271922, "grad_norm": 0.2545568645000458, "learning_rate": 1e-05, "loss": 0.9712, "step": 108170 }, { "epoch": 95.81488042515501, "grad_norm": 0.22206686437129974, "learning_rate": 1e-05, "loss": 0.9766, "step": 108175 }, { "epoch": 95.8193091231178, "grad_norm": 0.22488637268543243, "learning_rate": 1e-05, "loss": 0.8899, "step": 108180 }, { "epoch": 95.8237378210806, "grad_norm": 0.2672382891178131, "learning_rate": 1e-05, "loss": 0.9407, "step": 108185 }, { "epoch": 95.8281665190434, "grad_norm": 0.23874084651470184, "learning_rate": 1e-05, "loss": 0.989, "step": 108190 }, { "epoch": 95.8325952170062, "grad_norm": 0.27910977602005005, "learning_rate": 1e-05, "loss": 0.9121, "step": 108195 }, { "epoch": 95.837023914969, "grad_norm": 0.21587279438972473, "learning_rate": 1e-05, "loss": 0.9349, "step": 108200 }, { "epoch": 95.8414526129318, "grad_norm": 0.25613000988960266, "learning_rate": 1e-05, "loss": 0.9765, "step": 108205 }, { "epoch": 95.8458813108946, "grad_norm": 0.2395486682653427, "learning_rate": 1e-05, "loss": 0.9275, "step": 108210 }, { "epoch": 95.85031000885739, "grad_norm": 0.22896182537078857, "learning_rate": 1e-05, "loss": 0.9826, "step": 108215 }, { "epoch": 95.8547387068202, "grad_norm": 0.1961967796087265, "learning_rate": 1e-05, "loss": 0.9832, "step": 108220 }, { "epoch": 95.859167404783, "grad_norm": 0.2177097350358963, "learning_rate": 1e-05, "loss": 0.9456, "step": 108225 }, { "epoch": 95.86359610274579, "grad_norm": 0.2459237426519394, "learning_rate": 1e-05, "loss": 0.9112, "step": 108230 }, { "epoch": 95.8680248007086, "grad_norm": 0.21894758939743042, "learning_rate": 1e-05, "loss": 0.9189, "step": 108235 }, { "epoch": 95.87245349867139, "grad_norm": 0.2109079658985138, "learning_rate": 1e-05, "loss": 0.9435, "step": 108240 }, { "epoch": 95.87688219663418, "grad_norm": 0.22265484929084778, "learning_rate": 1e-05, "loss": 1.0252, "step": 108245 }, { "epoch": 95.881310894597, "grad_norm": 0.21388401091098785, "learning_rate": 1e-05, "loss": 0.9837, "step": 108250 }, { "epoch": 95.88573959255979, "grad_norm": 0.2093137949705124, "learning_rate": 1e-05, "loss": 1.0204, "step": 108255 }, { "epoch": 95.89016829052258, "grad_norm": 0.2581399977207184, "learning_rate": 1e-05, "loss": 0.9222, "step": 108260 }, { "epoch": 95.89459698848539, "grad_norm": 0.2453896701335907, "learning_rate": 1e-05, "loss": 0.9378, "step": 108265 }, { "epoch": 95.89902568644818, "grad_norm": 0.23279815912246704, "learning_rate": 1e-05, "loss": 1.0243, "step": 108270 }, { "epoch": 95.90345438441098, "grad_norm": 0.24917802214622498, "learning_rate": 1e-05, "loss": 0.9331, "step": 108275 }, { "epoch": 95.90788308237379, "grad_norm": 0.23274897038936615, "learning_rate": 1e-05, "loss": 0.9535, "step": 108280 }, { "epoch": 95.91231178033658, "grad_norm": 0.23741154372692108, "learning_rate": 1e-05, "loss": 0.9256, "step": 108285 }, { "epoch": 95.91674047829937, "grad_norm": 0.26576706767082214, "learning_rate": 1e-05, "loss": 0.9609, "step": 108290 }, { "epoch": 95.92116917626218, "grad_norm": 0.25688600540161133, "learning_rate": 1e-05, "loss": 1.0139, "step": 108295 }, { "epoch": 95.92559787422498, "grad_norm": 0.22089515626430511, "learning_rate": 1e-05, "loss": 0.9804, "step": 108300 }, { "epoch": 95.93002657218777, "grad_norm": 0.22952593863010406, "learning_rate": 1e-05, "loss": 0.958, "step": 108305 }, { "epoch": 95.93445527015058, "grad_norm": 0.25194287300109863, "learning_rate": 1e-05, "loss": 0.9602, "step": 108310 }, { "epoch": 95.93888396811337, "grad_norm": 0.24947448074817657, "learning_rate": 1e-05, "loss": 0.9603, "step": 108315 }, { "epoch": 95.94331266607617, "grad_norm": 0.23501215875148773, "learning_rate": 1e-05, "loss": 0.9736, "step": 108320 }, { "epoch": 95.94774136403898, "grad_norm": 0.23731659352779388, "learning_rate": 1e-05, "loss": 0.9398, "step": 108325 }, { "epoch": 95.95217006200177, "grad_norm": 0.24125874042510986, "learning_rate": 1e-05, "loss": 0.9554, "step": 108330 }, { "epoch": 95.95659875996456, "grad_norm": 0.2147240787744522, "learning_rate": 1e-05, "loss": 0.9858, "step": 108335 }, { "epoch": 95.96102745792737, "grad_norm": 0.26002317667007446, "learning_rate": 1e-05, "loss": 0.9395, "step": 108340 }, { "epoch": 95.96545615589017, "grad_norm": 0.26153072714805603, "learning_rate": 1e-05, "loss": 0.9642, "step": 108345 }, { "epoch": 95.96988485385296, "grad_norm": 0.22740699350833893, "learning_rate": 1e-05, "loss": 0.9547, "step": 108350 }, { "epoch": 95.97431355181577, "grad_norm": 0.3171957731246948, "learning_rate": 1e-05, "loss": 0.9951, "step": 108355 }, { "epoch": 95.97874224977856, "grad_norm": 0.2792864143848419, "learning_rate": 1e-05, "loss": 0.9458, "step": 108360 }, { "epoch": 95.98317094774137, "grad_norm": 0.21778006851673126, "learning_rate": 1e-05, "loss": 0.9553, "step": 108365 }, { "epoch": 95.98759964570417, "grad_norm": 0.2647404968738556, "learning_rate": 1e-05, "loss": 0.932, "step": 108370 }, { "epoch": 95.99202834366696, "grad_norm": 0.23767496645450592, "learning_rate": 1e-05, "loss": 0.9711, "step": 108375 }, { "epoch": 95.99645704162977, "grad_norm": 0.2408391386270523, "learning_rate": 1e-05, "loss": 0.9021, "step": 108380 }, { "epoch": 96.00088573959256, "grad_norm": 0.2284712940454483, "learning_rate": 1e-05, "loss": 0.9268, "step": 108385 }, { "epoch": 96.00531443755536, "grad_norm": 0.2424333393573761, "learning_rate": 1e-05, "loss": 0.9535, "step": 108390 }, { "epoch": 96.00974313551816, "grad_norm": 0.24135784804821014, "learning_rate": 1e-05, "loss": 0.9409, "step": 108395 }, { "epoch": 96.01417183348096, "grad_norm": 0.21310147643089294, "learning_rate": 1e-05, "loss": 0.9467, "step": 108400 }, { "epoch": 96.01860053144375, "grad_norm": 0.27662280201911926, "learning_rate": 1e-05, "loss": 1.0024, "step": 108405 }, { "epoch": 96.02302922940656, "grad_norm": 0.255436509847641, "learning_rate": 1e-05, "loss": 0.9737, "step": 108410 }, { "epoch": 96.02745792736935, "grad_norm": 0.25066521763801575, "learning_rate": 1e-05, "loss": 0.9239, "step": 108415 }, { "epoch": 96.03188662533215, "grad_norm": 0.26973816752433777, "learning_rate": 1e-05, "loss": 0.9649, "step": 108420 }, { "epoch": 96.03631532329496, "grad_norm": 0.25600868463516235, "learning_rate": 1e-05, "loss": 0.9882, "step": 108425 }, { "epoch": 96.04074402125775, "grad_norm": 0.2159801870584488, "learning_rate": 1e-05, "loss": 1.0013, "step": 108430 }, { "epoch": 96.04517271922055, "grad_norm": 0.239048033952713, "learning_rate": 1e-05, "loss": 0.9366, "step": 108435 }, { "epoch": 96.04960141718335, "grad_norm": 0.2514578700065613, "learning_rate": 1e-05, "loss": 0.9154, "step": 108440 }, { "epoch": 96.05403011514615, "grad_norm": 0.23884740471839905, "learning_rate": 1e-05, "loss": 0.9859, "step": 108445 }, { "epoch": 96.05845881310894, "grad_norm": 0.23057234287261963, "learning_rate": 1e-05, "loss": 0.9717, "step": 108450 }, { "epoch": 96.06288751107175, "grad_norm": 0.2376694679260254, "learning_rate": 1e-05, "loss": 0.9598, "step": 108455 }, { "epoch": 96.06731620903454, "grad_norm": 0.20688025653362274, "learning_rate": 1e-05, "loss": 0.9365, "step": 108460 }, { "epoch": 96.07174490699734, "grad_norm": 0.24544085562229156, "learning_rate": 1e-05, "loss": 0.9267, "step": 108465 }, { "epoch": 96.07617360496015, "grad_norm": 0.24091307818889618, "learning_rate": 1e-05, "loss": 0.9258, "step": 108470 }, { "epoch": 96.08060230292294, "grad_norm": 0.28245028853416443, "learning_rate": 1e-05, "loss": 0.9342, "step": 108475 }, { "epoch": 96.08503100088573, "grad_norm": 0.3136764168739319, "learning_rate": 1e-05, "loss": 0.9337, "step": 108480 }, { "epoch": 96.08945969884854, "grad_norm": 0.24032048881053925, "learning_rate": 1e-05, "loss": 0.9351, "step": 108485 }, { "epoch": 96.09388839681134, "grad_norm": 0.23953594267368317, "learning_rate": 1e-05, "loss": 0.9425, "step": 108490 }, { "epoch": 96.09831709477413, "grad_norm": 0.22534404695034027, "learning_rate": 1e-05, "loss": 0.9612, "step": 108495 }, { "epoch": 96.10274579273694, "grad_norm": 0.27071499824523926, "learning_rate": 1e-05, "loss": 0.9944, "step": 108500 }, { "epoch": 96.10717449069973, "grad_norm": 0.2184804379940033, "learning_rate": 1e-05, "loss": 0.9647, "step": 108505 }, { "epoch": 96.11160318866253, "grad_norm": 0.2290293276309967, "learning_rate": 1e-05, "loss": 0.9883, "step": 108510 }, { "epoch": 96.11603188662534, "grad_norm": 0.31282541155815125, "learning_rate": 1e-05, "loss": 0.9715, "step": 108515 }, { "epoch": 96.12046058458813, "grad_norm": 0.354139506816864, "learning_rate": 1e-05, "loss": 0.9563, "step": 108520 }, { "epoch": 96.12488928255092, "grad_norm": 0.2839699983596802, "learning_rate": 1e-05, "loss": 0.9133, "step": 108525 }, { "epoch": 96.12931798051373, "grad_norm": 0.2797127366065979, "learning_rate": 1e-05, "loss": 0.9486, "step": 108530 }, { "epoch": 96.13374667847653, "grad_norm": 0.22523237764835358, "learning_rate": 1e-05, "loss": 0.9985, "step": 108535 }, { "epoch": 96.13817537643932, "grad_norm": 0.22221854329109192, "learning_rate": 1e-05, "loss": 0.9882, "step": 108540 }, { "epoch": 96.14260407440213, "grad_norm": 0.24611735343933105, "learning_rate": 1e-05, "loss": 0.9939, "step": 108545 }, { "epoch": 96.14703277236492, "grad_norm": 0.2309277355670929, "learning_rate": 1e-05, "loss": 0.9606, "step": 108550 }, { "epoch": 96.15146147032772, "grad_norm": 0.22810816764831543, "learning_rate": 1e-05, "loss": 0.9797, "step": 108555 }, { "epoch": 96.15589016829053, "grad_norm": 0.30302348732948303, "learning_rate": 1e-05, "loss": 0.9773, "step": 108560 }, { "epoch": 96.16031886625332, "grad_norm": 0.24275289475917816, "learning_rate": 1e-05, "loss": 0.9489, "step": 108565 }, { "epoch": 96.16474756421611, "grad_norm": 0.23894831538200378, "learning_rate": 1e-05, "loss": 0.9651, "step": 108570 }, { "epoch": 96.16917626217892, "grad_norm": 0.23714321851730347, "learning_rate": 1e-05, "loss": 0.9745, "step": 108575 }, { "epoch": 96.17360496014172, "grad_norm": 0.23956306278705597, "learning_rate": 1e-05, "loss": 0.9327, "step": 108580 }, { "epoch": 96.17803365810451, "grad_norm": 0.29898393154144287, "learning_rate": 1e-05, "loss": 0.9297, "step": 108585 }, { "epoch": 96.18246235606732, "grad_norm": 0.28933021426200867, "learning_rate": 1e-05, "loss": 1.0165, "step": 108590 }, { "epoch": 96.18689105403011, "grad_norm": 0.2901092767715454, "learning_rate": 1e-05, "loss": 0.9557, "step": 108595 }, { "epoch": 96.1913197519929, "grad_norm": 0.21013809740543365, "learning_rate": 1e-05, "loss": 0.9909, "step": 108600 }, { "epoch": 96.19574844995572, "grad_norm": 0.2515837550163269, "learning_rate": 1e-05, "loss": 0.9376, "step": 108605 }, { "epoch": 96.20017714791851, "grad_norm": 0.26505985856056213, "learning_rate": 1e-05, "loss": 0.9318, "step": 108610 }, { "epoch": 96.20460584588132, "grad_norm": 0.2377777397632599, "learning_rate": 1e-05, "loss": 0.9361, "step": 108615 }, { "epoch": 96.20903454384411, "grad_norm": 0.25668391585350037, "learning_rate": 1e-05, "loss": 0.9664, "step": 108620 }, { "epoch": 96.2134632418069, "grad_norm": 0.2132730931043625, "learning_rate": 1e-05, "loss": 0.939, "step": 108625 }, { "epoch": 96.21789193976971, "grad_norm": 0.22132623195648193, "learning_rate": 1e-05, "loss": 0.933, "step": 108630 }, { "epoch": 96.22232063773251, "grad_norm": 0.24276478588581085, "learning_rate": 1e-05, "loss": 0.9574, "step": 108635 }, { "epoch": 96.2267493356953, "grad_norm": 0.21171914041042328, "learning_rate": 1e-05, "loss": 0.949, "step": 108640 }, { "epoch": 96.23117803365811, "grad_norm": 0.22050949931144714, "learning_rate": 1e-05, "loss": 0.9307, "step": 108645 }, { "epoch": 96.2356067316209, "grad_norm": 0.27296534180641174, "learning_rate": 1e-05, "loss": 0.9602, "step": 108650 }, { "epoch": 96.2400354295837, "grad_norm": 0.22463339567184448, "learning_rate": 1e-05, "loss": 0.9883, "step": 108655 }, { "epoch": 96.24446412754651, "grad_norm": 0.25902196764945984, "learning_rate": 1e-05, "loss": 0.9347, "step": 108660 }, { "epoch": 96.2488928255093, "grad_norm": 0.26889732480049133, "learning_rate": 1e-05, "loss": 0.9234, "step": 108665 }, { "epoch": 96.2533215234721, "grad_norm": 0.24156594276428223, "learning_rate": 1e-05, "loss": 0.9146, "step": 108670 }, { "epoch": 96.2577502214349, "grad_norm": 0.2529046833515167, "learning_rate": 1e-05, "loss": 0.9293, "step": 108675 }, { "epoch": 96.2621789193977, "grad_norm": 0.2482517957687378, "learning_rate": 1e-05, "loss": 0.963, "step": 108680 }, { "epoch": 96.26660761736049, "grad_norm": 0.22952386736869812, "learning_rate": 1e-05, "loss": 0.9489, "step": 108685 }, { "epoch": 96.2710363153233, "grad_norm": 0.20501549541950226, "learning_rate": 1e-05, "loss": 0.9452, "step": 108690 }, { "epoch": 96.2754650132861, "grad_norm": 0.26863911747932434, "learning_rate": 1e-05, "loss": 0.9496, "step": 108695 }, { "epoch": 96.27989371124889, "grad_norm": 0.2132173627614975, "learning_rate": 1e-05, "loss": 0.9592, "step": 108700 }, { "epoch": 96.2843224092117, "grad_norm": 0.26510581374168396, "learning_rate": 1e-05, "loss": 1.0121, "step": 108705 }, { "epoch": 96.28875110717449, "grad_norm": 0.2395889163017273, "learning_rate": 1e-05, "loss": 0.9546, "step": 108710 }, { "epoch": 96.29317980513729, "grad_norm": 0.2426411509513855, "learning_rate": 1e-05, "loss": 0.9457, "step": 108715 }, { "epoch": 96.2976085031001, "grad_norm": 0.2448132038116455, "learning_rate": 1e-05, "loss": 1.0326, "step": 108720 }, { "epoch": 96.30203720106289, "grad_norm": 0.2125806361436844, "learning_rate": 1e-05, "loss": 0.9785, "step": 108725 }, { "epoch": 96.30646589902568, "grad_norm": 0.23368892073631287, "learning_rate": 1e-05, "loss": 1.0088, "step": 108730 }, { "epoch": 96.31089459698849, "grad_norm": 0.2467987835407257, "learning_rate": 1e-05, "loss": 0.9842, "step": 108735 }, { "epoch": 96.31532329495128, "grad_norm": 0.22907590866088867, "learning_rate": 1e-05, "loss": 0.9523, "step": 108740 }, { "epoch": 96.31975199291408, "grad_norm": 0.2262767106294632, "learning_rate": 1e-05, "loss": 0.9709, "step": 108745 }, { "epoch": 96.32418069087689, "grad_norm": 0.24563902616500854, "learning_rate": 1e-05, "loss": 0.9201, "step": 108750 }, { "epoch": 96.32860938883968, "grad_norm": 0.22593539953231812, "learning_rate": 1e-05, "loss": 0.9203, "step": 108755 }, { "epoch": 96.33303808680247, "grad_norm": 0.2171163111925125, "learning_rate": 1e-05, "loss": 0.9384, "step": 108760 }, { "epoch": 96.33746678476528, "grad_norm": 0.22219586372375488, "learning_rate": 1e-05, "loss": 0.9667, "step": 108765 }, { "epoch": 96.34189548272808, "grad_norm": 0.21997058391571045, "learning_rate": 1e-05, "loss": 1.0108, "step": 108770 }, { "epoch": 96.34632418069087, "grad_norm": 0.2191559076309204, "learning_rate": 1e-05, "loss": 0.995, "step": 108775 }, { "epoch": 96.35075287865368, "grad_norm": 0.3007301092147827, "learning_rate": 1e-05, "loss": 0.9502, "step": 108780 }, { "epoch": 96.35518157661647, "grad_norm": 0.24937856197357178, "learning_rate": 1e-05, "loss": 0.9524, "step": 108785 }, { "epoch": 96.35961027457927, "grad_norm": 0.21745456755161285, "learning_rate": 1e-05, "loss": 0.9276, "step": 108790 }, { "epoch": 96.36403897254208, "grad_norm": 0.21041302382946014, "learning_rate": 1e-05, "loss": 0.9736, "step": 108795 }, { "epoch": 96.36846767050487, "grad_norm": 0.2228080779314041, "learning_rate": 1e-05, "loss": 0.9358, "step": 108800 }, { "epoch": 96.37289636846766, "grad_norm": 0.27315106987953186, "learning_rate": 1e-05, "loss": 0.9854, "step": 108805 }, { "epoch": 96.37732506643047, "grad_norm": 0.26474303007125854, "learning_rate": 1e-05, "loss": 0.9692, "step": 108810 }, { "epoch": 96.38175376439327, "grad_norm": 0.264626681804657, "learning_rate": 1e-05, "loss": 0.9773, "step": 108815 }, { "epoch": 96.38618246235606, "grad_norm": 0.24691453576087952, "learning_rate": 1e-05, "loss": 0.9675, "step": 108820 }, { "epoch": 96.39061116031887, "grad_norm": 0.23327571153640747, "learning_rate": 1e-05, "loss": 0.9771, "step": 108825 }, { "epoch": 96.39503985828166, "grad_norm": 0.2663676142692566, "learning_rate": 1e-05, "loss": 0.9942, "step": 108830 }, { "epoch": 96.39946855624446, "grad_norm": 0.2329336255788803, "learning_rate": 1e-05, "loss": 0.9587, "step": 108835 }, { "epoch": 96.40389725420727, "grad_norm": 0.23028676211833954, "learning_rate": 1e-05, "loss": 0.9276, "step": 108840 }, { "epoch": 96.40832595217006, "grad_norm": 0.23600134253501892, "learning_rate": 1e-05, "loss": 0.9664, "step": 108845 }, { "epoch": 96.41275465013285, "grad_norm": 0.27145883440971375, "learning_rate": 1e-05, "loss": 0.9625, "step": 108850 }, { "epoch": 96.41718334809566, "grad_norm": 0.25177595019340515, "learning_rate": 1e-05, "loss": 0.9772, "step": 108855 }, { "epoch": 96.42161204605846, "grad_norm": 0.2219705581665039, "learning_rate": 1e-05, "loss": 0.9831, "step": 108860 }, { "epoch": 96.42604074402126, "grad_norm": 0.24424809217453003, "learning_rate": 1e-05, "loss": 0.9252, "step": 108865 }, { "epoch": 96.43046944198406, "grad_norm": 0.26454147696495056, "learning_rate": 1e-05, "loss": 0.987, "step": 108870 }, { "epoch": 96.43489813994685, "grad_norm": 0.21250733733177185, "learning_rate": 1e-05, "loss": 0.9567, "step": 108875 }, { "epoch": 96.43932683790966, "grad_norm": 0.221794992685318, "learning_rate": 1e-05, "loss": 0.96, "step": 108880 }, { "epoch": 96.44375553587246, "grad_norm": 0.2635557949542999, "learning_rate": 1e-05, "loss": 0.9953, "step": 108885 }, { "epoch": 96.44818423383525, "grad_norm": 0.38913199305534363, "learning_rate": 1e-05, "loss": 0.879, "step": 108890 }, { "epoch": 96.45261293179806, "grad_norm": 0.2503507435321808, "learning_rate": 1e-05, "loss": 0.9595, "step": 108895 }, { "epoch": 96.45704162976085, "grad_norm": 0.2502439022064209, "learning_rate": 1e-05, "loss": 0.9415, "step": 108900 }, { "epoch": 96.46147032772365, "grad_norm": 0.22168058156967163, "learning_rate": 1e-05, "loss": 1.0167, "step": 108905 }, { "epoch": 96.46589902568645, "grad_norm": 0.22931894659996033, "learning_rate": 1e-05, "loss": 0.9628, "step": 108910 }, { "epoch": 96.47032772364925, "grad_norm": 0.26465457677841187, "learning_rate": 1e-05, "loss": 0.9401, "step": 108915 }, { "epoch": 96.47475642161204, "grad_norm": 0.22159187495708466, "learning_rate": 1e-05, "loss": 0.9812, "step": 108920 }, { "epoch": 96.47918511957485, "grad_norm": 0.22609472274780273, "learning_rate": 1e-05, "loss": 0.9961, "step": 108925 }, { "epoch": 96.48361381753764, "grad_norm": 0.23655027151107788, "learning_rate": 1e-05, "loss": 0.9686, "step": 108930 }, { "epoch": 96.48804251550044, "grad_norm": 0.2643718421459198, "learning_rate": 1e-05, "loss": 0.9838, "step": 108935 }, { "epoch": 96.49247121346325, "grad_norm": 0.2730221748352051, "learning_rate": 1e-05, "loss": 0.952, "step": 108940 }, { "epoch": 96.49689991142604, "grad_norm": 0.26589077711105347, "learning_rate": 1e-05, "loss": 0.9957, "step": 108945 }, { "epoch": 96.50132860938884, "grad_norm": 0.22781260311603546, "learning_rate": 1e-05, "loss": 0.9272, "step": 108950 }, { "epoch": 96.50575730735164, "grad_norm": 0.25082796812057495, "learning_rate": 1e-05, "loss": 0.9537, "step": 108955 }, { "epoch": 96.51018600531444, "grad_norm": 0.22763484716415405, "learning_rate": 1e-05, "loss": 0.9219, "step": 108960 }, { "epoch": 96.51461470327723, "grad_norm": 0.2302742600440979, "learning_rate": 1e-05, "loss": 0.9141, "step": 108965 }, { "epoch": 96.51904340124004, "grad_norm": 0.23829132318496704, "learning_rate": 1e-05, "loss": 0.9347, "step": 108970 }, { "epoch": 96.52347209920283, "grad_norm": 0.23597604036331177, "learning_rate": 1e-05, "loss": 0.931, "step": 108975 }, { "epoch": 96.52790079716563, "grad_norm": 0.3194771409034729, "learning_rate": 1e-05, "loss": 1.0027, "step": 108980 }, { "epoch": 96.53232949512844, "grad_norm": 0.21260182559490204, "learning_rate": 1e-05, "loss": 0.9226, "step": 108985 }, { "epoch": 96.53675819309123, "grad_norm": 0.28577062487602234, "learning_rate": 1e-05, "loss": 0.9497, "step": 108990 }, { "epoch": 96.54118689105402, "grad_norm": 0.26052534580230713, "learning_rate": 1e-05, "loss": 0.9401, "step": 108995 }, { "epoch": 96.54561558901683, "grad_norm": 0.2302294224500656, "learning_rate": 1e-05, "loss": 0.9305, "step": 109000 }, { "epoch": 96.55004428697963, "grad_norm": 0.21933409571647644, "learning_rate": 1e-05, "loss": 0.8796, "step": 109005 }, { "epoch": 96.55447298494242, "grad_norm": 0.26347604393959045, "learning_rate": 1e-05, "loss": 0.9548, "step": 109010 }, { "epoch": 96.55890168290523, "grad_norm": 0.2808287739753723, "learning_rate": 1e-05, "loss": 0.9496, "step": 109015 }, { "epoch": 96.56333038086802, "grad_norm": 0.22767126560211182, "learning_rate": 1e-05, "loss": 1.0236, "step": 109020 }, { "epoch": 96.56775907883082, "grad_norm": 0.2757602632045746, "learning_rate": 1e-05, "loss": 1.036, "step": 109025 }, { "epoch": 96.57218777679363, "grad_norm": 0.21776150166988373, "learning_rate": 1e-05, "loss": 0.9323, "step": 109030 }, { "epoch": 96.57661647475642, "grad_norm": 0.22101013362407684, "learning_rate": 1e-05, "loss": 0.9857, "step": 109035 }, { "epoch": 96.58104517271921, "grad_norm": 0.2152671217918396, "learning_rate": 1e-05, "loss": 0.9451, "step": 109040 }, { "epoch": 96.58547387068202, "grad_norm": 0.22448362410068512, "learning_rate": 1e-05, "loss": 0.9479, "step": 109045 }, { "epoch": 96.58990256864482, "grad_norm": 0.21453110873699188, "learning_rate": 1e-05, "loss": 0.9205, "step": 109050 }, { "epoch": 96.59433126660761, "grad_norm": 0.24533586204051971, "learning_rate": 1e-05, "loss": 0.9349, "step": 109055 }, { "epoch": 96.59875996457042, "grad_norm": 0.23309175670146942, "learning_rate": 1e-05, "loss": 0.9885, "step": 109060 }, { "epoch": 96.60318866253321, "grad_norm": 0.22769691050052643, "learning_rate": 1e-05, "loss": 0.988, "step": 109065 }, { "epoch": 96.60761736049601, "grad_norm": 0.21319161355495453, "learning_rate": 1e-05, "loss": 0.9642, "step": 109070 }, { "epoch": 96.61204605845882, "grad_norm": 0.2137531042098999, "learning_rate": 1e-05, "loss": 0.9373, "step": 109075 }, { "epoch": 96.61647475642161, "grad_norm": 0.2274094820022583, "learning_rate": 1e-05, "loss": 0.9705, "step": 109080 }, { "epoch": 96.6209034543844, "grad_norm": 0.2536349296569824, "learning_rate": 1e-05, "loss": 0.9409, "step": 109085 }, { "epoch": 96.62533215234721, "grad_norm": 0.25103116035461426, "learning_rate": 1e-05, "loss": 0.9781, "step": 109090 }, { "epoch": 96.62976085031, "grad_norm": 0.22480247914791107, "learning_rate": 1e-05, "loss": 0.9553, "step": 109095 }, { "epoch": 96.63418954827281, "grad_norm": 0.22571085393428802, "learning_rate": 1e-05, "loss": 0.926, "step": 109100 }, { "epoch": 96.63861824623561, "grad_norm": 0.23392663896083832, "learning_rate": 1e-05, "loss": 1.0043, "step": 109105 }, { "epoch": 96.6430469441984, "grad_norm": 0.21833255887031555, "learning_rate": 1e-05, "loss": 0.9573, "step": 109110 }, { "epoch": 96.64747564216121, "grad_norm": 0.2714911103248596, "learning_rate": 1e-05, "loss": 0.8766, "step": 109115 }, { "epoch": 96.651904340124, "grad_norm": 0.24225836992263794, "learning_rate": 1e-05, "loss": 0.9294, "step": 109120 }, { "epoch": 96.6563330380868, "grad_norm": 0.2602677047252655, "learning_rate": 1e-05, "loss": 0.9301, "step": 109125 }, { "epoch": 96.66076173604961, "grad_norm": 0.2385934293270111, "learning_rate": 1e-05, "loss": 0.9763, "step": 109130 }, { "epoch": 96.6651904340124, "grad_norm": 0.23226875066757202, "learning_rate": 1e-05, "loss": 0.9353, "step": 109135 }, { "epoch": 96.6696191319752, "grad_norm": 0.2980876564979553, "learning_rate": 1e-05, "loss": 0.9957, "step": 109140 }, { "epoch": 96.674047829938, "grad_norm": 0.23006302118301392, "learning_rate": 1e-05, "loss": 0.9872, "step": 109145 }, { "epoch": 96.6784765279008, "grad_norm": 0.2630012333393097, "learning_rate": 1e-05, "loss": 0.9436, "step": 109150 }, { "epoch": 96.68290522586359, "grad_norm": 0.22954444587230682, "learning_rate": 1e-05, "loss": 0.9099, "step": 109155 }, { "epoch": 96.6873339238264, "grad_norm": 0.22115427255630493, "learning_rate": 1e-05, "loss": 0.9148, "step": 109160 }, { "epoch": 96.6917626217892, "grad_norm": 0.22107937932014465, "learning_rate": 1e-05, "loss": 0.9426, "step": 109165 }, { "epoch": 96.69619131975199, "grad_norm": 0.2343861609697342, "learning_rate": 1e-05, "loss": 0.8955, "step": 109170 }, { "epoch": 96.7006200177148, "grad_norm": 0.24237355589866638, "learning_rate": 1e-05, "loss": 0.9139, "step": 109175 }, { "epoch": 96.70504871567759, "grad_norm": 0.2518700957298279, "learning_rate": 1e-05, "loss": 0.9076, "step": 109180 }, { "epoch": 96.70947741364039, "grad_norm": 0.24206562340259552, "learning_rate": 1e-05, "loss": 0.976, "step": 109185 }, { "epoch": 96.7139061116032, "grad_norm": 0.233285591006279, "learning_rate": 1e-05, "loss": 1.0157, "step": 109190 }, { "epoch": 96.71833480956599, "grad_norm": 0.22143955528736115, "learning_rate": 1e-05, "loss": 0.9229, "step": 109195 }, { "epoch": 96.72276350752878, "grad_norm": 0.22123315930366516, "learning_rate": 1e-05, "loss": 0.9175, "step": 109200 }, { "epoch": 96.72719220549159, "grad_norm": 0.2531883716583252, "learning_rate": 1e-05, "loss": 0.9481, "step": 109205 }, { "epoch": 96.73162090345438, "grad_norm": 0.23167569935321808, "learning_rate": 1e-05, "loss": 0.9388, "step": 109210 }, { "epoch": 96.73604960141718, "grad_norm": 0.19870179891586304, "learning_rate": 1e-05, "loss": 0.9746, "step": 109215 }, { "epoch": 96.74047829937999, "grad_norm": 0.2643187642097473, "learning_rate": 1e-05, "loss": 0.9983, "step": 109220 }, { "epoch": 96.74490699734278, "grad_norm": 0.24785612523555756, "learning_rate": 1e-05, "loss": 0.9291, "step": 109225 }, { "epoch": 96.74933569530558, "grad_norm": 0.22520042955875397, "learning_rate": 1e-05, "loss": 0.9955, "step": 109230 }, { "epoch": 96.75376439326838, "grad_norm": 0.22258657217025757, "learning_rate": 1e-05, "loss": 0.9724, "step": 109235 }, { "epoch": 96.75819309123118, "grad_norm": 0.22151513397693634, "learning_rate": 1e-05, "loss": 0.9701, "step": 109240 }, { "epoch": 96.76262178919397, "grad_norm": 0.24933257699012756, "learning_rate": 1e-05, "loss": 0.9597, "step": 109245 }, { "epoch": 96.76705048715678, "grad_norm": 0.23289336264133453, "learning_rate": 1e-05, "loss": 1.0009, "step": 109250 }, { "epoch": 96.77147918511957, "grad_norm": 0.2526431083679199, "learning_rate": 1e-05, "loss": 0.9492, "step": 109255 }, { "epoch": 96.77590788308237, "grad_norm": 0.23179958760738373, "learning_rate": 1e-05, "loss": 0.9771, "step": 109260 }, { "epoch": 96.78033658104518, "grad_norm": 0.2722504436969757, "learning_rate": 1e-05, "loss": 0.9425, "step": 109265 }, { "epoch": 96.78476527900797, "grad_norm": 0.20657408237457275, "learning_rate": 1e-05, "loss": 0.8949, "step": 109270 }, { "epoch": 96.78919397697076, "grad_norm": 0.23214073479175568, "learning_rate": 1e-05, "loss": 0.9263, "step": 109275 }, { "epoch": 96.79362267493357, "grad_norm": 0.2209925353527069, "learning_rate": 1e-05, "loss": 0.9517, "step": 109280 }, { "epoch": 96.79805137289637, "grad_norm": 0.28586846590042114, "learning_rate": 1e-05, "loss": 0.9534, "step": 109285 }, { "epoch": 96.80248007085916, "grad_norm": 0.2213120013475418, "learning_rate": 1e-05, "loss": 1.018, "step": 109290 }, { "epoch": 96.80690876882197, "grad_norm": 0.22843529284000397, "learning_rate": 1e-05, "loss": 0.986, "step": 109295 }, { "epoch": 96.81133746678476, "grad_norm": 0.22046613693237305, "learning_rate": 1e-05, "loss": 0.9438, "step": 109300 }, { "epoch": 96.81576616474756, "grad_norm": 0.23868194222450256, "learning_rate": 1e-05, "loss": 0.9902, "step": 109305 }, { "epoch": 96.82019486271037, "grad_norm": 0.22923843562602997, "learning_rate": 1e-05, "loss": 1.0222, "step": 109310 }, { "epoch": 96.82462356067316, "grad_norm": 0.23581412434577942, "learning_rate": 1e-05, "loss": 0.9077, "step": 109315 }, { "epoch": 96.82905225863595, "grad_norm": 0.2506400942802429, "learning_rate": 1e-05, "loss": 0.9708, "step": 109320 }, { "epoch": 96.83348095659876, "grad_norm": 0.22032801806926727, "learning_rate": 1e-05, "loss": 0.9326, "step": 109325 }, { "epoch": 96.83790965456156, "grad_norm": 0.25650304555892944, "learning_rate": 1e-05, "loss": 0.9655, "step": 109330 }, { "epoch": 96.84233835252435, "grad_norm": 0.2222817987203598, "learning_rate": 1e-05, "loss": 1.0027, "step": 109335 }, { "epoch": 96.84676705048716, "grad_norm": 0.20385071635246277, "learning_rate": 1e-05, "loss": 0.9578, "step": 109340 }, { "epoch": 96.85119574844995, "grad_norm": 0.2320050150156021, "learning_rate": 1e-05, "loss": 0.9614, "step": 109345 }, { "epoch": 96.85562444641276, "grad_norm": 0.24511975049972534, "learning_rate": 1e-05, "loss": 0.9126, "step": 109350 }, { "epoch": 96.86005314437556, "grad_norm": 0.22876961529254913, "learning_rate": 1e-05, "loss": 0.9137, "step": 109355 }, { "epoch": 96.86448184233835, "grad_norm": 0.22456419467926025, "learning_rate": 1e-05, "loss": 0.9539, "step": 109360 }, { "epoch": 96.86891054030116, "grad_norm": 0.22563810646533966, "learning_rate": 1e-05, "loss": 0.9646, "step": 109365 }, { "epoch": 96.87333923826395, "grad_norm": 0.21834054589271545, "learning_rate": 1e-05, "loss": 0.9681, "step": 109370 }, { "epoch": 96.87776793622675, "grad_norm": 0.24215298891067505, "learning_rate": 1e-05, "loss": 0.9844, "step": 109375 }, { "epoch": 96.88219663418955, "grad_norm": 0.20790977776050568, "learning_rate": 1e-05, "loss": 0.9334, "step": 109380 }, { "epoch": 96.88662533215235, "grad_norm": 0.22327399253845215, "learning_rate": 1e-05, "loss": 1.0133, "step": 109385 }, { "epoch": 96.89105403011514, "grad_norm": 0.23436813056468964, "learning_rate": 1e-05, "loss": 0.9689, "step": 109390 }, { "epoch": 96.89548272807795, "grad_norm": 0.23443818092346191, "learning_rate": 1e-05, "loss": 0.9248, "step": 109395 }, { "epoch": 96.89991142604075, "grad_norm": 0.22904512286186218, "learning_rate": 1e-05, "loss": 0.9247, "step": 109400 }, { "epoch": 96.90434012400354, "grad_norm": 0.21700811386108398, "learning_rate": 1e-05, "loss": 0.947, "step": 109405 }, { "epoch": 96.90876882196635, "grad_norm": 0.2270621657371521, "learning_rate": 1e-05, "loss": 0.9619, "step": 109410 }, { "epoch": 96.91319751992914, "grad_norm": 0.21223436295986176, "learning_rate": 1e-05, "loss": 0.9755, "step": 109415 }, { "epoch": 96.91762621789194, "grad_norm": 0.250649094581604, "learning_rate": 1e-05, "loss": 0.9886, "step": 109420 }, { "epoch": 96.92205491585474, "grad_norm": 0.2699147164821625, "learning_rate": 1e-05, "loss": 0.9001, "step": 109425 }, { "epoch": 96.92648361381754, "grad_norm": 0.23613402247428894, "learning_rate": 1e-05, "loss": 0.9502, "step": 109430 }, { "epoch": 96.93091231178033, "grad_norm": 0.23313196003437042, "learning_rate": 1e-05, "loss": 0.9387, "step": 109435 }, { "epoch": 96.93534100974314, "grad_norm": 0.2752964496612549, "learning_rate": 1e-05, "loss": 0.9776, "step": 109440 }, { "epoch": 96.93976970770593, "grad_norm": 0.2277020961046219, "learning_rate": 1e-05, "loss": 0.9889, "step": 109445 }, { "epoch": 96.94419840566873, "grad_norm": 0.20685577392578125, "learning_rate": 1e-05, "loss": 0.9331, "step": 109450 }, { "epoch": 96.94862710363154, "grad_norm": 0.24853739142417908, "learning_rate": 1e-05, "loss": 0.9893, "step": 109455 }, { "epoch": 96.95305580159433, "grad_norm": 0.2623380720615387, "learning_rate": 1e-05, "loss": 0.9246, "step": 109460 }, { "epoch": 96.95748449955713, "grad_norm": 0.2784663736820221, "learning_rate": 1e-05, "loss": 0.9626, "step": 109465 }, { "epoch": 96.96191319751993, "grad_norm": 0.23758426308631897, "learning_rate": 1e-05, "loss": 0.9433, "step": 109470 }, { "epoch": 96.96634189548273, "grad_norm": 0.21574918925762177, "learning_rate": 1e-05, "loss": 0.9592, "step": 109475 }, { "epoch": 96.97077059344552, "grad_norm": 0.2676204442977905, "learning_rate": 1e-05, "loss": 0.9728, "step": 109480 }, { "epoch": 96.97519929140833, "grad_norm": 0.28029686212539673, "learning_rate": 1e-05, "loss": 0.9219, "step": 109485 }, { "epoch": 96.97962798937112, "grad_norm": 0.2208881825208664, "learning_rate": 1e-05, "loss": 0.9678, "step": 109490 }, { "epoch": 96.98405668733392, "grad_norm": 0.2544923722743988, "learning_rate": 1e-05, "loss": 0.929, "step": 109495 }, { "epoch": 96.98848538529673, "grad_norm": 0.25113755464553833, "learning_rate": 1e-05, "loss": 1.0231, "step": 109500 }, { "epoch": 96.99291408325952, "grad_norm": 0.20882739126682281, "learning_rate": 1e-05, "loss": 0.9723, "step": 109505 }, { "epoch": 96.99734278122232, "grad_norm": 0.2533929646015167, "learning_rate": 1e-05, "loss": 0.9264, "step": 109510 }, { "epoch": 97.00177147918512, "grad_norm": 0.21742448210716248, "learning_rate": 1e-05, "loss": 0.9285, "step": 109515 }, { "epoch": 97.00620017714792, "grad_norm": 0.224729984998703, "learning_rate": 1e-05, "loss": 0.9773, "step": 109520 }, { "epoch": 97.01062887511071, "grad_norm": 0.25263383984565735, "learning_rate": 1e-05, "loss": 0.978, "step": 109525 }, { "epoch": 97.01505757307352, "grad_norm": 0.26445722579956055, "learning_rate": 1e-05, "loss": 0.9437, "step": 109530 }, { "epoch": 97.01948627103631, "grad_norm": 0.2620391249656677, "learning_rate": 1e-05, "loss": 0.9547, "step": 109535 }, { "epoch": 97.02391496899911, "grad_norm": 0.27834469079971313, "learning_rate": 1e-05, "loss": 0.9547, "step": 109540 }, { "epoch": 97.02834366696192, "grad_norm": 0.2653108239173889, "learning_rate": 1e-05, "loss": 0.9507, "step": 109545 }, { "epoch": 97.03277236492471, "grad_norm": 0.2766575813293457, "learning_rate": 1e-05, "loss": 0.9464, "step": 109550 }, { "epoch": 97.0372010628875, "grad_norm": 0.24768273532390594, "learning_rate": 1e-05, "loss": 0.979, "step": 109555 }, { "epoch": 97.04162976085031, "grad_norm": 0.2635209262371063, "learning_rate": 1e-05, "loss": 0.9404, "step": 109560 }, { "epoch": 97.0460584588131, "grad_norm": 0.250985711812973, "learning_rate": 1e-05, "loss": 0.9704, "step": 109565 }, { "epoch": 97.0504871567759, "grad_norm": 0.24129335582256317, "learning_rate": 1e-05, "loss": 0.9374, "step": 109570 }, { "epoch": 97.05491585473871, "grad_norm": 0.26707589626312256, "learning_rate": 1e-05, "loss": 0.9473, "step": 109575 }, { "epoch": 97.0593445527015, "grad_norm": 0.22685092687606812, "learning_rate": 1e-05, "loss": 0.9433, "step": 109580 }, { "epoch": 97.0637732506643, "grad_norm": 0.23330271244049072, "learning_rate": 1e-05, "loss": 1.0067, "step": 109585 }, { "epoch": 97.0682019486271, "grad_norm": 0.23260751366615295, "learning_rate": 1e-05, "loss": 0.9395, "step": 109590 }, { "epoch": 97.0726306465899, "grad_norm": 0.2356194704771042, "learning_rate": 1e-05, "loss": 0.9622, "step": 109595 }, { "epoch": 97.07705934455271, "grad_norm": 0.2609013319015503, "learning_rate": 1e-05, "loss": 0.9387, "step": 109600 }, { "epoch": 97.0814880425155, "grad_norm": 0.2565682530403137, "learning_rate": 1e-05, "loss": 1.0143, "step": 109605 }, { "epoch": 97.0859167404783, "grad_norm": 0.25871917605400085, "learning_rate": 1e-05, "loss": 0.9823, "step": 109610 }, { "epoch": 97.0903454384411, "grad_norm": 0.27744966745376587, "learning_rate": 1e-05, "loss": 1.0201, "step": 109615 }, { "epoch": 97.0947741364039, "grad_norm": 0.25573158264160156, "learning_rate": 1e-05, "loss": 1.0024, "step": 109620 }, { "epoch": 97.0992028343667, "grad_norm": 0.2509702444076538, "learning_rate": 1e-05, "loss": 0.9422, "step": 109625 }, { "epoch": 97.1036315323295, "grad_norm": 0.23970910906791687, "learning_rate": 1e-05, "loss": 0.9485, "step": 109630 }, { "epoch": 97.1080602302923, "grad_norm": 0.2555277645587921, "learning_rate": 1e-05, "loss": 0.9764, "step": 109635 }, { "epoch": 97.11248892825509, "grad_norm": 0.2591082453727722, "learning_rate": 1e-05, "loss": 0.9705, "step": 109640 }, { "epoch": 97.1169176262179, "grad_norm": 0.2916388213634491, "learning_rate": 1e-05, "loss": 0.9997, "step": 109645 }, { "epoch": 97.12134632418069, "grad_norm": 0.2743247151374817, "learning_rate": 1e-05, "loss": 0.9687, "step": 109650 }, { "epoch": 97.12577502214349, "grad_norm": 0.22339360415935516, "learning_rate": 1e-05, "loss": 0.9771, "step": 109655 }, { "epoch": 97.1302037201063, "grad_norm": 0.23839640617370605, "learning_rate": 1e-05, "loss": 0.9101, "step": 109660 }, { "epoch": 97.13463241806909, "grad_norm": 0.2508106827735901, "learning_rate": 1e-05, "loss": 0.9462, "step": 109665 }, { "epoch": 97.13906111603188, "grad_norm": 0.22281931340694427, "learning_rate": 1e-05, "loss": 0.94, "step": 109670 }, { "epoch": 97.14348981399469, "grad_norm": 0.2207503765821457, "learning_rate": 1e-05, "loss": 0.9695, "step": 109675 }, { "epoch": 97.14791851195749, "grad_norm": 0.20828008651733398, "learning_rate": 1e-05, "loss": 0.9693, "step": 109680 }, { "epoch": 97.15234720992028, "grad_norm": 0.20707137882709503, "learning_rate": 1e-05, "loss": 0.9343, "step": 109685 }, { "epoch": 97.15677590788309, "grad_norm": 0.23699553310871124, "learning_rate": 1e-05, "loss": 0.9495, "step": 109690 }, { "epoch": 97.16120460584588, "grad_norm": 0.22275662422180176, "learning_rate": 1e-05, "loss": 0.9442, "step": 109695 }, { "epoch": 97.16563330380868, "grad_norm": 0.2669040262699127, "learning_rate": 1e-05, "loss": 0.9584, "step": 109700 }, { "epoch": 97.17006200177148, "grad_norm": 0.23328854143619537, "learning_rate": 1e-05, "loss": 0.9362, "step": 109705 }, { "epoch": 97.17449069973428, "grad_norm": 0.26291340589523315, "learning_rate": 1e-05, "loss": 0.9331, "step": 109710 }, { "epoch": 97.17891939769707, "grad_norm": 0.24099422991275787, "learning_rate": 1e-05, "loss": 0.9189, "step": 109715 }, { "epoch": 97.18334809565988, "grad_norm": 0.21687135100364685, "learning_rate": 1e-05, "loss": 0.9761, "step": 109720 }, { "epoch": 97.18777679362267, "grad_norm": 0.24583666026592255, "learning_rate": 1e-05, "loss": 0.9293, "step": 109725 }, { "epoch": 97.19220549158547, "grad_norm": 0.2456425279378891, "learning_rate": 1e-05, "loss": 0.9218, "step": 109730 }, { "epoch": 97.19663418954828, "grad_norm": 0.2676035165786743, "learning_rate": 1e-05, "loss": 0.997, "step": 109735 }, { "epoch": 97.20106288751107, "grad_norm": 0.22334642708301544, "learning_rate": 1e-05, "loss": 0.9611, "step": 109740 }, { "epoch": 97.20549158547387, "grad_norm": 0.21113184094429016, "learning_rate": 1e-05, "loss": 0.8868, "step": 109745 }, { "epoch": 97.20992028343667, "grad_norm": 0.21757923066616058, "learning_rate": 1e-05, "loss": 0.9253, "step": 109750 }, { "epoch": 97.21434898139947, "grad_norm": 0.24147436022758484, "learning_rate": 1e-05, "loss": 0.9246, "step": 109755 }, { "epoch": 97.21877767936226, "grad_norm": 0.21963462233543396, "learning_rate": 1e-05, "loss": 0.9108, "step": 109760 }, { "epoch": 97.22320637732507, "grad_norm": 0.22466474771499634, "learning_rate": 1e-05, "loss": 0.9356, "step": 109765 }, { "epoch": 97.22763507528786, "grad_norm": 0.24098114669322968, "learning_rate": 1e-05, "loss": 0.9888, "step": 109770 }, { "epoch": 97.23206377325066, "grad_norm": 0.24827177822589874, "learning_rate": 1e-05, "loss": 0.9387, "step": 109775 }, { "epoch": 97.23649247121347, "grad_norm": 0.2679842710494995, "learning_rate": 1e-05, "loss": 0.9609, "step": 109780 }, { "epoch": 97.24092116917626, "grad_norm": 0.2706936001777649, "learning_rate": 1e-05, "loss": 0.9174, "step": 109785 }, { "epoch": 97.24534986713905, "grad_norm": 0.21122415363788605, "learning_rate": 1e-05, "loss": 0.9731, "step": 109790 }, { "epoch": 97.24977856510186, "grad_norm": 0.22289395332336426, "learning_rate": 1e-05, "loss": 0.9281, "step": 109795 }, { "epoch": 97.25420726306466, "grad_norm": 0.22101697325706482, "learning_rate": 1e-05, "loss": 0.9499, "step": 109800 }, { "epoch": 97.25863596102745, "grad_norm": 0.19991667568683624, "learning_rate": 1e-05, "loss": 0.9867, "step": 109805 }, { "epoch": 97.26306465899026, "grad_norm": 0.24198384582996368, "learning_rate": 1e-05, "loss": 0.9259, "step": 109810 }, { "epoch": 97.26749335695305, "grad_norm": 0.2563733756542206, "learning_rate": 1e-05, "loss": 0.9358, "step": 109815 }, { "epoch": 97.27192205491585, "grad_norm": 0.21383938193321228, "learning_rate": 1e-05, "loss": 0.9411, "step": 109820 }, { "epoch": 97.27635075287866, "grad_norm": 0.2876136898994446, "learning_rate": 1e-05, "loss": 0.963, "step": 109825 }, { "epoch": 97.28077945084145, "grad_norm": 0.2394861876964569, "learning_rate": 1e-05, "loss": 0.9827, "step": 109830 }, { "epoch": 97.28520814880426, "grad_norm": 0.2696128785610199, "learning_rate": 1e-05, "loss": 0.9943, "step": 109835 }, { "epoch": 97.28963684676705, "grad_norm": 0.22445590794086456, "learning_rate": 1e-05, "loss": 0.9844, "step": 109840 }, { "epoch": 97.29406554472985, "grad_norm": 0.25492721796035767, "learning_rate": 1e-05, "loss": 0.9407, "step": 109845 }, { "epoch": 97.29849424269266, "grad_norm": 0.24470648169517517, "learning_rate": 1e-05, "loss": 0.9858, "step": 109850 }, { "epoch": 97.30292294065545, "grad_norm": 0.23633340001106262, "learning_rate": 1e-05, "loss": 1.0098, "step": 109855 }, { "epoch": 97.30735163861824, "grad_norm": 0.23683921992778778, "learning_rate": 1e-05, "loss": 0.9844, "step": 109860 }, { "epoch": 97.31178033658105, "grad_norm": 0.24632786214351654, "learning_rate": 1e-05, "loss": 1.0028, "step": 109865 }, { "epoch": 97.31620903454385, "grad_norm": 0.23267455399036407, "learning_rate": 1e-05, "loss": 0.953, "step": 109870 }, { "epoch": 97.32063773250664, "grad_norm": 0.3037605583667755, "learning_rate": 1e-05, "loss": 0.9503, "step": 109875 }, { "epoch": 97.32506643046945, "grad_norm": 0.23018823564052582, "learning_rate": 1e-05, "loss": 1.0022, "step": 109880 }, { "epoch": 97.32949512843224, "grad_norm": 0.2253151684999466, "learning_rate": 1e-05, "loss": 0.9552, "step": 109885 }, { "epoch": 97.33392382639504, "grad_norm": 0.25150591135025024, "learning_rate": 1e-05, "loss": 0.9361, "step": 109890 }, { "epoch": 97.33835252435784, "grad_norm": 0.20476333796977997, "learning_rate": 1e-05, "loss": 0.9498, "step": 109895 }, { "epoch": 97.34278122232064, "grad_norm": 0.2290610820055008, "learning_rate": 1e-05, "loss": 0.9683, "step": 109900 }, { "epoch": 97.34720992028343, "grad_norm": 0.21188905835151672, "learning_rate": 1e-05, "loss": 0.9592, "step": 109905 }, { "epoch": 97.35163861824624, "grad_norm": 0.2743215560913086, "learning_rate": 1e-05, "loss": 0.9495, "step": 109910 }, { "epoch": 97.35606731620904, "grad_norm": 0.2437146157026291, "learning_rate": 1e-05, "loss": 0.9643, "step": 109915 }, { "epoch": 97.36049601417183, "grad_norm": 0.21139484643936157, "learning_rate": 1e-05, "loss": 0.9296, "step": 109920 }, { "epoch": 97.36492471213464, "grad_norm": 0.2499586045742035, "learning_rate": 1e-05, "loss": 0.9377, "step": 109925 }, { "epoch": 97.36935341009743, "grad_norm": 0.25432199239730835, "learning_rate": 1e-05, "loss": 0.9822, "step": 109930 }, { "epoch": 97.37378210806023, "grad_norm": 0.21627236902713776, "learning_rate": 1e-05, "loss": 0.925, "step": 109935 }, { "epoch": 97.37821080602303, "grad_norm": 0.22862394154071808, "learning_rate": 1e-05, "loss": 0.9209, "step": 109940 }, { "epoch": 97.38263950398583, "grad_norm": 0.2629219591617584, "learning_rate": 1e-05, "loss": 0.8942, "step": 109945 }, { "epoch": 97.38706820194862, "grad_norm": 0.2441430687904358, "learning_rate": 1e-05, "loss": 0.9411, "step": 109950 }, { "epoch": 97.39149689991143, "grad_norm": 0.24243900179862976, "learning_rate": 1e-05, "loss": 0.8985, "step": 109955 }, { "epoch": 97.39592559787422, "grad_norm": 0.22874903678894043, "learning_rate": 1e-05, "loss": 0.9008, "step": 109960 }, { "epoch": 97.40035429583702, "grad_norm": 0.24870111048221588, "learning_rate": 1e-05, "loss": 0.9321, "step": 109965 }, { "epoch": 97.40478299379983, "grad_norm": 0.2535886764526367, "learning_rate": 1e-05, "loss": 1.0632, "step": 109970 }, { "epoch": 97.40921169176262, "grad_norm": 0.2632834017276764, "learning_rate": 1e-05, "loss": 0.9168, "step": 109975 }, { "epoch": 97.41364038972542, "grad_norm": 0.22288323938846588, "learning_rate": 1e-05, "loss": 0.9684, "step": 109980 }, { "epoch": 97.41806908768822, "grad_norm": 0.22108377516269684, "learning_rate": 1e-05, "loss": 0.9278, "step": 109985 }, { "epoch": 97.42249778565102, "grad_norm": 0.21149767935276031, "learning_rate": 1e-05, "loss": 0.9698, "step": 109990 }, { "epoch": 97.42692648361381, "grad_norm": 0.288916677236557, "learning_rate": 1e-05, "loss": 0.9739, "step": 109995 }, { "epoch": 97.43135518157662, "grad_norm": 0.2233758270740509, "learning_rate": 1e-05, "loss": 0.9993, "step": 110000 }, { "epoch": 97.43578387953941, "grad_norm": 0.2551714777946472, "learning_rate": 1e-05, "loss": 0.9335, "step": 110005 }, { "epoch": 97.44021257750221, "grad_norm": 0.25014278292655945, "learning_rate": 1e-05, "loss": 0.8779, "step": 110010 }, { "epoch": 97.44464127546502, "grad_norm": 0.23546725511550903, "learning_rate": 1e-05, "loss": 0.9358, "step": 110015 }, { "epoch": 97.44906997342781, "grad_norm": 0.231979101896286, "learning_rate": 1e-05, "loss": 1.073, "step": 110020 }, { "epoch": 97.4534986713906, "grad_norm": 0.2500385642051697, "learning_rate": 1e-05, "loss": 0.9047, "step": 110025 }, { "epoch": 97.45792736935341, "grad_norm": 0.24851708114147186, "learning_rate": 1e-05, "loss": 0.9898, "step": 110030 }, { "epoch": 97.46235606731621, "grad_norm": 0.22333583235740662, "learning_rate": 1e-05, "loss": 0.9605, "step": 110035 }, { "epoch": 97.466784765279, "grad_norm": 0.2142449915409088, "learning_rate": 1e-05, "loss": 0.9599, "step": 110040 }, { "epoch": 97.47121346324181, "grad_norm": 0.19926738739013672, "learning_rate": 1e-05, "loss": 0.8815, "step": 110045 }, { "epoch": 97.4756421612046, "grad_norm": 0.24198172986507416, "learning_rate": 1e-05, "loss": 1.0151, "step": 110050 }, { "epoch": 97.4800708591674, "grad_norm": 0.279943585395813, "learning_rate": 1e-05, "loss": 0.9332, "step": 110055 }, { "epoch": 97.4844995571302, "grad_norm": 0.23222343623638153, "learning_rate": 1e-05, "loss": 0.9579, "step": 110060 }, { "epoch": 97.488928255093, "grad_norm": 0.22931066155433655, "learning_rate": 1e-05, "loss": 0.9369, "step": 110065 }, { "epoch": 97.4933569530558, "grad_norm": 0.23459193110466003, "learning_rate": 1e-05, "loss": 0.9931, "step": 110070 }, { "epoch": 97.4977856510186, "grad_norm": 0.22001387178897858, "learning_rate": 1e-05, "loss": 0.888, "step": 110075 }, { "epoch": 97.5022143489814, "grad_norm": 0.2374526709318161, "learning_rate": 1e-05, "loss": 0.9773, "step": 110080 }, { "epoch": 97.5066430469442, "grad_norm": 0.25915834307670593, "learning_rate": 1e-05, "loss": 1.0072, "step": 110085 }, { "epoch": 97.511071744907, "grad_norm": 0.24076178669929504, "learning_rate": 1e-05, "loss": 0.9312, "step": 110090 }, { "epoch": 97.5155004428698, "grad_norm": 0.25095003843307495, "learning_rate": 1e-05, "loss": 0.9594, "step": 110095 }, { "epoch": 97.5199291408326, "grad_norm": 0.240245521068573, "learning_rate": 1e-05, "loss": 0.9448, "step": 110100 }, { "epoch": 97.5243578387954, "grad_norm": 0.25898298621177673, "learning_rate": 1e-05, "loss": 0.9449, "step": 110105 }, { "epoch": 97.52878653675819, "grad_norm": 0.22489018738269806, "learning_rate": 1e-05, "loss": 0.9858, "step": 110110 }, { "epoch": 97.533215234721, "grad_norm": 0.2503131330013275, "learning_rate": 1e-05, "loss": 0.9749, "step": 110115 }, { "epoch": 97.53764393268379, "grad_norm": 0.22982026636600494, "learning_rate": 1e-05, "loss": 0.9477, "step": 110120 }, { "epoch": 97.54207263064659, "grad_norm": 0.25346243381500244, "learning_rate": 1e-05, "loss": 0.9525, "step": 110125 }, { "epoch": 97.5465013286094, "grad_norm": 0.26209741830825806, "learning_rate": 1e-05, "loss": 0.9776, "step": 110130 }, { "epoch": 97.55093002657219, "grad_norm": 0.25933605432510376, "learning_rate": 1e-05, "loss": 0.9435, "step": 110135 }, { "epoch": 97.55535872453498, "grad_norm": 0.2322559803724289, "learning_rate": 1e-05, "loss": 0.9068, "step": 110140 }, { "epoch": 97.55978742249779, "grad_norm": 0.26609495282173157, "learning_rate": 1e-05, "loss": 0.9552, "step": 110145 }, { "epoch": 97.56421612046059, "grad_norm": 0.272363543510437, "learning_rate": 1e-05, "loss": 0.9559, "step": 110150 }, { "epoch": 97.56864481842338, "grad_norm": 0.22329014539718628, "learning_rate": 1e-05, "loss": 1.0063, "step": 110155 }, { "epoch": 97.57307351638619, "grad_norm": 0.2261374443769455, "learning_rate": 1e-05, "loss": 0.9881, "step": 110160 }, { "epoch": 97.57750221434898, "grad_norm": 0.3081692159175873, "learning_rate": 1e-05, "loss": 0.9677, "step": 110165 }, { "epoch": 97.58193091231178, "grad_norm": 0.2734352946281433, "learning_rate": 1e-05, "loss": 0.9754, "step": 110170 }, { "epoch": 97.58635961027458, "grad_norm": 0.23925837874412537, "learning_rate": 1e-05, "loss": 0.9593, "step": 110175 }, { "epoch": 97.59078830823738, "grad_norm": 0.2325543761253357, "learning_rate": 1e-05, "loss": 0.9509, "step": 110180 }, { "epoch": 97.59521700620017, "grad_norm": 0.25276368856430054, "learning_rate": 1e-05, "loss": 0.9714, "step": 110185 }, { "epoch": 97.59964570416298, "grad_norm": 0.24771519005298615, "learning_rate": 1e-05, "loss": 0.9214, "step": 110190 }, { "epoch": 97.60407440212578, "grad_norm": 0.23299753665924072, "learning_rate": 1e-05, "loss": 0.9424, "step": 110195 }, { "epoch": 97.60850310008857, "grad_norm": 0.2554718255996704, "learning_rate": 1e-05, "loss": 0.9318, "step": 110200 }, { "epoch": 97.61293179805138, "grad_norm": 0.2444372922182083, "learning_rate": 1e-05, "loss": 0.9586, "step": 110205 }, { "epoch": 97.61736049601417, "grad_norm": 0.25932085514068604, "learning_rate": 1e-05, "loss": 1.0112, "step": 110210 }, { "epoch": 97.62178919397697, "grad_norm": 0.24134407937526703, "learning_rate": 1e-05, "loss": 0.9777, "step": 110215 }, { "epoch": 97.62621789193977, "grad_norm": 0.2226153314113617, "learning_rate": 1e-05, "loss": 0.9312, "step": 110220 }, { "epoch": 97.63064658990257, "grad_norm": 0.2217981517314911, "learning_rate": 1e-05, "loss": 0.9839, "step": 110225 }, { "epoch": 97.63507528786536, "grad_norm": 0.1999123990535736, "learning_rate": 1e-05, "loss": 0.9474, "step": 110230 }, { "epoch": 97.63950398582817, "grad_norm": 0.18966734409332275, "learning_rate": 1e-05, "loss": 0.9453, "step": 110235 }, { "epoch": 97.64393268379096, "grad_norm": 0.2350645512342453, "learning_rate": 1e-05, "loss": 0.9705, "step": 110240 }, { "epoch": 97.64836138175376, "grad_norm": 0.23140601813793182, "learning_rate": 1e-05, "loss": 0.9728, "step": 110245 }, { "epoch": 97.65279007971657, "grad_norm": 0.28096091747283936, "learning_rate": 1e-05, "loss": 0.961, "step": 110250 }, { "epoch": 97.65721877767936, "grad_norm": 0.24578042328357697, "learning_rate": 1e-05, "loss": 0.9337, "step": 110255 }, { "epoch": 97.66164747564216, "grad_norm": 0.20488551259040833, "learning_rate": 1e-05, "loss": 0.9616, "step": 110260 }, { "epoch": 97.66607617360496, "grad_norm": 0.2229185849428177, "learning_rate": 1e-05, "loss": 0.8908, "step": 110265 }, { "epoch": 97.67050487156776, "grad_norm": 0.3444502353668213, "learning_rate": 1e-05, "loss": 0.9304, "step": 110270 }, { "epoch": 97.67493356953055, "grad_norm": 0.23644956946372986, "learning_rate": 1e-05, "loss": 0.9585, "step": 110275 }, { "epoch": 97.67936226749336, "grad_norm": 0.22033430635929108, "learning_rate": 1e-05, "loss": 0.9854, "step": 110280 }, { "epoch": 97.68379096545615, "grad_norm": 0.22529464960098267, "learning_rate": 1e-05, "loss": 0.9746, "step": 110285 }, { "epoch": 97.68821966341895, "grad_norm": 0.2736489772796631, "learning_rate": 1e-05, "loss": 0.9303, "step": 110290 }, { "epoch": 97.69264836138176, "grad_norm": 0.24621880054473877, "learning_rate": 1e-05, "loss": 0.9626, "step": 110295 }, { "epoch": 97.69707705934455, "grad_norm": 0.236724853515625, "learning_rate": 1e-05, "loss": 0.9666, "step": 110300 }, { "epoch": 97.70150575730734, "grad_norm": 0.2793537974357605, "learning_rate": 1e-05, "loss": 0.943, "step": 110305 }, { "epoch": 97.70593445527015, "grad_norm": 0.22544656693935394, "learning_rate": 1e-05, "loss": 0.9703, "step": 110310 }, { "epoch": 97.71036315323295, "grad_norm": 0.2598043382167816, "learning_rate": 1e-05, "loss": 0.9846, "step": 110315 }, { "epoch": 97.71479185119574, "grad_norm": 0.26090654730796814, "learning_rate": 1e-05, "loss": 0.9765, "step": 110320 }, { "epoch": 97.71922054915855, "grad_norm": 0.19919486343860626, "learning_rate": 1e-05, "loss": 0.9386, "step": 110325 }, { "epoch": 97.72364924712134, "grad_norm": 0.25627952814102173, "learning_rate": 1e-05, "loss": 0.9506, "step": 110330 }, { "epoch": 97.72807794508415, "grad_norm": 0.2522720694541931, "learning_rate": 1e-05, "loss": 0.9429, "step": 110335 }, { "epoch": 97.73250664304695, "grad_norm": 0.21583019196987152, "learning_rate": 1e-05, "loss": 0.959, "step": 110340 }, { "epoch": 97.73693534100974, "grad_norm": 0.23461996018886566, "learning_rate": 1e-05, "loss": 0.9663, "step": 110345 }, { "epoch": 97.74136403897255, "grad_norm": 0.20691999793052673, "learning_rate": 1e-05, "loss": 0.956, "step": 110350 }, { "epoch": 97.74579273693534, "grad_norm": 0.26187989115715027, "learning_rate": 1e-05, "loss": 0.9544, "step": 110355 }, { "epoch": 97.75022143489814, "grad_norm": 0.20601873099803925, "learning_rate": 1e-05, "loss": 0.9857, "step": 110360 }, { "epoch": 97.75465013286095, "grad_norm": 0.23647361993789673, "learning_rate": 1e-05, "loss": 0.9168, "step": 110365 }, { "epoch": 97.75907883082374, "grad_norm": 0.22742019593715668, "learning_rate": 1e-05, "loss": 0.8856, "step": 110370 }, { "epoch": 97.76350752878653, "grad_norm": 0.26356250047683716, "learning_rate": 1e-05, "loss": 0.9304, "step": 110375 }, { "epoch": 97.76793622674934, "grad_norm": 0.26778408885002136, "learning_rate": 1e-05, "loss": 0.9503, "step": 110380 }, { "epoch": 97.77236492471214, "grad_norm": 0.24719949066638947, "learning_rate": 1e-05, "loss": 0.9233, "step": 110385 }, { "epoch": 97.77679362267493, "grad_norm": 0.21829064190387726, "learning_rate": 1e-05, "loss": 0.9144, "step": 110390 }, { "epoch": 97.78122232063774, "grad_norm": 0.21765105426311493, "learning_rate": 1e-05, "loss": 0.9756, "step": 110395 }, { "epoch": 97.78565101860053, "grad_norm": 0.24503357708454132, "learning_rate": 1e-05, "loss": 0.948, "step": 110400 }, { "epoch": 97.79007971656333, "grad_norm": 0.23801587522029877, "learning_rate": 1e-05, "loss": 0.9793, "step": 110405 }, { "epoch": 97.79450841452613, "grad_norm": 0.284613698720932, "learning_rate": 1e-05, "loss": 0.9332, "step": 110410 }, { "epoch": 97.79893711248893, "grad_norm": 0.2725047767162323, "learning_rate": 1e-05, "loss": 0.9729, "step": 110415 }, { "epoch": 97.80336581045172, "grad_norm": 0.2405172884464264, "learning_rate": 1e-05, "loss": 0.9555, "step": 110420 }, { "epoch": 97.80779450841453, "grad_norm": 0.23448525369167328, "learning_rate": 1e-05, "loss": 0.9418, "step": 110425 }, { "epoch": 97.81222320637733, "grad_norm": 0.24444708228111267, "learning_rate": 1e-05, "loss": 0.9617, "step": 110430 }, { "epoch": 97.81665190434012, "grad_norm": 0.23331406712532043, "learning_rate": 1e-05, "loss": 0.9336, "step": 110435 }, { "epoch": 97.82108060230293, "grad_norm": 0.25338447093963623, "learning_rate": 1e-05, "loss": 0.9525, "step": 110440 }, { "epoch": 97.82550930026572, "grad_norm": 0.22169671952724457, "learning_rate": 1e-05, "loss": 0.9664, "step": 110445 }, { "epoch": 97.82993799822852, "grad_norm": 0.2375779002904892, "learning_rate": 1e-05, "loss": 0.9375, "step": 110450 }, { "epoch": 97.83436669619132, "grad_norm": 0.2070441097021103, "learning_rate": 1e-05, "loss": 0.9871, "step": 110455 }, { "epoch": 97.83879539415412, "grad_norm": 0.24972273409366608, "learning_rate": 1e-05, "loss": 0.9736, "step": 110460 }, { "epoch": 97.84322409211691, "grad_norm": 0.22928331792354584, "learning_rate": 1e-05, "loss": 0.8943, "step": 110465 }, { "epoch": 97.84765279007972, "grad_norm": 0.2580093741416931, "learning_rate": 1e-05, "loss": 0.9242, "step": 110470 }, { "epoch": 97.85208148804251, "grad_norm": 0.24549056589603424, "learning_rate": 1e-05, "loss": 0.9608, "step": 110475 }, { "epoch": 97.85651018600531, "grad_norm": 0.2445269674062729, "learning_rate": 1e-05, "loss": 0.9971, "step": 110480 }, { "epoch": 97.86093888396812, "grad_norm": 0.2873865067958832, "learning_rate": 1e-05, "loss": 0.9711, "step": 110485 }, { "epoch": 97.86536758193091, "grad_norm": 0.2622067630290985, "learning_rate": 1e-05, "loss": 0.9377, "step": 110490 }, { "epoch": 97.8697962798937, "grad_norm": 0.21422092616558075, "learning_rate": 1e-05, "loss": 0.962, "step": 110495 }, { "epoch": 97.87422497785651, "grad_norm": 0.24483653903007507, "learning_rate": 1e-05, "loss": 0.9623, "step": 110500 }, { "epoch": 97.87865367581931, "grad_norm": 0.24253413081169128, "learning_rate": 1e-05, "loss": 1.0047, "step": 110505 }, { "epoch": 97.8830823737821, "grad_norm": 0.2257661074399948, "learning_rate": 1e-05, "loss": 0.95, "step": 110510 }, { "epoch": 97.88751107174491, "grad_norm": 0.27211010456085205, "learning_rate": 1e-05, "loss": 0.9837, "step": 110515 }, { "epoch": 97.8919397697077, "grad_norm": 0.27671968936920166, "learning_rate": 1e-05, "loss": 0.9929, "step": 110520 }, { "epoch": 97.8963684676705, "grad_norm": 0.22439676523208618, "learning_rate": 1e-05, "loss": 0.987, "step": 110525 }, { "epoch": 97.9007971656333, "grad_norm": 0.2122560292482376, "learning_rate": 1e-05, "loss": 0.9373, "step": 110530 }, { "epoch": 97.9052258635961, "grad_norm": 0.2398727834224701, "learning_rate": 1e-05, "loss": 0.9363, "step": 110535 }, { "epoch": 97.9096545615589, "grad_norm": 0.2522470951080322, "learning_rate": 1e-05, "loss": 0.9965, "step": 110540 }, { "epoch": 97.9140832595217, "grad_norm": 0.22640301287174225, "learning_rate": 1e-05, "loss": 0.9038, "step": 110545 }, { "epoch": 97.9185119574845, "grad_norm": 0.23604297637939453, "learning_rate": 1e-05, "loss": 0.9596, "step": 110550 }, { "epoch": 97.92294065544729, "grad_norm": 0.27733513712882996, "learning_rate": 1e-05, "loss": 0.9706, "step": 110555 }, { "epoch": 97.9273693534101, "grad_norm": 0.22241398692131042, "learning_rate": 1e-05, "loss": 0.9359, "step": 110560 }, { "epoch": 97.9317980513729, "grad_norm": 0.247050940990448, "learning_rate": 1e-05, "loss": 0.9937, "step": 110565 }, { "epoch": 97.9362267493357, "grad_norm": 0.238209068775177, "learning_rate": 1e-05, "loss": 0.9409, "step": 110570 }, { "epoch": 97.9406554472985, "grad_norm": 0.237433522939682, "learning_rate": 1e-05, "loss": 0.9951, "step": 110575 }, { "epoch": 97.94508414526129, "grad_norm": 0.20334464311599731, "learning_rate": 1e-05, "loss": 0.9219, "step": 110580 }, { "epoch": 97.9495128432241, "grad_norm": 0.20749704539775848, "learning_rate": 1e-05, "loss": 0.8973, "step": 110585 }, { "epoch": 97.9539415411869, "grad_norm": 0.2957179844379425, "learning_rate": 1e-05, "loss": 0.9547, "step": 110590 }, { "epoch": 97.95837023914969, "grad_norm": 0.2433343380689621, "learning_rate": 1e-05, "loss": 0.9777, "step": 110595 }, { "epoch": 97.9627989371125, "grad_norm": 0.22279171645641327, "learning_rate": 1e-05, "loss": 0.9135, "step": 110600 }, { "epoch": 97.96722763507529, "grad_norm": 0.22962026298046112, "learning_rate": 1e-05, "loss": 0.9683, "step": 110605 }, { "epoch": 97.97165633303808, "grad_norm": 0.2327713966369629, "learning_rate": 1e-05, "loss": 0.9933, "step": 110610 }, { "epoch": 97.97608503100089, "grad_norm": 0.305185467004776, "learning_rate": 1e-05, "loss": 0.957, "step": 110615 }, { "epoch": 97.98051372896369, "grad_norm": 0.24828708171844482, "learning_rate": 1e-05, "loss": 0.973, "step": 110620 }, { "epoch": 97.98494242692648, "grad_norm": 0.24489706754684448, "learning_rate": 1e-05, "loss": 0.9608, "step": 110625 }, { "epoch": 97.98937112488929, "grad_norm": 0.21437722444534302, "learning_rate": 1e-05, "loss": 0.9992, "step": 110630 }, { "epoch": 97.99379982285208, "grad_norm": 0.24583928287029266, "learning_rate": 1e-05, "loss": 0.9716, "step": 110635 }, { "epoch": 97.99822852081488, "grad_norm": 0.21689701080322266, "learning_rate": 1e-05, "loss": 0.9886, "step": 110640 }, { "epoch": 98.00265721877768, "grad_norm": 0.24357813596725464, "learning_rate": 1e-05, "loss": 0.9656, "step": 110645 }, { "epoch": 98.00708591674048, "grad_norm": 0.2195167988538742, "learning_rate": 1e-05, "loss": 0.955, "step": 110650 }, { "epoch": 98.01151461470327, "grad_norm": 0.2015455663204193, "learning_rate": 1e-05, "loss": 0.9651, "step": 110655 }, { "epoch": 98.01594331266608, "grad_norm": 0.2187841385602951, "learning_rate": 1e-05, "loss": 0.925, "step": 110660 }, { "epoch": 98.02037201062888, "grad_norm": 0.2483312338590622, "learning_rate": 1e-05, "loss": 0.9202, "step": 110665 }, { "epoch": 98.02480070859167, "grad_norm": 0.22294673323631287, "learning_rate": 1e-05, "loss": 0.9467, "step": 110670 }, { "epoch": 98.02922940655448, "grad_norm": 0.24270865321159363, "learning_rate": 1e-05, "loss": 0.9833, "step": 110675 }, { "epoch": 98.03365810451727, "grad_norm": 0.24967585504055023, "learning_rate": 1e-05, "loss": 0.9923, "step": 110680 }, { "epoch": 98.03808680248007, "grad_norm": 0.2395699918270111, "learning_rate": 1e-05, "loss": 0.9636, "step": 110685 }, { "epoch": 98.04251550044287, "grad_norm": 0.20785701274871826, "learning_rate": 1e-05, "loss": 0.9649, "step": 110690 }, { "epoch": 98.04694419840567, "grad_norm": 0.2575901448726654, "learning_rate": 1e-05, "loss": 0.9653, "step": 110695 }, { "epoch": 98.05137289636846, "grad_norm": 0.2463057041168213, "learning_rate": 1e-05, "loss": 0.9824, "step": 110700 }, { "epoch": 98.05580159433127, "grad_norm": 0.23454691469669342, "learning_rate": 1e-05, "loss": 0.9855, "step": 110705 }, { "epoch": 98.06023029229407, "grad_norm": 0.20412150025367737, "learning_rate": 1e-05, "loss": 0.9537, "step": 110710 }, { "epoch": 98.06465899025686, "grad_norm": 0.1934882253408432, "learning_rate": 1e-05, "loss": 0.915, "step": 110715 }, { "epoch": 98.06908768821967, "grad_norm": 0.24537554383277893, "learning_rate": 1e-05, "loss": 0.9517, "step": 110720 }, { "epoch": 98.07351638618246, "grad_norm": 0.2861257493495941, "learning_rate": 1e-05, "loss": 0.9411, "step": 110725 }, { "epoch": 98.07794508414526, "grad_norm": 0.29009395837783813, "learning_rate": 1e-05, "loss": 0.9439, "step": 110730 }, { "epoch": 98.08237378210806, "grad_norm": 0.26506465673446655, "learning_rate": 1e-05, "loss": 0.9657, "step": 110735 }, { "epoch": 98.08680248007086, "grad_norm": 0.27491116523742676, "learning_rate": 1e-05, "loss": 0.9711, "step": 110740 }, { "epoch": 98.09123117803365, "grad_norm": 0.2540963292121887, "learning_rate": 1e-05, "loss": 0.9341, "step": 110745 }, { "epoch": 98.09565987599646, "grad_norm": 0.2668173015117645, "learning_rate": 1e-05, "loss": 0.9492, "step": 110750 }, { "epoch": 98.10008857395925, "grad_norm": 0.25814470648765564, "learning_rate": 1e-05, "loss": 0.9612, "step": 110755 }, { "epoch": 98.10451727192205, "grad_norm": 0.3089388906955719, "learning_rate": 1e-05, "loss": 0.9626, "step": 110760 }, { "epoch": 98.10894596988486, "grad_norm": 0.2397197037935257, "learning_rate": 1e-05, "loss": 0.9169, "step": 110765 }, { "epoch": 98.11337466784765, "grad_norm": 0.25080448389053345, "learning_rate": 1e-05, "loss": 1.006, "step": 110770 }, { "epoch": 98.11780336581045, "grad_norm": 0.2382594496011734, "learning_rate": 1e-05, "loss": 0.9608, "step": 110775 }, { "epoch": 98.12223206377325, "grad_norm": 0.22635255753993988, "learning_rate": 1e-05, "loss": 0.8874, "step": 110780 }, { "epoch": 98.12666076173605, "grad_norm": 0.2467639446258545, "learning_rate": 1e-05, "loss": 0.9479, "step": 110785 }, { "epoch": 98.13108945969884, "grad_norm": 0.2402334064245224, "learning_rate": 1e-05, "loss": 0.9088, "step": 110790 }, { "epoch": 98.13551815766165, "grad_norm": 0.20824818313121796, "learning_rate": 1e-05, "loss": 0.9316, "step": 110795 }, { "epoch": 98.13994685562444, "grad_norm": 0.21272899210453033, "learning_rate": 1e-05, "loss": 0.9355, "step": 110800 }, { "epoch": 98.14437555358724, "grad_norm": 0.2517433762550354, "learning_rate": 1e-05, "loss": 0.921, "step": 110805 }, { "epoch": 98.14880425155005, "grad_norm": 0.2900422215461731, "learning_rate": 1e-05, "loss": 0.9661, "step": 110810 }, { "epoch": 98.15323294951284, "grad_norm": 0.26187029480934143, "learning_rate": 1e-05, "loss": 0.92, "step": 110815 }, { "epoch": 98.15766164747565, "grad_norm": 0.21175190806388855, "learning_rate": 1e-05, "loss": 0.8952, "step": 110820 }, { "epoch": 98.16209034543844, "grad_norm": 0.2889721393585205, "learning_rate": 1e-05, "loss": 0.9208, "step": 110825 }, { "epoch": 98.16651904340124, "grad_norm": 0.2230461984872818, "learning_rate": 1e-05, "loss": 0.9714, "step": 110830 }, { "epoch": 98.17094774136405, "grad_norm": 0.23982511460781097, "learning_rate": 1e-05, "loss": 0.9399, "step": 110835 }, { "epoch": 98.17537643932684, "grad_norm": 0.226076140999794, "learning_rate": 1e-05, "loss": 1.0321, "step": 110840 }, { "epoch": 98.17980513728963, "grad_norm": 0.23692019283771515, "learning_rate": 1e-05, "loss": 0.9365, "step": 110845 }, { "epoch": 98.18423383525244, "grad_norm": 0.23556871712207794, "learning_rate": 1e-05, "loss": 1.013, "step": 110850 }, { "epoch": 98.18866253321524, "grad_norm": 0.23961105942726135, "learning_rate": 1e-05, "loss": 0.957, "step": 110855 }, { "epoch": 98.19309123117803, "grad_norm": 0.23254182934761047, "learning_rate": 1e-05, "loss": 0.9627, "step": 110860 }, { "epoch": 98.19751992914084, "grad_norm": 0.24206198751926422, "learning_rate": 1e-05, "loss": 0.9729, "step": 110865 }, { "epoch": 98.20194862710363, "grad_norm": 0.24894015491008759, "learning_rate": 1e-05, "loss": 0.9469, "step": 110870 }, { "epoch": 98.20637732506643, "grad_norm": 0.2574262320995331, "learning_rate": 1e-05, "loss": 0.9475, "step": 110875 }, { "epoch": 98.21080602302924, "grad_norm": 0.27776750922203064, "learning_rate": 1e-05, "loss": 0.9662, "step": 110880 }, { "epoch": 98.21523472099203, "grad_norm": 0.2227364480495453, "learning_rate": 1e-05, "loss": 0.9648, "step": 110885 }, { "epoch": 98.21966341895482, "grad_norm": 0.24638865888118744, "learning_rate": 1e-05, "loss": 0.9116, "step": 110890 }, { "epoch": 98.22409211691763, "grad_norm": 0.2511344254016876, "learning_rate": 1e-05, "loss": 0.9451, "step": 110895 }, { "epoch": 98.22852081488043, "grad_norm": 0.2079719603061676, "learning_rate": 1e-05, "loss": 0.9457, "step": 110900 }, { "epoch": 98.23294951284322, "grad_norm": 0.2462829202413559, "learning_rate": 1e-05, "loss": 0.9582, "step": 110905 }, { "epoch": 98.23737821080603, "grad_norm": 0.24128328263759613, "learning_rate": 1e-05, "loss": 0.9927, "step": 110910 }, { "epoch": 98.24180690876882, "grad_norm": 0.278601735830307, "learning_rate": 1e-05, "loss": 0.9641, "step": 110915 }, { "epoch": 98.24623560673162, "grad_norm": 0.2555367946624756, "learning_rate": 1e-05, "loss": 0.9733, "step": 110920 }, { "epoch": 98.25066430469442, "grad_norm": 0.22950394451618195, "learning_rate": 1e-05, "loss": 0.9582, "step": 110925 }, { "epoch": 98.25509300265722, "grad_norm": 0.23852285742759705, "learning_rate": 1e-05, "loss": 0.9945, "step": 110930 }, { "epoch": 98.25952170062001, "grad_norm": 0.2170538455247879, "learning_rate": 1e-05, "loss": 0.9484, "step": 110935 }, { "epoch": 98.26395039858282, "grad_norm": 0.22956758737564087, "learning_rate": 1e-05, "loss": 0.9858, "step": 110940 }, { "epoch": 98.26837909654562, "grad_norm": 0.24847343564033508, "learning_rate": 1e-05, "loss": 1.0357, "step": 110945 }, { "epoch": 98.27280779450841, "grad_norm": 0.23312512040138245, "learning_rate": 1e-05, "loss": 0.9937, "step": 110950 }, { "epoch": 98.27723649247122, "grad_norm": 0.2557574212551117, "learning_rate": 1e-05, "loss": 0.9322, "step": 110955 }, { "epoch": 98.28166519043401, "grad_norm": 0.2281859815120697, "learning_rate": 1e-05, "loss": 0.9452, "step": 110960 }, { "epoch": 98.2860938883968, "grad_norm": 0.25699982047080994, "learning_rate": 1e-05, "loss": 1.03, "step": 110965 }, { "epoch": 98.29052258635961, "grad_norm": 0.3048710227012634, "learning_rate": 1e-05, "loss": 0.9839, "step": 110970 }, { "epoch": 98.29495128432241, "grad_norm": 0.22279497981071472, "learning_rate": 1e-05, "loss": 0.9304, "step": 110975 }, { "epoch": 98.2993799822852, "grad_norm": 0.2391633689403534, "learning_rate": 1e-05, "loss": 0.9353, "step": 110980 }, { "epoch": 98.30380868024801, "grad_norm": 0.23048844933509827, "learning_rate": 1e-05, "loss": 0.9719, "step": 110985 }, { "epoch": 98.3082373782108, "grad_norm": 0.2232014685869217, "learning_rate": 1e-05, "loss": 0.9627, "step": 110990 }, { "epoch": 98.3126660761736, "grad_norm": 0.2609648108482361, "learning_rate": 1e-05, "loss": 0.9334, "step": 110995 }, { "epoch": 98.31709477413641, "grad_norm": 0.2280598133802414, "learning_rate": 1e-05, "loss": 0.9623, "step": 111000 }, { "epoch": 98.3215234720992, "grad_norm": 0.21310418844223022, "learning_rate": 1e-05, "loss": 0.96, "step": 111005 }, { "epoch": 98.325952170062, "grad_norm": 0.2580728232860565, "learning_rate": 1e-05, "loss": 0.9772, "step": 111010 }, { "epoch": 98.3303808680248, "grad_norm": 0.2343798577785492, "learning_rate": 1e-05, "loss": 0.9081, "step": 111015 }, { "epoch": 98.3348095659876, "grad_norm": 0.23639167845249176, "learning_rate": 1e-05, "loss": 0.8776, "step": 111020 }, { "epoch": 98.33923826395039, "grad_norm": 0.24484464526176453, "learning_rate": 1e-05, "loss": 0.9898, "step": 111025 }, { "epoch": 98.3436669619132, "grad_norm": 0.22854070365428925, "learning_rate": 1e-05, "loss": 0.9693, "step": 111030 }, { "epoch": 98.348095659876, "grad_norm": 0.2719861567020416, "learning_rate": 1e-05, "loss": 0.9306, "step": 111035 }, { "epoch": 98.35252435783879, "grad_norm": 0.21270538866519928, "learning_rate": 1e-05, "loss": 0.9654, "step": 111040 }, { "epoch": 98.3569530558016, "grad_norm": 0.2241915911436081, "learning_rate": 1e-05, "loss": 0.9233, "step": 111045 }, { "epoch": 98.36138175376439, "grad_norm": 0.2666899859905243, "learning_rate": 1e-05, "loss": 0.9643, "step": 111050 }, { "epoch": 98.36581045172719, "grad_norm": 0.25463852286338806, "learning_rate": 1e-05, "loss": 0.9434, "step": 111055 }, { "epoch": 98.37023914969, "grad_norm": 0.20345906913280487, "learning_rate": 1e-05, "loss": 0.9868, "step": 111060 }, { "epoch": 98.37466784765279, "grad_norm": 0.2599492073059082, "learning_rate": 1e-05, "loss": 0.9399, "step": 111065 }, { "epoch": 98.3790965456156, "grad_norm": 0.23631832003593445, "learning_rate": 1e-05, "loss": 0.9539, "step": 111070 }, { "epoch": 98.38352524357839, "grad_norm": 0.22881107032299042, "learning_rate": 1e-05, "loss": 1.0154, "step": 111075 }, { "epoch": 98.38795394154118, "grad_norm": 0.24219910800457, "learning_rate": 1e-05, "loss": 0.9947, "step": 111080 }, { "epoch": 98.39238263950399, "grad_norm": 0.2311176061630249, "learning_rate": 1e-05, "loss": 0.9421, "step": 111085 }, { "epoch": 98.39681133746679, "grad_norm": 0.24359175562858582, "learning_rate": 1e-05, "loss": 0.9581, "step": 111090 }, { "epoch": 98.40124003542958, "grad_norm": 0.2252773642539978, "learning_rate": 1e-05, "loss": 0.9901, "step": 111095 }, { "epoch": 98.40566873339239, "grad_norm": 0.23360174894332886, "learning_rate": 1e-05, "loss": 0.9365, "step": 111100 }, { "epoch": 98.41009743135518, "grad_norm": 0.2345714271068573, "learning_rate": 1e-05, "loss": 0.9631, "step": 111105 }, { "epoch": 98.41452612931798, "grad_norm": 0.2597193717956543, "learning_rate": 1e-05, "loss": 0.9345, "step": 111110 }, { "epoch": 98.41895482728079, "grad_norm": 0.2260092943906784, "learning_rate": 1e-05, "loss": 0.9626, "step": 111115 }, { "epoch": 98.42338352524358, "grad_norm": 0.25709378719329834, "learning_rate": 1e-05, "loss": 0.9213, "step": 111120 }, { "epoch": 98.42781222320637, "grad_norm": 0.2524168789386749, "learning_rate": 1e-05, "loss": 0.9337, "step": 111125 }, { "epoch": 98.43224092116918, "grad_norm": 0.2275114357471466, "learning_rate": 1e-05, "loss": 0.9601, "step": 111130 }, { "epoch": 98.43666961913198, "grad_norm": 0.20537328720092773, "learning_rate": 1e-05, "loss": 0.9467, "step": 111135 }, { "epoch": 98.44109831709477, "grad_norm": 0.2553630471229553, "learning_rate": 1e-05, "loss": 0.9091, "step": 111140 }, { "epoch": 98.44552701505758, "grad_norm": 0.20277798175811768, "learning_rate": 1e-05, "loss": 0.9601, "step": 111145 }, { "epoch": 98.44995571302037, "grad_norm": 0.23031894862651825, "learning_rate": 1e-05, "loss": 0.8984, "step": 111150 }, { "epoch": 98.45438441098317, "grad_norm": 0.23661047220230103, "learning_rate": 1e-05, "loss": 0.9552, "step": 111155 }, { "epoch": 98.45881310894598, "grad_norm": 0.21697628498077393, "learning_rate": 1e-05, "loss": 0.9578, "step": 111160 }, { "epoch": 98.46324180690877, "grad_norm": 0.3396170139312744, "learning_rate": 1e-05, "loss": 0.9665, "step": 111165 }, { "epoch": 98.46767050487156, "grad_norm": 0.2101564109325409, "learning_rate": 1e-05, "loss": 0.9679, "step": 111170 }, { "epoch": 98.47209920283437, "grad_norm": 0.28520822525024414, "learning_rate": 1e-05, "loss": 0.9655, "step": 111175 }, { "epoch": 98.47652790079717, "grad_norm": 0.2421087920665741, "learning_rate": 1e-05, "loss": 0.9274, "step": 111180 }, { "epoch": 98.48095659875996, "grad_norm": 0.205408975481987, "learning_rate": 1e-05, "loss": 0.8917, "step": 111185 }, { "epoch": 98.48538529672277, "grad_norm": 0.2246294468641281, "learning_rate": 1e-05, "loss": 0.9508, "step": 111190 }, { "epoch": 98.48981399468556, "grad_norm": 0.23292651772499084, "learning_rate": 1e-05, "loss": 1.0048, "step": 111195 }, { "epoch": 98.49424269264836, "grad_norm": 0.23167160153388977, "learning_rate": 1e-05, "loss": 0.9262, "step": 111200 }, { "epoch": 98.49867139061116, "grad_norm": 0.2443193942308426, "learning_rate": 1e-05, "loss": 0.9092, "step": 111205 }, { "epoch": 98.50310008857396, "grad_norm": 0.2544289529323578, "learning_rate": 1e-05, "loss": 0.9123, "step": 111210 }, { "epoch": 98.50752878653675, "grad_norm": 0.22717690467834473, "learning_rate": 1e-05, "loss": 0.927, "step": 111215 }, { "epoch": 98.51195748449956, "grad_norm": 0.2539583444595337, "learning_rate": 1e-05, "loss": 0.9961, "step": 111220 }, { "epoch": 98.51638618246236, "grad_norm": 0.25394365191459656, "learning_rate": 1e-05, "loss": 0.967, "step": 111225 }, { "epoch": 98.52081488042515, "grad_norm": 0.2882280647754669, "learning_rate": 1e-05, "loss": 0.9637, "step": 111230 }, { "epoch": 98.52524357838796, "grad_norm": 0.2492353916168213, "learning_rate": 1e-05, "loss": 0.9414, "step": 111235 }, { "epoch": 98.52967227635075, "grad_norm": 0.270648330450058, "learning_rate": 1e-05, "loss": 0.9358, "step": 111240 }, { "epoch": 98.53410097431355, "grad_norm": 0.2774631977081299, "learning_rate": 1e-05, "loss": 0.997, "step": 111245 }, { "epoch": 98.53852967227635, "grad_norm": 0.2577580511569977, "learning_rate": 1e-05, "loss": 0.9568, "step": 111250 }, { "epoch": 98.54295837023915, "grad_norm": 0.26933014392852783, "learning_rate": 1e-05, "loss": 0.9203, "step": 111255 }, { "epoch": 98.54738706820194, "grad_norm": 0.22621290385723114, "learning_rate": 1e-05, "loss": 0.9412, "step": 111260 }, { "epoch": 98.55181576616475, "grad_norm": 0.23177401721477509, "learning_rate": 1e-05, "loss": 0.9204, "step": 111265 }, { "epoch": 98.55624446412754, "grad_norm": 0.24310125410556793, "learning_rate": 1e-05, "loss": 1.0037, "step": 111270 }, { "epoch": 98.56067316209034, "grad_norm": 0.270818293094635, "learning_rate": 1e-05, "loss": 1.0087, "step": 111275 }, { "epoch": 98.56510186005315, "grad_norm": 0.23837019503116608, "learning_rate": 1e-05, "loss": 0.8923, "step": 111280 }, { "epoch": 98.56953055801594, "grad_norm": 0.2304750233888626, "learning_rate": 1e-05, "loss": 0.9264, "step": 111285 }, { "epoch": 98.57395925597874, "grad_norm": 0.2091333419084549, "learning_rate": 1e-05, "loss": 0.952, "step": 111290 }, { "epoch": 98.57838795394154, "grad_norm": 0.2561667263507843, "learning_rate": 1e-05, "loss": 0.9518, "step": 111295 }, { "epoch": 98.58281665190434, "grad_norm": 0.23644684255123138, "learning_rate": 1e-05, "loss": 0.9861, "step": 111300 }, { "epoch": 98.58724534986715, "grad_norm": 0.26437947154045105, "learning_rate": 1e-05, "loss": 0.9754, "step": 111305 }, { "epoch": 98.59167404782994, "grad_norm": 0.2136402279138565, "learning_rate": 1e-05, "loss": 0.9994, "step": 111310 }, { "epoch": 98.59610274579273, "grad_norm": 0.2597300708293915, "learning_rate": 1e-05, "loss": 0.9661, "step": 111315 }, { "epoch": 98.60053144375554, "grad_norm": 0.20606687664985657, "learning_rate": 1e-05, "loss": 0.9363, "step": 111320 }, { "epoch": 98.60496014171834, "grad_norm": 0.2137267142534256, "learning_rate": 1e-05, "loss": 0.9213, "step": 111325 }, { "epoch": 98.60938883968113, "grad_norm": 0.21220698952674866, "learning_rate": 1e-05, "loss": 0.9957, "step": 111330 }, { "epoch": 98.61381753764394, "grad_norm": 0.2315533310174942, "learning_rate": 1e-05, "loss": 0.9479, "step": 111335 }, { "epoch": 98.61824623560673, "grad_norm": 0.23546794056892395, "learning_rate": 1e-05, "loss": 0.9532, "step": 111340 }, { "epoch": 98.62267493356953, "grad_norm": 0.23706048727035522, "learning_rate": 1e-05, "loss": 0.9388, "step": 111345 }, { "epoch": 98.62710363153234, "grad_norm": 0.2531529366970062, "learning_rate": 1e-05, "loss": 0.9898, "step": 111350 }, { "epoch": 98.63153232949513, "grad_norm": 0.23595298826694489, "learning_rate": 1e-05, "loss": 0.9521, "step": 111355 }, { "epoch": 98.63596102745792, "grad_norm": 0.22604885697364807, "learning_rate": 1e-05, "loss": 0.941, "step": 111360 }, { "epoch": 98.64038972542073, "grad_norm": 0.20171397924423218, "learning_rate": 1e-05, "loss": 0.9661, "step": 111365 }, { "epoch": 98.64481842338353, "grad_norm": 0.22373943030834198, "learning_rate": 1e-05, "loss": 0.9508, "step": 111370 }, { "epoch": 98.64924712134632, "grad_norm": 0.24752505123615265, "learning_rate": 1e-05, "loss": 0.9755, "step": 111375 }, { "epoch": 98.65367581930913, "grad_norm": 0.24248725175857544, "learning_rate": 1e-05, "loss": 0.9581, "step": 111380 }, { "epoch": 98.65810451727192, "grad_norm": 0.26691681146621704, "learning_rate": 1e-05, "loss": 0.9393, "step": 111385 }, { "epoch": 98.66253321523472, "grad_norm": 0.2707720994949341, "learning_rate": 1e-05, "loss": 0.9426, "step": 111390 }, { "epoch": 98.66696191319753, "grad_norm": 0.25114381313323975, "learning_rate": 1e-05, "loss": 0.9844, "step": 111395 }, { "epoch": 98.67139061116032, "grad_norm": 0.23242361843585968, "learning_rate": 1e-05, "loss": 0.9653, "step": 111400 }, { "epoch": 98.67581930912311, "grad_norm": 0.25180405378341675, "learning_rate": 1e-05, "loss": 0.9533, "step": 111405 }, { "epoch": 98.68024800708592, "grad_norm": 0.21613353490829468, "learning_rate": 1e-05, "loss": 0.8961, "step": 111410 }, { "epoch": 98.68467670504872, "grad_norm": 0.274187296628952, "learning_rate": 1e-05, "loss": 0.9182, "step": 111415 }, { "epoch": 98.68910540301151, "grad_norm": 0.24361048638820648, "learning_rate": 1e-05, "loss": 0.9965, "step": 111420 }, { "epoch": 98.69353410097432, "grad_norm": 0.2177826166152954, "learning_rate": 1e-05, "loss": 0.9584, "step": 111425 }, { "epoch": 98.69796279893711, "grad_norm": 0.2366824746131897, "learning_rate": 1e-05, "loss": 0.938, "step": 111430 }, { "epoch": 98.7023914968999, "grad_norm": 0.2475898712873459, "learning_rate": 1e-05, "loss": 1.0256, "step": 111435 }, { "epoch": 98.70682019486271, "grad_norm": 0.25118812918663025, "learning_rate": 1e-05, "loss": 0.8879, "step": 111440 }, { "epoch": 98.71124889282551, "grad_norm": 0.23733948171138763, "learning_rate": 1e-05, "loss": 0.9669, "step": 111445 }, { "epoch": 98.7156775907883, "grad_norm": 0.2434430718421936, "learning_rate": 1e-05, "loss": 0.9225, "step": 111450 }, { "epoch": 98.72010628875111, "grad_norm": 0.24628238379955292, "learning_rate": 1e-05, "loss": 0.9969, "step": 111455 }, { "epoch": 98.7245349867139, "grad_norm": 0.22682726383209229, "learning_rate": 1e-05, "loss": 0.9092, "step": 111460 }, { "epoch": 98.7289636846767, "grad_norm": 0.19881866872310638, "learning_rate": 1e-05, "loss": 0.9289, "step": 111465 }, { "epoch": 98.73339238263951, "grad_norm": 0.22952796518802643, "learning_rate": 1e-05, "loss": 0.9667, "step": 111470 }, { "epoch": 98.7378210806023, "grad_norm": 0.23233212530612946, "learning_rate": 1e-05, "loss": 0.9109, "step": 111475 }, { "epoch": 98.7422497785651, "grad_norm": 0.22479300200939178, "learning_rate": 1e-05, "loss": 0.946, "step": 111480 }, { "epoch": 98.7466784765279, "grad_norm": 0.2576109766960144, "learning_rate": 1e-05, "loss": 0.9725, "step": 111485 }, { "epoch": 98.7511071744907, "grad_norm": 0.2226061075925827, "learning_rate": 1e-05, "loss": 0.9762, "step": 111490 }, { "epoch": 98.75553587245349, "grad_norm": 0.25576964020729065, "learning_rate": 1e-05, "loss": 0.9698, "step": 111495 }, { "epoch": 98.7599645704163, "grad_norm": 0.23511266708374023, "learning_rate": 1e-05, "loss": 0.9461, "step": 111500 }, { "epoch": 98.7643932683791, "grad_norm": 0.27317604422569275, "learning_rate": 1e-05, "loss": 1.0073, "step": 111505 }, { "epoch": 98.76882196634189, "grad_norm": 0.2240634560585022, "learning_rate": 1e-05, "loss": 0.9605, "step": 111510 }, { "epoch": 98.7732506643047, "grad_norm": 0.2444314807653427, "learning_rate": 1e-05, "loss": 0.9369, "step": 111515 }, { "epoch": 98.77767936226749, "grad_norm": 0.27551326155662537, "learning_rate": 1e-05, "loss": 0.9382, "step": 111520 }, { "epoch": 98.78210806023029, "grad_norm": 0.26869821548461914, "learning_rate": 1e-05, "loss": 0.9248, "step": 111525 }, { "epoch": 98.7865367581931, "grad_norm": 0.26883891224861145, "learning_rate": 1e-05, "loss": 0.9734, "step": 111530 }, { "epoch": 98.79096545615589, "grad_norm": 0.2614055871963501, "learning_rate": 1e-05, "loss": 0.9861, "step": 111535 }, { "epoch": 98.79539415411868, "grad_norm": 0.2303207814693451, "learning_rate": 1e-05, "loss": 0.9907, "step": 111540 }, { "epoch": 98.79982285208149, "grad_norm": 0.2326149344444275, "learning_rate": 1e-05, "loss": 0.9146, "step": 111545 }, { "epoch": 98.80425155004428, "grad_norm": 0.2920328974723816, "learning_rate": 1e-05, "loss": 0.9668, "step": 111550 }, { "epoch": 98.8086802480071, "grad_norm": 0.23487339913845062, "learning_rate": 1e-05, "loss": 0.9534, "step": 111555 }, { "epoch": 98.81310894596989, "grad_norm": 0.22838394343852997, "learning_rate": 1e-05, "loss": 0.9398, "step": 111560 }, { "epoch": 98.81753764393268, "grad_norm": 0.2282736897468567, "learning_rate": 1e-05, "loss": 1.0164, "step": 111565 }, { "epoch": 98.82196634189549, "grad_norm": 0.28193390369415283, "learning_rate": 1e-05, "loss": 0.9698, "step": 111570 }, { "epoch": 98.82639503985828, "grad_norm": 0.2746627926826477, "learning_rate": 1e-05, "loss": 0.9101, "step": 111575 }, { "epoch": 98.83082373782108, "grad_norm": 0.22043448686599731, "learning_rate": 1e-05, "loss": 0.9887, "step": 111580 }, { "epoch": 98.83525243578389, "grad_norm": 0.255452960729599, "learning_rate": 1e-05, "loss": 0.9613, "step": 111585 }, { "epoch": 98.83968113374668, "grad_norm": 0.24972356855869293, "learning_rate": 1e-05, "loss": 0.9821, "step": 111590 }, { "epoch": 98.84410983170947, "grad_norm": 0.22377607226371765, "learning_rate": 1e-05, "loss": 0.9875, "step": 111595 }, { "epoch": 98.84853852967228, "grad_norm": 0.21315820515155792, "learning_rate": 1e-05, "loss": 0.9598, "step": 111600 }, { "epoch": 98.85296722763508, "grad_norm": 0.2499481439590454, "learning_rate": 1e-05, "loss": 0.9846, "step": 111605 }, { "epoch": 98.85739592559787, "grad_norm": 0.2549728453159332, "learning_rate": 1e-05, "loss": 0.9467, "step": 111610 }, { "epoch": 98.86182462356068, "grad_norm": 0.24899548292160034, "learning_rate": 1e-05, "loss": 0.9626, "step": 111615 }, { "epoch": 98.86625332152347, "grad_norm": 0.23861071467399597, "learning_rate": 1e-05, "loss": 0.9518, "step": 111620 }, { "epoch": 98.87068201948627, "grad_norm": 0.22890697419643402, "learning_rate": 1e-05, "loss": 0.9168, "step": 111625 }, { "epoch": 98.87511071744908, "grad_norm": 0.24206499755382538, "learning_rate": 1e-05, "loss": 0.926, "step": 111630 }, { "epoch": 98.87953941541187, "grad_norm": 0.28654757142066956, "learning_rate": 1e-05, "loss": 0.9368, "step": 111635 }, { "epoch": 98.88396811337466, "grad_norm": 0.26713982224464417, "learning_rate": 1e-05, "loss": 0.9411, "step": 111640 }, { "epoch": 98.88839681133747, "grad_norm": 0.22974300384521484, "learning_rate": 1e-05, "loss": 0.9542, "step": 111645 }, { "epoch": 98.89282550930027, "grad_norm": 0.19291585683822632, "learning_rate": 1e-05, "loss": 0.9675, "step": 111650 }, { "epoch": 98.89725420726306, "grad_norm": 0.23455610871315002, "learning_rate": 1e-05, "loss": 0.9642, "step": 111655 }, { "epoch": 98.90168290522587, "grad_norm": 0.2292855978012085, "learning_rate": 1e-05, "loss": 0.9755, "step": 111660 }, { "epoch": 98.90611160318866, "grad_norm": 0.24286925792694092, "learning_rate": 1e-05, "loss": 0.8819, "step": 111665 }, { "epoch": 98.91054030115146, "grad_norm": 0.2769407331943512, "learning_rate": 1e-05, "loss": 0.913, "step": 111670 }, { "epoch": 98.91496899911427, "grad_norm": 0.2178068310022354, "learning_rate": 1e-05, "loss": 0.9706, "step": 111675 }, { "epoch": 98.91939769707706, "grad_norm": 0.21476486325263977, "learning_rate": 1e-05, "loss": 0.9411, "step": 111680 }, { "epoch": 98.92382639503985, "grad_norm": 0.2701799273490906, "learning_rate": 1e-05, "loss": 0.9388, "step": 111685 }, { "epoch": 98.92825509300266, "grad_norm": 0.24470177292823792, "learning_rate": 1e-05, "loss": 0.9907, "step": 111690 }, { "epoch": 98.93268379096546, "grad_norm": 0.2537175714969635, "learning_rate": 1e-05, "loss": 0.9867, "step": 111695 }, { "epoch": 98.93711248892825, "grad_norm": 0.2429252713918686, "learning_rate": 1e-05, "loss": 0.9694, "step": 111700 }, { "epoch": 98.94154118689106, "grad_norm": 0.23320193588733673, "learning_rate": 1e-05, "loss": 0.9957, "step": 111705 }, { "epoch": 98.94596988485385, "grad_norm": 0.22364431619644165, "learning_rate": 1e-05, "loss": 1.018, "step": 111710 }, { "epoch": 98.95039858281665, "grad_norm": 0.22956855595111847, "learning_rate": 1e-05, "loss": 0.9015, "step": 111715 }, { "epoch": 98.95482728077945, "grad_norm": 0.2372560352087021, "learning_rate": 1e-05, "loss": 0.9534, "step": 111720 }, { "epoch": 98.95925597874225, "grad_norm": 0.23301735520362854, "learning_rate": 1e-05, "loss": 0.9504, "step": 111725 }, { "epoch": 98.96368467670504, "grad_norm": 0.21001306176185608, "learning_rate": 1e-05, "loss": 0.9498, "step": 111730 }, { "epoch": 98.96811337466785, "grad_norm": 0.29205936193466187, "learning_rate": 1e-05, "loss": 0.9501, "step": 111735 }, { "epoch": 98.97254207263065, "grad_norm": 0.23656350374221802, "learning_rate": 1e-05, "loss": 0.9656, "step": 111740 }, { "epoch": 98.97697077059344, "grad_norm": 0.2577022910118103, "learning_rate": 1e-05, "loss": 0.9763, "step": 111745 }, { "epoch": 98.98139946855625, "grad_norm": 0.26302826404571533, "learning_rate": 1e-05, "loss": 0.994, "step": 111750 }, { "epoch": 98.98582816651904, "grad_norm": 0.23785418272018433, "learning_rate": 1e-05, "loss": 0.9703, "step": 111755 }, { "epoch": 98.99025686448184, "grad_norm": 0.25688380002975464, "learning_rate": 1e-05, "loss": 1.0055, "step": 111760 }, { "epoch": 98.99468556244464, "grad_norm": 0.28728407621383667, "learning_rate": 1e-05, "loss": 0.9432, "step": 111765 }, { "epoch": 98.99911426040744, "grad_norm": 0.29873979091644287, "learning_rate": 1e-05, "loss": 0.9407, "step": 111770 }, { "epoch": 99.00354295837023, "grad_norm": 0.25227078795433044, "learning_rate": 1e-05, "loss": 0.9775, "step": 111775 }, { "epoch": 99.00797165633304, "grad_norm": 0.23644119501113892, "learning_rate": 1e-05, "loss": 0.9748, "step": 111780 }, { "epoch": 99.01240035429583, "grad_norm": 0.2506011724472046, "learning_rate": 1e-05, "loss": 0.9927, "step": 111785 }, { "epoch": 99.01682905225863, "grad_norm": 0.21115323901176453, "learning_rate": 1e-05, "loss": 0.9524, "step": 111790 }, { "epoch": 99.02125775022144, "grad_norm": 0.2390095293521881, "learning_rate": 1e-05, "loss": 0.9685, "step": 111795 }, { "epoch": 99.02568644818423, "grad_norm": 0.22407297790050507, "learning_rate": 1e-05, "loss": 0.9548, "step": 111800 }, { "epoch": 99.03011514614704, "grad_norm": 0.23390407860279083, "learning_rate": 1e-05, "loss": 0.945, "step": 111805 }, { "epoch": 99.03454384410983, "grad_norm": 0.2735450565814972, "learning_rate": 1e-05, "loss": 0.9973, "step": 111810 }, { "epoch": 99.03897254207263, "grad_norm": 0.26146021485328674, "learning_rate": 1e-05, "loss": 0.9204, "step": 111815 }, { "epoch": 99.04340124003544, "grad_norm": 0.2431093007326126, "learning_rate": 1e-05, "loss": 0.941, "step": 111820 }, { "epoch": 99.04782993799823, "grad_norm": 0.20601999759674072, "learning_rate": 1e-05, "loss": 0.9473, "step": 111825 }, { "epoch": 99.05225863596102, "grad_norm": 0.20047248899936676, "learning_rate": 1e-05, "loss": 0.9909, "step": 111830 }, { "epoch": 99.05668733392383, "grad_norm": 0.21753501892089844, "learning_rate": 1e-05, "loss": 0.9322, "step": 111835 }, { "epoch": 99.06111603188663, "grad_norm": 0.22059334814548492, "learning_rate": 1e-05, "loss": 0.9626, "step": 111840 }, { "epoch": 99.06554472984942, "grad_norm": 0.24513216316699982, "learning_rate": 1e-05, "loss": 0.9937, "step": 111845 }, { "epoch": 99.06997342781223, "grad_norm": 0.24849770963191986, "learning_rate": 1e-05, "loss": 0.9517, "step": 111850 }, { "epoch": 99.07440212577502, "grad_norm": 0.23517654836177826, "learning_rate": 1e-05, "loss": 0.9789, "step": 111855 }, { "epoch": 99.07883082373782, "grad_norm": 0.2622279226779938, "learning_rate": 1e-05, "loss": 0.9167, "step": 111860 }, { "epoch": 99.08325952170063, "grad_norm": 0.27186161279678345, "learning_rate": 1e-05, "loss": 0.959, "step": 111865 }, { "epoch": 99.08768821966342, "grad_norm": 0.24114683270454407, "learning_rate": 1e-05, "loss": 0.9692, "step": 111870 }, { "epoch": 99.09211691762621, "grad_norm": 0.2248872071504593, "learning_rate": 1e-05, "loss": 0.9172, "step": 111875 }, { "epoch": 99.09654561558902, "grad_norm": 0.2622697353363037, "learning_rate": 1e-05, "loss": 0.9725, "step": 111880 }, { "epoch": 99.10097431355182, "grad_norm": 0.2753954231739044, "learning_rate": 1e-05, "loss": 0.9361, "step": 111885 }, { "epoch": 99.10540301151461, "grad_norm": 0.2478536069393158, "learning_rate": 1e-05, "loss": 0.9105, "step": 111890 }, { "epoch": 99.10983170947742, "grad_norm": 0.2597898840904236, "learning_rate": 1e-05, "loss": 0.9983, "step": 111895 }, { "epoch": 99.11426040744021, "grad_norm": 0.2667137682437897, "learning_rate": 1e-05, "loss": 0.9529, "step": 111900 }, { "epoch": 99.118689105403, "grad_norm": 0.21300847828388214, "learning_rate": 1e-05, "loss": 0.9768, "step": 111905 }, { "epoch": 99.12311780336582, "grad_norm": 0.24673312902450562, "learning_rate": 1e-05, "loss": 0.9727, "step": 111910 }, { "epoch": 99.12754650132861, "grad_norm": 0.2489415407180786, "learning_rate": 1e-05, "loss": 0.9447, "step": 111915 }, { "epoch": 99.1319751992914, "grad_norm": 0.2588604688644409, "learning_rate": 1e-05, "loss": 0.9682, "step": 111920 }, { "epoch": 99.13640389725421, "grad_norm": 0.22797806560993195, "learning_rate": 1e-05, "loss": 0.9179, "step": 111925 }, { "epoch": 99.140832595217, "grad_norm": 0.25321805477142334, "learning_rate": 1e-05, "loss": 0.9664, "step": 111930 }, { "epoch": 99.1452612931798, "grad_norm": 0.23397018015384674, "learning_rate": 1e-05, "loss": 0.9544, "step": 111935 }, { "epoch": 99.14968999114261, "grad_norm": 0.23838578164577484, "learning_rate": 1e-05, "loss": 0.9282, "step": 111940 }, { "epoch": 99.1541186891054, "grad_norm": 0.23428472876548767, "learning_rate": 1e-05, "loss": 0.9677, "step": 111945 }, { "epoch": 99.1585473870682, "grad_norm": 0.22358359396457672, "learning_rate": 1e-05, "loss": 0.9785, "step": 111950 }, { "epoch": 99.162976085031, "grad_norm": 0.21769684553146362, "learning_rate": 1e-05, "loss": 0.9773, "step": 111955 }, { "epoch": 99.1674047829938, "grad_norm": 0.23592406511306763, "learning_rate": 1e-05, "loss": 0.911, "step": 111960 }, { "epoch": 99.1718334809566, "grad_norm": 0.21641094982624054, "learning_rate": 1e-05, "loss": 0.9853, "step": 111965 }, { "epoch": 99.1762621789194, "grad_norm": 0.21236218512058258, "learning_rate": 1e-05, "loss": 0.9484, "step": 111970 }, { "epoch": 99.1806908768822, "grad_norm": 0.2305438369512558, "learning_rate": 1e-05, "loss": 0.9566, "step": 111975 }, { "epoch": 99.18511957484499, "grad_norm": 0.19450995326042175, "learning_rate": 1e-05, "loss": 0.9264, "step": 111980 }, { "epoch": 99.1895482728078, "grad_norm": 0.21670381724834442, "learning_rate": 1e-05, "loss": 0.9463, "step": 111985 }, { "epoch": 99.19397697077059, "grad_norm": 0.2027820497751236, "learning_rate": 1e-05, "loss": 0.9732, "step": 111990 }, { "epoch": 99.19840566873339, "grad_norm": 0.26259005069732666, "learning_rate": 1e-05, "loss": 0.9072, "step": 111995 }, { "epoch": 99.2028343666962, "grad_norm": 0.26879674196243286, "learning_rate": 1e-05, "loss": 0.9776, "step": 112000 }, { "epoch": 99.20726306465899, "grad_norm": 0.278833270072937, "learning_rate": 1e-05, "loss": 0.9717, "step": 112005 }, { "epoch": 99.21169176262178, "grad_norm": 0.2328919917345047, "learning_rate": 1e-05, "loss": 0.9915, "step": 112010 }, { "epoch": 99.21612046058459, "grad_norm": 0.22976909577846527, "learning_rate": 1e-05, "loss": 0.9472, "step": 112015 }, { "epoch": 99.22054915854739, "grad_norm": 0.23653466999530792, "learning_rate": 1e-05, "loss": 0.9488, "step": 112020 }, { "epoch": 99.22497785651018, "grad_norm": 0.2941906452178955, "learning_rate": 1e-05, "loss": 0.9693, "step": 112025 }, { "epoch": 99.22940655447299, "grad_norm": 0.3187143802642822, "learning_rate": 1e-05, "loss": 0.9447, "step": 112030 }, { "epoch": 99.23383525243578, "grad_norm": 0.208413764834404, "learning_rate": 1e-05, "loss": 0.9776, "step": 112035 }, { "epoch": 99.23826395039858, "grad_norm": 0.2234906703233719, "learning_rate": 1e-05, "loss": 0.9477, "step": 112040 }, { "epoch": 99.24269264836138, "grad_norm": 0.21417900919914246, "learning_rate": 1e-05, "loss": 0.9976, "step": 112045 }, { "epoch": 99.24712134632418, "grad_norm": 0.23505137860774994, "learning_rate": 1e-05, "loss": 0.9573, "step": 112050 }, { "epoch": 99.25155004428699, "grad_norm": 0.2561967670917511, "learning_rate": 1e-05, "loss": 0.9643, "step": 112055 }, { "epoch": 99.25597874224978, "grad_norm": 0.322628378868103, "learning_rate": 1e-05, "loss": 0.9659, "step": 112060 }, { "epoch": 99.26040744021257, "grad_norm": 0.263893723487854, "learning_rate": 1e-05, "loss": 1.0116, "step": 112065 }, { "epoch": 99.26483613817538, "grad_norm": 0.21519190073013306, "learning_rate": 1e-05, "loss": 0.9539, "step": 112070 }, { "epoch": 99.26926483613818, "grad_norm": 0.2471558153629303, "learning_rate": 1e-05, "loss": 0.9166, "step": 112075 }, { "epoch": 99.27369353410097, "grad_norm": 0.2374553233385086, "learning_rate": 1e-05, "loss": 0.9997, "step": 112080 }, { "epoch": 99.27812223206378, "grad_norm": 0.21322056651115417, "learning_rate": 1e-05, "loss": 0.9087, "step": 112085 }, { "epoch": 99.28255093002657, "grad_norm": 0.22902484238147736, "learning_rate": 1e-05, "loss": 0.9987, "step": 112090 }, { "epoch": 99.28697962798937, "grad_norm": 0.25176161527633667, "learning_rate": 1e-05, "loss": 0.9427, "step": 112095 }, { "epoch": 99.29140832595218, "grad_norm": 0.27048763632774353, "learning_rate": 1e-05, "loss": 0.9079, "step": 112100 }, { "epoch": 99.29583702391497, "grad_norm": 0.21228544414043427, "learning_rate": 1e-05, "loss": 0.928, "step": 112105 }, { "epoch": 99.30026572187776, "grad_norm": 0.23203589022159576, "learning_rate": 1e-05, "loss": 0.9279, "step": 112110 }, { "epoch": 99.30469441984057, "grad_norm": 0.24339962005615234, "learning_rate": 1e-05, "loss": 0.9438, "step": 112115 }, { "epoch": 99.30912311780337, "grad_norm": 0.25905856490135193, "learning_rate": 1e-05, "loss": 1.0102, "step": 112120 }, { "epoch": 99.31355181576616, "grad_norm": 0.25077489018440247, "learning_rate": 1e-05, "loss": 0.9287, "step": 112125 }, { "epoch": 99.31798051372897, "grad_norm": 0.2527613937854767, "learning_rate": 1e-05, "loss": 0.9861, "step": 112130 }, { "epoch": 99.32240921169176, "grad_norm": 0.2319108545780182, "learning_rate": 1e-05, "loss": 0.9445, "step": 112135 }, { "epoch": 99.32683790965456, "grad_norm": 0.21724678575992584, "learning_rate": 1e-05, "loss": 0.936, "step": 112140 }, { "epoch": 99.33126660761737, "grad_norm": 0.2282632291316986, "learning_rate": 1e-05, "loss": 0.9216, "step": 112145 }, { "epoch": 99.33569530558016, "grad_norm": 0.27804258465766907, "learning_rate": 1e-05, "loss": 0.9684, "step": 112150 }, { "epoch": 99.34012400354295, "grad_norm": 0.2042091190814972, "learning_rate": 1e-05, "loss": 0.895, "step": 112155 }, { "epoch": 99.34455270150576, "grad_norm": 0.23103472590446472, "learning_rate": 1e-05, "loss": 0.9847, "step": 112160 }, { "epoch": 99.34898139946856, "grad_norm": 0.24889393150806427, "learning_rate": 1e-05, "loss": 0.9139, "step": 112165 }, { "epoch": 99.35341009743135, "grad_norm": 0.2449347823858261, "learning_rate": 1e-05, "loss": 0.9778, "step": 112170 }, { "epoch": 99.35783879539416, "grad_norm": 0.22189085185527802, "learning_rate": 1e-05, "loss": 0.9187, "step": 112175 }, { "epoch": 99.36226749335695, "grad_norm": 0.2285684198141098, "learning_rate": 1e-05, "loss": 0.996, "step": 112180 }, { "epoch": 99.36669619131975, "grad_norm": 0.21446222066879272, "learning_rate": 1e-05, "loss": 0.9572, "step": 112185 }, { "epoch": 99.37112488928256, "grad_norm": 0.2525879144668579, "learning_rate": 1e-05, "loss": 0.9396, "step": 112190 }, { "epoch": 99.37555358724535, "grad_norm": 0.2445988804101944, "learning_rate": 1e-05, "loss": 0.8767, "step": 112195 }, { "epoch": 99.37998228520814, "grad_norm": 0.21096929907798767, "learning_rate": 1e-05, "loss": 0.9474, "step": 112200 }, { "epoch": 99.38441098317095, "grad_norm": 0.24791206419467926, "learning_rate": 1e-05, "loss": 0.9723, "step": 112205 }, { "epoch": 99.38883968113375, "grad_norm": 0.21883061528205872, "learning_rate": 1e-05, "loss": 0.9689, "step": 112210 }, { "epoch": 99.39326837909654, "grad_norm": 0.23737597465515137, "learning_rate": 1e-05, "loss": 0.908, "step": 112215 }, { "epoch": 99.39769707705935, "grad_norm": 0.2720600664615631, "learning_rate": 1e-05, "loss": 0.9186, "step": 112220 }, { "epoch": 99.40212577502214, "grad_norm": 0.21748831868171692, "learning_rate": 1e-05, "loss": 1.002, "step": 112225 }, { "epoch": 99.40655447298494, "grad_norm": 0.23587162792682648, "learning_rate": 1e-05, "loss": 0.9685, "step": 112230 }, { "epoch": 99.41098317094774, "grad_norm": 0.22226890921592712, "learning_rate": 1e-05, "loss": 0.9153, "step": 112235 }, { "epoch": 99.41541186891054, "grad_norm": 0.2393627166748047, "learning_rate": 1e-05, "loss": 0.8845, "step": 112240 }, { "epoch": 99.41984056687333, "grad_norm": 0.2564423978328705, "learning_rate": 1e-05, "loss": 0.9091, "step": 112245 }, { "epoch": 99.42426926483614, "grad_norm": 0.22174178063869476, "learning_rate": 1e-05, "loss": 1.0151, "step": 112250 }, { "epoch": 99.42869796279894, "grad_norm": 0.23430350422859192, "learning_rate": 1e-05, "loss": 0.9137, "step": 112255 }, { "epoch": 99.43312666076173, "grad_norm": 0.22487905621528625, "learning_rate": 1e-05, "loss": 0.9852, "step": 112260 }, { "epoch": 99.43755535872454, "grad_norm": 0.22497770190238953, "learning_rate": 1e-05, "loss": 0.8792, "step": 112265 }, { "epoch": 99.44198405668733, "grad_norm": 0.23379693925380707, "learning_rate": 1e-05, "loss": 0.9724, "step": 112270 }, { "epoch": 99.44641275465013, "grad_norm": 0.22368568181991577, "learning_rate": 1e-05, "loss": 0.964, "step": 112275 }, { "epoch": 99.45084145261293, "grad_norm": 0.2857246994972229, "learning_rate": 1e-05, "loss": 0.9872, "step": 112280 }, { "epoch": 99.45527015057573, "grad_norm": 0.22969003021717072, "learning_rate": 1e-05, "loss": 0.9663, "step": 112285 }, { "epoch": 99.45969884853854, "grad_norm": 0.2787075936794281, "learning_rate": 1e-05, "loss": 0.9694, "step": 112290 }, { "epoch": 99.46412754650133, "grad_norm": 0.22288420796394348, "learning_rate": 1e-05, "loss": 0.9825, "step": 112295 }, { "epoch": 99.46855624446412, "grad_norm": 0.234920933842659, "learning_rate": 1e-05, "loss": 1.0295, "step": 112300 }, { "epoch": 99.47298494242693, "grad_norm": 0.23380663990974426, "learning_rate": 1e-05, "loss": 0.9801, "step": 112305 }, { "epoch": 99.47741364038973, "grad_norm": 0.216245636343956, "learning_rate": 1e-05, "loss": 0.9718, "step": 112310 }, { "epoch": 99.48184233835252, "grad_norm": 0.19571220874786377, "learning_rate": 1e-05, "loss": 0.9275, "step": 112315 }, { "epoch": 99.48627103631533, "grad_norm": 0.29585444927215576, "learning_rate": 1e-05, "loss": 0.9914, "step": 112320 }, { "epoch": 99.49069973427812, "grad_norm": 0.2280639111995697, "learning_rate": 1e-05, "loss": 0.9618, "step": 112325 }, { "epoch": 99.49512843224092, "grad_norm": 0.24458849430084229, "learning_rate": 1e-05, "loss": 0.9298, "step": 112330 }, { "epoch": 99.49955713020373, "grad_norm": 0.2576400339603424, "learning_rate": 1e-05, "loss": 0.9656, "step": 112335 }, { "epoch": 99.50398582816652, "grad_norm": 0.28934964537620544, "learning_rate": 1e-05, "loss": 0.9532, "step": 112340 }, { "epoch": 99.50841452612931, "grad_norm": 0.33302006125450134, "learning_rate": 1e-05, "loss": 0.9409, "step": 112345 }, { "epoch": 99.51284322409212, "grad_norm": 0.26159700751304626, "learning_rate": 1e-05, "loss": 0.907, "step": 112350 }, { "epoch": 99.51727192205492, "grad_norm": 0.22977440059185028, "learning_rate": 1e-05, "loss": 0.9532, "step": 112355 }, { "epoch": 99.52170062001771, "grad_norm": 0.23559938371181488, "learning_rate": 1e-05, "loss": 0.9889, "step": 112360 }, { "epoch": 99.52612931798052, "grad_norm": 0.24109657108783722, "learning_rate": 1e-05, "loss": 0.9309, "step": 112365 }, { "epoch": 99.53055801594331, "grad_norm": 0.252225786447525, "learning_rate": 1e-05, "loss": 0.8816, "step": 112370 }, { "epoch": 99.53498671390611, "grad_norm": 0.23753182590007782, "learning_rate": 1e-05, "loss": 0.9734, "step": 112375 }, { "epoch": 99.53941541186892, "grad_norm": 0.25965628027915955, "learning_rate": 1e-05, "loss": 0.9302, "step": 112380 }, { "epoch": 99.54384410983171, "grad_norm": 0.246441051363945, "learning_rate": 1e-05, "loss": 0.9417, "step": 112385 }, { "epoch": 99.5482728077945, "grad_norm": 0.25906872749328613, "learning_rate": 1e-05, "loss": 0.8966, "step": 112390 }, { "epoch": 99.55270150575731, "grad_norm": 0.2492685765028, "learning_rate": 1e-05, "loss": 0.9741, "step": 112395 }, { "epoch": 99.5571302037201, "grad_norm": 0.22617076337337494, "learning_rate": 1e-05, "loss": 0.8938, "step": 112400 }, { "epoch": 99.5615589016829, "grad_norm": 0.20683042705059052, "learning_rate": 1e-05, "loss": 0.9327, "step": 112405 }, { "epoch": 99.56598759964571, "grad_norm": 0.24301917850971222, "learning_rate": 1e-05, "loss": 0.9243, "step": 112410 }, { "epoch": 99.5704162976085, "grad_norm": 0.23414640128612518, "learning_rate": 1e-05, "loss": 0.9106, "step": 112415 }, { "epoch": 99.5748449955713, "grad_norm": 0.22861480712890625, "learning_rate": 1e-05, "loss": 0.9969, "step": 112420 }, { "epoch": 99.5792736935341, "grad_norm": 0.227803036570549, "learning_rate": 1e-05, "loss": 0.9774, "step": 112425 }, { "epoch": 99.5837023914969, "grad_norm": 0.2459019273519516, "learning_rate": 1e-05, "loss": 0.925, "step": 112430 }, { "epoch": 99.5881310894597, "grad_norm": 0.21924693882465363, "learning_rate": 1e-05, "loss": 0.9497, "step": 112435 }, { "epoch": 99.5925597874225, "grad_norm": 0.21996010839939117, "learning_rate": 1e-05, "loss": 0.9886, "step": 112440 }, { "epoch": 99.5969884853853, "grad_norm": 0.20834368467330933, "learning_rate": 1e-05, "loss": 0.9546, "step": 112445 }, { "epoch": 99.60141718334809, "grad_norm": 0.274662047624588, "learning_rate": 1e-05, "loss": 0.9511, "step": 112450 }, { "epoch": 99.6058458813109, "grad_norm": 0.22213467955589294, "learning_rate": 1e-05, "loss": 0.9247, "step": 112455 }, { "epoch": 99.61027457927369, "grad_norm": 0.26723361015319824, "learning_rate": 1e-05, "loss": 0.9553, "step": 112460 }, { "epoch": 99.61470327723649, "grad_norm": 0.20168496668338776, "learning_rate": 1e-05, "loss": 0.9796, "step": 112465 }, { "epoch": 99.6191319751993, "grad_norm": 0.24060934782028198, "learning_rate": 1e-05, "loss": 0.9719, "step": 112470 }, { "epoch": 99.62356067316209, "grad_norm": 0.24771174788475037, "learning_rate": 1e-05, "loss": 0.884, "step": 112475 }, { "epoch": 99.62798937112488, "grad_norm": 0.24702011048793793, "learning_rate": 1e-05, "loss": 0.9871, "step": 112480 }, { "epoch": 99.63241806908769, "grad_norm": 0.20212148129940033, "learning_rate": 1e-05, "loss": 0.885, "step": 112485 }, { "epoch": 99.63684676705049, "grad_norm": 0.2540283501148224, "learning_rate": 1e-05, "loss": 0.9958, "step": 112490 }, { "epoch": 99.64127546501328, "grad_norm": 0.23832114040851593, "learning_rate": 1e-05, "loss": 0.9773, "step": 112495 }, { "epoch": 99.64570416297609, "grad_norm": 0.273372083902359, "learning_rate": 1e-05, "loss": 0.9744, "step": 112500 }, { "epoch": 99.65013286093888, "grad_norm": 0.2506163418292999, "learning_rate": 1e-05, "loss": 0.9893, "step": 112505 }, { "epoch": 99.65456155890168, "grad_norm": 0.23988185822963715, "learning_rate": 1e-05, "loss": 0.9775, "step": 112510 }, { "epoch": 99.65899025686448, "grad_norm": 0.2652115821838379, "learning_rate": 1e-05, "loss": 0.9531, "step": 112515 }, { "epoch": 99.66341895482728, "grad_norm": 0.25321099162101746, "learning_rate": 1e-05, "loss": 0.9647, "step": 112520 }, { "epoch": 99.66784765279007, "grad_norm": 0.2707461714744568, "learning_rate": 1e-05, "loss": 0.9669, "step": 112525 }, { "epoch": 99.67227635075288, "grad_norm": 0.3014930784702301, "learning_rate": 1e-05, "loss": 0.9861, "step": 112530 }, { "epoch": 99.67670504871568, "grad_norm": 0.26914748549461365, "learning_rate": 1e-05, "loss": 0.9641, "step": 112535 }, { "epoch": 99.68113374667848, "grad_norm": 0.25054702162742615, "learning_rate": 1e-05, "loss": 1.0029, "step": 112540 }, { "epoch": 99.68556244464128, "grad_norm": 0.22802293300628662, "learning_rate": 1e-05, "loss": 0.9743, "step": 112545 }, { "epoch": 99.68999114260407, "grad_norm": 0.2554066777229309, "learning_rate": 1e-05, "loss": 0.9179, "step": 112550 }, { "epoch": 99.69441984056688, "grad_norm": 0.22374007105827332, "learning_rate": 1e-05, "loss": 1.0013, "step": 112555 }, { "epoch": 99.69884853852967, "grad_norm": 0.21790727972984314, "learning_rate": 1e-05, "loss": 0.915, "step": 112560 }, { "epoch": 99.70327723649247, "grad_norm": 0.2810305655002594, "learning_rate": 1e-05, "loss": 0.9575, "step": 112565 }, { "epoch": 99.70770593445528, "grad_norm": 0.21818692982196808, "learning_rate": 1e-05, "loss": 0.9254, "step": 112570 }, { "epoch": 99.71213463241807, "grad_norm": 0.22811481356620789, "learning_rate": 1e-05, "loss": 0.9336, "step": 112575 }, { "epoch": 99.71656333038086, "grad_norm": 0.21748724579811096, "learning_rate": 1e-05, "loss": 0.9446, "step": 112580 }, { "epoch": 99.72099202834367, "grad_norm": 0.23890122771263123, "learning_rate": 1e-05, "loss": 0.9124, "step": 112585 }, { "epoch": 99.72542072630647, "grad_norm": 0.23146294057369232, "learning_rate": 1e-05, "loss": 1.0018, "step": 112590 }, { "epoch": 99.72984942426926, "grad_norm": 0.22594179213047028, "learning_rate": 1e-05, "loss": 0.9204, "step": 112595 }, { "epoch": 99.73427812223207, "grad_norm": 0.26333120465278625, "learning_rate": 1e-05, "loss": 0.9954, "step": 112600 }, { "epoch": 99.73870682019486, "grad_norm": 0.2790357768535614, "learning_rate": 1e-05, "loss": 0.9807, "step": 112605 }, { "epoch": 99.74313551815766, "grad_norm": 0.24205732345581055, "learning_rate": 1e-05, "loss": 0.9401, "step": 112610 }, { "epoch": 99.74756421612047, "grad_norm": 0.2575497031211853, "learning_rate": 1e-05, "loss": 0.9992, "step": 112615 }, { "epoch": 99.75199291408326, "grad_norm": 0.26168960332870483, "learning_rate": 1e-05, "loss": 0.9391, "step": 112620 }, { "epoch": 99.75642161204605, "grad_norm": 0.25716209411621094, "learning_rate": 1e-05, "loss": 0.919, "step": 112625 }, { "epoch": 99.76085031000886, "grad_norm": 0.24601739645004272, "learning_rate": 1e-05, "loss": 1.0241, "step": 112630 }, { "epoch": 99.76527900797166, "grad_norm": 0.2519760727882385, "learning_rate": 1e-05, "loss": 0.9403, "step": 112635 }, { "epoch": 99.76970770593445, "grad_norm": 0.23660080134868622, "learning_rate": 1e-05, "loss": 0.9744, "step": 112640 }, { "epoch": 99.77413640389726, "grad_norm": 0.2316771298646927, "learning_rate": 1e-05, "loss": 0.9763, "step": 112645 }, { "epoch": 99.77856510186005, "grad_norm": 0.208352729678154, "learning_rate": 1e-05, "loss": 0.953, "step": 112650 }, { "epoch": 99.78299379982285, "grad_norm": 0.1979127824306488, "learning_rate": 1e-05, "loss": 0.9153, "step": 112655 }, { "epoch": 99.78742249778566, "grad_norm": 0.2557685971260071, "learning_rate": 1e-05, "loss": 0.937, "step": 112660 }, { "epoch": 99.79185119574845, "grad_norm": 0.26532456278800964, "learning_rate": 1e-05, "loss": 0.9622, "step": 112665 }, { "epoch": 99.79627989371124, "grad_norm": 0.274320125579834, "learning_rate": 1e-05, "loss": 0.9422, "step": 112670 }, { "epoch": 99.80070859167405, "grad_norm": 0.21255089342594147, "learning_rate": 1e-05, "loss": 0.9428, "step": 112675 }, { "epoch": 99.80513728963685, "grad_norm": 0.2488599419593811, "learning_rate": 1e-05, "loss": 0.9351, "step": 112680 }, { "epoch": 99.80956598759964, "grad_norm": 0.2480822503566742, "learning_rate": 1e-05, "loss": 0.8982, "step": 112685 }, { "epoch": 99.81399468556245, "grad_norm": 0.21427126228809357, "learning_rate": 1e-05, "loss": 0.9398, "step": 112690 }, { "epoch": 99.81842338352524, "grad_norm": 0.24526908993721008, "learning_rate": 1e-05, "loss": 0.9645, "step": 112695 }, { "epoch": 99.82285208148804, "grad_norm": 0.2674933671951294, "learning_rate": 1e-05, "loss": 0.9569, "step": 112700 }, { "epoch": 99.82728077945085, "grad_norm": 0.23999366164207458, "learning_rate": 1e-05, "loss": 0.9212, "step": 112705 }, { "epoch": 99.83170947741364, "grad_norm": 0.2285846322774887, "learning_rate": 1e-05, "loss": 0.9642, "step": 112710 }, { "epoch": 99.83613817537643, "grad_norm": 0.2737208306789398, "learning_rate": 1e-05, "loss": 0.946, "step": 112715 }, { "epoch": 99.84056687333924, "grad_norm": 0.23447902500629425, "learning_rate": 1e-05, "loss": 0.9794, "step": 112720 }, { "epoch": 99.84499557130204, "grad_norm": 0.22516512870788574, "learning_rate": 1e-05, "loss": 0.9601, "step": 112725 }, { "epoch": 99.84942426926483, "grad_norm": 0.24350744485855103, "learning_rate": 1e-05, "loss": 0.9531, "step": 112730 }, { "epoch": 99.85385296722764, "grad_norm": 0.23664674162864685, "learning_rate": 1e-05, "loss": 0.9145, "step": 112735 }, { "epoch": 99.85828166519043, "grad_norm": 0.22188521921634674, "learning_rate": 1e-05, "loss": 0.9249, "step": 112740 }, { "epoch": 99.86271036315323, "grad_norm": 0.23321318626403809, "learning_rate": 1e-05, "loss": 0.9664, "step": 112745 }, { "epoch": 99.86713906111603, "grad_norm": 0.19657748937606812, "learning_rate": 1e-05, "loss": 0.9016, "step": 112750 }, { "epoch": 99.87156775907883, "grad_norm": 0.22899463772773743, "learning_rate": 1e-05, "loss": 0.9513, "step": 112755 }, { "epoch": 99.87599645704162, "grad_norm": 0.21229495108127594, "learning_rate": 1e-05, "loss": 0.9538, "step": 112760 }, { "epoch": 99.88042515500443, "grad_norm": 0.21518702805042267, "learning_rate": 1e-05, "loss": 0.897, "step": 112765 }, { "epoch": 99.88485385296723, "grad_norm": 0.22340236604213715, "learning_rate": 1e-05, "loss": 0.9623, "step": 112770 }, { "epoch": 99.88928255093003, "grad_norm": 0.2334798276424408, "learning_rate": 1e-05, "loss": 0.9365, "step": 112775 }, { "epoch": 99.89371124889283, "grad_norm": 0.24507346749305725, "learning_rate": 1e-05, "loss": 0.9744, "step": 112780 }, { "epoch": 99.89813994685562, "grad_norm": 0.23902076482772827, "learning_rate": 1e-05, "loss": 0.957, "step": 112785 }, { "epoch": 99.90256864481843, "grad_norm": 0.19866837561130524, "learning_rate": 1e-05, "loss": 0.9494, "step": 112790 }, { "epoch": 99.90699734278122, "grad_norm": 0.198320671916008, "learning_rate": 1e-05, "loss": 0.9916, "step": 112795 }, { "epoch": 99.91142604074402, "grad_norm": 0.23699766397476196, "learning_rate": 1e-05, "loss": 0.9839, "step": 112800 }, { "epoch": 99.91585473870683, "grad_norm": 0.22235526144504547, "learning_rate": 1e-05, "loss": 0.9812, "step": 112805 }, { "epoch": 99.92028343666962, "grad_norm": 0.2261887937784195, "learning_rate": 1e-05, "loss": 0.9797, "step": 112810 }, { "epoch": 99.92471213463241, "grad_norm": 0.296207457780838, "learning_rate": 1e-05, "loss": 0.9708, "step": 112815 }, { "epoch": 99.92914083259522, "grad_norm": 0.2781381607055664, "learning_rate": 1e-05, "loss": 0.9372, "step": 112820 }, { "epoch": 99.93356953055802, "grad_norm": 0.27711695432662964, "learning_rate": 1e-05, "loss": 0.8998, "step": 112825 }, { "epoch": 99.93799822852081, "grad_norm": 0.2417774349451065, "learning_rate": 1e-05, "loss": 0.9276, "step": 112830 }, { "epoch": 99.94242692648362, "grad_norm": 0.2478482574224472, "learning_rate": 1e-05, "loss": 0.9142, "step": 112835 }, { "epoch": 99.94685562444641, "grad_norm": 0.2155621200799942, "learning_rate": 1e-05, "loss": 0.9532, "step": 112840 }, { "epoch": 99.95128432240921, "grad_norm": 0.23429962992668152, "learning_rate": 1e-05, "loss": 0.986, "step": 112845 }, { "epoch": 99.95571302037202, "grad_norm": 0.24005959928035736, "learning_rate": 1e-05, "loss": 0.9488, "step": 112850 }, { "epoch": 99.96014171833481, "grad_norm": 0.262634813785553, "learning_rate": 1e-05, "loss": 0.9846, "step": 112855 }, { "epoch": 99.9645704162976, "grad_norm": 0.17670851945877075, "learning_rate": 1e-05, "loss": 0.9217, "step": 112860 }, { "epoch": 99.96899911426041, "grad_norm": 0.21186046302318573, "learning_rate": 1e-05, "loss": 0.9992, "step": 112865 }, { "epoch": 99.9734278122232, "grad_norm": 0.23945198953151703, "learning_rate": 1e-05, "loss": 0.9768, "step": 112870 }, { "epoch": 99.977856510186, "grad_norm": 0.22392725944519043, "learning_rate": 1e-05, "loss": 0.9533, "step": 112875 }, { "epoch": 99.98228520814881, "grad_norm": 0.2557377219200134, "learning_rate": 1e-05, "loss": 0.9783, "step": 112880 }, { "epoch": 99.9867139061116, "grad_norm": 0.23206056654453278, "learning_rate": 1e-05, "loss": 1.0533, "step": 112885 }, { "epoch": 99.9911426040744, "grad_norm": 0.23726427555084229, "learning_rate": 1e-05, "loss": 0.9674, "step": 112890 }, { "epoch": 99.9955713020372, "grad_norm": 0.2533961832523346, "learning_rate": 1e-05, "loss": 0.9782, "step": 112895 }, { "epoch": 100.0, "grad_norm": 0.25743329524993896, "learning_rate": 1e-05, "loss": 0.9841, "step": 112900 }, { "epoch": 100.0, "step": 112900, "total_flos": 1.1509983892450836e+19, "train_loss": 0.9787479164116778, "train_runtime": 471297.9641, "train_samples_per_second": 30.672, "train_steps_per_second": 0.24 } ], "logging_steps": 5, "max_steps": 112900, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 12000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1509983892450836e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }