PyVision-Video-7B-SFT / trainer_state.json
stzhao's picture
Upload folder using huggingface_hub
0d9c830 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 6885,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0014524328249818446,
"grad_norm": 4.328955480739728,
"learning_rate": 1.3062409288824383e-07,
"loss": 0.9607,
"step": 10
},
{
"epoch": 0.002904865649963689,
"grad_norm": 4.469323164876104,
"learning_rate": 2.757619738751814e-07,
"loss": 0.9859,
"step": 20
},
{
"epoch": 0.004357298474945534,
"grad_norm": 4.000416594025176,
"learning_rate": 4.2089985486211904e-07,
"loss": 0.9872,
"step": 30
},
{
"epoch": 0.005809731299927378,
"grad_norm": 3.1566001029759914,
"learning_rate": 5.660377358490567e-07,
"loss": 0.9191,
"step": 40
},
{
"epoch": 0.007262164124909223,
"grad_norm": 2.000776925354802,
"learning_rate": 7.111756168359943e-07,
"loss": 0.866,
"step": 50
},
{
"epoch": 0.008714596949891068,
"grad_norm": 2.03383269865318,
"learning_rate": 8.563134978229319e-07,
"loss": 0.8475,
"step": 60
},
{
"epoch": 0.010167029774872912,
"grad_norm": 1.981671850063017,
"learning_rate": 1.0014513788098695e-06,
"loss": 0.8145,
"step": 70
},
{
"epoch": 0.011619462599854757,
"grad_norm": 1.9935447101504142,
"learning_rate": 1.146589259796807e-06,
"loss": 0.7874,
"step": 80
},
{
"epoch": 0.013071895424836602,
"grad_norm": 1.696794144473072,
"learning_rate": 1.2917271407837448e-06,
"loss": 0.7606,
"step": 90
},
{
"epoch": 0.014524328249818447,
"grad_norm": 1.8441704167155635,
"learning_rate": 1.4368650217706823e-06,
"loss": 0.7505,
"step": 100
},
{
"epoch": 0.01597676107480029,
"grad_norm": 1.6167640330505846,
"learning_rate": 1.5820029027576197e-06,
"loss": 0.7432,
"step": 110
},
{
"epoch": 0.017429193899782137,
"grad_norm": 1.7310300613256226,
"learning_rate": 1.7271407837445576e-06,
"loss": 0.7502,
"step": 120
},
{
"epoch": 0.01888162672476398,
"grad_norm": 1.5504171157690307,
"learning_rate": 1.872278664731495e-06,
"loss": 0.7075,
"step": 130
},
{
"epoch": 0.020334059549745823,
"grad_norm": 1.5001595551333269,
"learning_rate": 2.0174165457184327e-06,
"loss": 0.7242,
"step": 140
},
{
"epoch": 0.02178649237472767,
"grad_norm": 1.7680255328873922,
"learning_rate": 2.1625544267053704e-06,
"loss": 0.7299,
"step": 150
},
{
"epoch": 0.023238925199709513,
"grad_norm": 1.9776874021989124,
"learning_rate": 2.307692307692308e-06,
"loss": 0.7074,
"step": 160
},
{
"epoch": 0.024691358024691357,
"grad_norm": 1.645294675336186,
"learning_rate": 2.4528301886792453e-06,
"loss": 0.7003,
"step": 170
},
{
"epoch": 0.026143790849673203,
"grad_norm": 1.903626800669526,
"learning_rate": 2.597968069666183e-06,
"loss": 0.6935,
"step": 180
},
{
"epoch": 0.027596223674655047,
"grad_norm": 1.6296522016767983,
"learning_rate": 2.7431059506531207e-06,
"loss": 0.7099,
"step": 190
},
{
"epoch": 0.029048656499636893,
"grad_norm": 1.5624745122869332,
"learning_rate": 2.8882438316400583e-06,
"loss": 0.7082,
"step": 200
},
{
"epoch": 0.030501089324618737,
"grad_norm": 1.5327148829437787,
"learning_rate": 3.033381712626996e-06,
"loss": 0.6847,
"step": 210
},
{
"epoch": 0.03195352214960058,
"grad_norm": 1.4217156007581908,
"learning_rate": 3.1785195936139337e-06,
"loss": 0.6997,
"step": 220
},
{
"epoch": 0.03340595497458242,
"grad_norm": 1.678714535521671,
"learning_rate": 3.323657474600871e-06,
"loss": 0.6922,
"step": 230
},
{
"epoch": 0.034858387799564274,
"grad_norm": 1.6893028132334575,
"learning_rate": 3.4687953555878086e-06,
"loss": 0.6764,
"step": 240
},
{
"epoch": 0.03631082062454612,
"grad_norm": 1.6842923668045748,
"learning_rate": 3.6139332365747467e-06,
"loss": 0.6838,
"step": 250
},
{
"epoch": 0.03776325344952796,
"grad_norm": 2.0758637079489306,
"learning_rate": 3.759071117561684e-06,
"loss": 0.6961,
"step": 260
},
{
"epoch": 0.0392156862745098,
"grad_norm": 1.651886885559497,
"learning_rate": 3.904208998548621e-06,
"loss": 0.6619,
"step": 270
},
{
"epoch": 0.04066811909949165,
"grad_norm": 1.6813735734416895,
"learning_rate": 4.049346879535559e-06,
"loss": 0.691,
"step": 280
},
{
"epoch": 0.04212055192447349,
"grad_norm": 1.8001370749006687,
"learning_rate": 4.194484760522497e-06,
"loss": 0.6646,
"step": 290
},
{
"epoch": 0.04357298474945534,
"grad_norm": 1.8255351447030483,
"learning_rate": 4.339622641509435e-06,
"loss": 0.6595,
"step": 300
},
{
"epoch": 0.04502541757443718,
"grad_norm": 1.7918481140936697,
"learning_rate": 4.484760522496372e-06,
"loss": 0.6555,
"step": 310
},
{
"epoch": 0.04647785039941903,
"grad_norm": 1.6697318257583398,
"learning_rate": 4.629898403483309e-06,
"loss": 0.6734,
"step": 320
},
{
"epoch": 0.04793028322440087,
"grad_norm": 1.5656777878920214,
"learning_rate": 4.775036284470247e-06,
"loss": 0.6511,
"step": 330
},
{
"epoch": 0.04938271604938271,
"grad_norm": 1.6515736055504289,
"learning_rate": 4.920174165457185e-06,
"loss": 0.6651,
"step": 340
},
{
"epoch": 0.050835148874364564,
"grad_norm": 1.6517233906536315,
"learning_rate": 5.065312046444122e-06,
"loss": 0.665,
"step": 350
},
{
"epoch": 0.05228758169934641,
"grad_norm": 1.6987223199576384,
"learning_rate": 5.210449927431061e-06,
"loss": 0.6632,
"step": 360
},
{
"epoch": 0.05374001452432825,
"grad_norm": 1.578744968443496,
"learning_rate": 5.355587808417998e-06,
"loss": 0.665,
"step": 370
},
{
"epoch": 0.05519244734931009,
"grad_norm": 1.4975426293081397,
"learning_rate": 5.500725689404935e-06,
"loss": 0.6511,
"step": 380
},
{
"epoch": 0.05664488017429194,
"grad_norm": 1.7386717568110297,
"learning_rate": 5.645863570391873e-06,
"loss": 0.6676,
"step": 390
},
{
"epoch": 0.05809731299927379,
"grad_norm": 1.5916583497500596,
"learning_rate": 5.7910014513788105e-06,
"loss": 0.6635,
"step": 400
},
{
"epoch": 0.05954974582425563,
"grad_norm": 1.6931617934865184,
"learning_rate": 5.936139332365748e-06,
"loss": 0.6668,
"step": 410
},
{
"epoch": 0.06100217864923747,
"grad_norm": 1.5616372247201953,
"learning_rate": 6.081277213352685e-06,
"loss": 0.6685,
"step": 420
},
{
"epoch": 0.06245461147421932,
"grad_norm": 1.5424914283941253,
"learning_rate": 6.226415094339623e-06,
"loss": 0.659,
"step": 430
},
{
"epoch": 0.06390704429920116,
"grad_norm": 1.6468311050594455,
"learning_rate": 6.37155297532656e-06,
"loss": 0.6453,
"step": 440
},
{
"epoch": 0.06535947712418301,
"grad_norm": 1.5765402125957226,
"learning_rate": 6.5166908563134976e-06,
"loss": 0.6598,
"step": 450
},
{
"epoch": 0.06681190994916485,
"grad_norm": 1.7349394887283642,
"learning_rate": 6.6618287373004365e-06,
"loss": 0.6619,
"step": 460
},
{
"epoch": 0.0682643427741467,
"grad_norm": 1.6385635232751372,
"learning_rate": 6.806966618287374e-06,
"loss": 0.6692,
"step": 470
},
{
"epoch": 0.06971677559912855,
"grad_norm": 1.4945507177883908,
"learning_rate": 6.952104499274311e-06,
"loss": 0.6484,
"step": 480
},
{
"epoch": 0.07116920842411038,
"grad_norm": 1.583857774726375,
"learning_rate": 7.097242380261249e-06,
"loss": 0.657,
"step": 490
},
{
"epoch": 0.07262164124909223,
"grad_norm": 1.8780189334850588,
"learning_rate": 7.242380261248186e-06,
"loss": 0.6601,
"step": 500
},
{
"epoch": 0.07407407407407407,
"grad_norm": 1.5153409007972507,
"learning_rate": 7.387518142235124e-06,
"loss": 0.6542,
"step": 510
},
{
"epoch": 0.07552650689905592,
"grad_norm": 1.5243833834622142,
"learning_rate": 7.532656023222062e-06,
"loss": 0.6476,
"step": 520
},
{
"epoch": 0.07697893972403776,
"grad_norm": 1.6429693792028686,
"learning_rate": 7.677793904208998e-06,
"loss": 0.6451,
"step": 530
},
{
"epoch": 0.0784313725490196,
"grad_norm": 1.802860360098263,
"learning_rate": 7.822931785195936e-06,
"loss": 0.6527,
"step": 540
},
{
"epoch": 0.07988380537400146,
"grad_norm": 1.6594363957156038,
"learning_rate": 7.968069666182874e-06,
"loss": 0.661,
"step": 550
},
{
"epoch": 0.0813362381989833,
"grad_norm": 1.5938255936259151,
"learning_rate": 8.113207547169812e-06,
"loss": 0.6547,
"step": 560
},
{
"epoch": 0.08278867102396514,
"grad_norm": 1.3939924292770436,
"learning_rate": 8.25834542815675e-06,
"loss": 0.6609,
"step": 570
},
{
"epoch": 0.08424110384894698,
"grad_norm": 1.5321796462771227,
"learning_rate": 8.403483309143687e-06,
"loss": 0.6419,
"step": 580
},
{
"epoch": 0.08569353667392883,
"grad_norm": 1.5907007682060863,
"learning_rate": 8.548621190130625e-06,
"loss": 0.625,
"step": 590
},
{
"epoch": 0.08714596949891068,
"grad_norm": 1.6048966671231157,
"learning_rate": 8.693759071117563e-06,
"loss": 0.658,
"step": 600
},
{
"epoch": 0.08859840232389252,
"grad_norm": 1.457751877262412,
"learning_rate": 8.8388969521045e-06,
"loss": 0.6456,
"step": 610
},
{
"epoch": 0.09005083514887437,
"grad_norm": 1.3925725985786772,
"learning_rate": 8.984034833091438e-06,
"loss": 0.6494,
"step": 620
},
{
"epoch": 0.0915032679738562,
"grad_norm": 1.6476815627809678,
"learning_rate": 9.129172714078376e-06,
"loss": 0.6604,
"step": 630
},
{
"epoch": 0.09295570079883805,
"grad_norm": 1.4844043302240553,
"learning_rate": 9.274310595065312e-06,
"loss": 0.6462,
"step": 640
},
{
"epoch": 0.0944081336238199,
"grad_norm": 1.5541257847812342,
"learning_rate": 9.41944847605225e-06,
"loss": 0.6464,
"step": 650
},
{
"epoch": 0.09586056644880174,
"grad_norm": 1.5339956751582804,
"learning_rate": 9.564586357039188e-06,
"loss": 0.6471,
"step": 660
},
{
"epoch": 0.09731299927378359,
"grad_norm": 1.550006983868159,
"learning_rate": 9.709724238026126e-06,
"loss": 0.6519,
"step": 670
},
{
"epoch": 0.09876543209876543,
"grad_norm": 1.298622779401985,
"learning_rate": 9.854862119013063e-06,
"loss": 0.6508,
"step": 680
},
{
"epoch": 0.10021786492374728,
"grad_norm": 1.4545201677417376,
"learning_rate": 1e-05,
"loss": 0.6483,
"step": 690
},
{
"epoch": 0.10167029774872913,
"grad_norm": 1.7514454450540817,
"learning_rate": 9.999935728859667e-06,
"loss": 0.6517,
"step": 700
},
{
"epoch": 0.10312273057371096,
"grad_norm": 1.3010290416328456,
"learning_rate": 9.999742917090981e-06,
"loss": 0.6435,
"step": 710
},
{
"epoch": 0.10457516339869281,
"grad_norm": 1.5222737445349914,
"learning_rate": 9.999421569650833e-06,
"loss": 0.6355,
"step": 720
},
{
"epoch": 0.10602759622367465,
"grad_norm": 1.5758824439402839,
"learning_rate": 9.99897169480057e-06,
"loss": 0.6414,
"step": 730
},
{
"epoch": 0.1074800290486565,
"grad_norm": 1.3245458819453462,
"learning_rate": 9.99839330410578e-06,
"loss": 0.6416,
"step": 740
},
{
"epoch": 0.10893246187363835,
"grad_norm": 1.4753577499137038,
"learning_rate": 9.997686412435996e-06,
"loss": 0.6381,
"step": 750
},
{
"epoch": 0.11038489469862019,
"grad_norm": 1.4578988593383,
"learning_rate": 9.99685103796431e-06,
"loss": 0.6369,
"step": 760
},
{
"epoch": 0.11183732752360204,
"grad_norm": 1.389881220599468,
"learning_rate": 9.99588720216691e-06,
"loss": 0.6622,
"step": 770
},
{
"epoch": 0.11328976034858387,
"grad_norm": 1.2318560606230133,
"learning_rate": 9.994794929822527e-06,
"loss": 0.6279,
"step": 780
},
{
"epoch": 0.11474219317356572,
"grad_norm": 1.355472620629438,
"learning_rate": 9.993574249011797e-06,
"loss": 0.641,
"step": 790
},
{
"epoch": 0.11619462599854757,
"grad_norm": 1.4379602146139996,
"learning_rate": 9.992225191116538e-06,
"loss": 0.6439,
"step": 800
},
{
"epoch": 0.11764705882352941,
"grad_norm": 1.4777958226910466,
"learning_rate": 9.990747790818946e-06,
"loss": 0.6457,
"step": 810
},
{
"epoch": 0.11909949164851126,
"grad_norm": 1.2895229336241503,
"learning_rate": 9.989142086100703e-06,
"loss": 0.6483,
"step": 820
},
{
"epoch": 0.1205519244734931,
"grad_norm": 1.4811460587250382,
"learning_rate": 9.987408118241995e-06,
"loss": 0.6509,
"step": 830
},
{
"epoch": 0.12200435729847495,
"grad_norm": 1.3189208191268318,
"learning_rate": 9.985545931820463e-06,
"loss": 0.6181,
"step": 840
},
{
"epoch": 0.12345679012345678,
"grad_norm": 1.3731300368595278,
"learning_rate": 9.983555574710043e-06,
"loss": 0.6274,
"step": 850
},
{
"epoch": 0.12490922294843863,
"grad_norm": 1.4055775942483093,
"learning_rate": 9.981437098079743e-06,
"loss": 0.6398,
"step": 860
},
{
"epoch": 0.12636165577342048,
"grad_norm": 1.3307192435974602,
"learning_rate": 9.979190556392326e-06,
"loss": 0.6393,
"step": 870
},
{
"epoch": 0.12781408859840232,
"grad_norm": 1.5622917958142868,
"learning_rate": 9.976816007402912e-06,
"loss": 0.6456,
"step": 880
},
{
"epoch": 0.12926652142338416,
"grad_norm": 1.390636406480548,
"learning_rate": 9.974313512157488e-06,
"loss": 0.6288,
"step": 890
},
{
"epoch": 0.13071895424836602,
"grad_norm": 1.4427250843896926,
"learning_rate": 9.971683134991344e-06,
"loss": 0.6266,
"step": 900
},
{
"epoch": 0.13217138707334786,
"grad_norm": 1.4098179198178282,
"learning_rate": 9.968924943527418e-06,
"loss": 0.6411,
"step": 910
},
{
"epoch": 0.1336238198983297,
"grad_norm": 1.4962238363929918,
"learning_rate": 9.96603900867455e-06,
"loss": 0.6315,
"step": 920
},
{
"epoch": 0.13507625272331156,
"grad_norm": 1.3209044251278015,
"learning_rate": 9.963025404625673e-06,
"loss": 0.6423,
"step": 930
},
{
"epoch": 0.1365286855482934,
"grad_norm": 1.39955503516968,
"learning_rate": 9.959884208855893e-06,
"loss": 0.6361,
"step": 940
},
{
"epoch": 0.13798111837327523,
"grad_norm": 1.5348970475105241,
"learning_rate": 9.956615502120504e-06,
"loss": 0.6241,
"step": 950
},
{
"epoch": 0.1394335511982571,
"grad_norm": 1.48874630945738,
"learning_rate": 9.953219368452908e-06,
"loss": 0.631,
"step": 960
},
{
"epoch": 0.14088598402323893,
"grad_norm": 1.310857282598366,
"learning_rate": 9.949695895162464e-06,
"loss": 0.627,
"step": 970
},
{
"epoch": 0.14233841684822077,
"grad_norm": 1.3619342578169393,
"learning_rate": 9.946045172832224e-06,
"loss": 0.6387,
"step": 980
},
{
"epoch": 0.1437908496732026,
"grad_norm": 1.4936986486504984,
"learning_rate": 9.942267295316625e-06,
"loss": 0.6331,
"step": 990
},
{
"epoch": 0.14524328249818447,
"grad_norm": 1.32511584393411,
"learning_rate": 9.938362359739068e-06,
"loss": 0.626,
"step": 1000
},
{
"epoch": 0.1466957153231663,
"grad_norm": 1.3291454266011833,
"learning_rate": 9.934330466489414e-06,
"loss": 0.6451,
"step": 1010
},
{
"epoch": 0.14814814814814814,
"grad_norm": 1.3289648153139675,
"learning_rate": 9.930171719221418e-06,
"loss": 0.6333,
"step": 1020
},
{
"epoch": 0.14960058097313,
"grad_norm": 1.3388955314518605,
"learning_rate": 9.925886224850047e-06,
"loss": 0.6329,
"step": 1030
},
{
"epoch": 0.15105301379811184,
"grad_norm": 1.3788458990043229,
"learning_rate": 9.921474093548748e-06,
"loss": 0.6308,
"step": 1040
},
{
"epoch": 0.15250544662309368,
"grad_norm": 1.2630947233952987,
"learning_rate": 9.916935438746604e-06,
"loss": 0.6366,
"step": 1050
},
{
"epoch": 0.1539578794480755,
"grad_norm": 1.2586848110727198,
"learning_rate": 9.912270377125424e-06,
"loss": 0.6224,
"step": 1060
},
{
"epoch": 0.15541031227305738,
"grad_norm": 1.5648142512317709,
"learning_rate": 9.90747902861674e-06,
"loss": 0.6261,
"step": 1070
},
{
"epoch": 0.1568627450980392,
"grad_norm": 1.477705850244199,
"learning_rate": 9.902561516398723e-06,
"loss": 0.6207,
"step": 1080
},
{
"epoch": 0.15831517792302105,
"grad_norm": 1.2950681154644361,
"learning_rate": 9.897517966893023e-06,
"loss": 0.6218,
"step": 1090
},
{
"epoch": 0.15976761074800291,
"grad_norm": 1.4613516139089748,
"learning_rate": 9.892348509761509e-06,
"loss": 0.6237,
"step": 1100
},
{
"epoch": 0.16122004357298475,
"grad_norm": 1.2641419484176866,
"learning_rate": 9.887053277902943e-06,
"loss": 0.6425,
"step": 1110
},
{
"epoch": 0.1626724763979666,
"grad_norm": 1.2419109246681843,
"learning_rate": 9.881632407449561e-06,
"loss": 0.6423,
"step": 1120
},
{
"epoch": 0.16412490922294845,
"grad_norm": 1.4096648257937974,
"learning_rate": 9.876086037763575e-06,
"loss": 0.6383,
"step": 1130
},
{
"epoch": 0.1655773420479303,
"grad_norm": 1.2574892255736747,
"learning_rate": 9.870414311433585e-06,
"loss": 0.6059,
"step": 1140
},
{
"epoch": 0.16702977487291212,
"grad_norm": 1.2716145459010044,
"learning_rate": 9.86461737427092e-06,
"loss": 0.6098,
"step": 1150
},
{
"epoch": 0.16848220769789396,
"grad_norm": 1.1998298755084313,
"learning_rate": 9.858695375305885e-06,
"loss": 0.6214,
"step": 1160
},
{
"epoch": 0.16993464052287582,
"grad_norm": 1.4281449888166444,
"learning_rate": 9.852648466783927e-06,
"loss": 0.6241,
"step": 1170
},
{
"epoch": 0.17138707334785766,
"grad_norm": 1.4071764477667867,
"learning_rate": 9.84647680416173e-06,
"loss": 0.6474,
"step": 1180
},
{
"epoch": 0.1728395061728395,
"grad_norm": 1.2174453861834778,
"learning_rate": 9.840180546103215e-06,
"loss": 0.6326,
"step": 1190
},
{
"epoch": 0.17429193899782136,
"grad_norm": 1.3029300772595094,
"learning_rate": 9.833759854475453e-06,
"loss": 0.6185,
"step": 1200
},
{
"epoch": 0.1757443718228032,
"grad_norm": 1.271112016193465,
"learning_rate": 9.827214894344514e-06,
"loss": 0.6301,
"step": 1210
},
{
"epoch": 0.17719680464778503,
"grad_norm": 1.2997276991719462,
"learning_rate": 9.82054583397122e-06,
"loss": 0.6317,
"step": 1220
},
{
"epoch": 0.1786492374727669,
"grad_norm": 1.2096030387104992,
"learning_rate": 9.813752844806814e-06,
"loss": 0.6159,
"step": 1230
},
{
"epoch": 0.18010167029774873,
"grad_norm": 1.2973416257944899,
"learning_rate": 9.806836101488561e-06,
"loss": 0.6289,
"step": 1240
},
{
"epoch": 0.18155410312273057,
"grad_norm": 1.3197440048632956,
"learning_rate": 9.799795781835253e-06,
"loss": 0.6088,
"step": 1250
},
{
"epoch": 0.1830065359477124,
"grad_norm": 1.2535036782710556,
"learning_rate": 9.79263206684264e-06,
"loss": 0.6206,
"step": 1260
},
{
"epoch": 0.18445896877269427,
"grad_norm": 1.3190252094745194,
"learning_rate": 9.785345140678775e-06,
"loss": 0.6149,
"step": 1270
},
{
"epoch": 0.1859114015976761,
"grad_norm": 1.3148617882447478,
"learning_rate": 9.777935190679277e-06,
"loss": 0.6134,
"step": 1280
},
{
"epoch": 0.18736383442265794,
"grad_norm": 1.3368521794263946,
"learning_rate": 9.770402407342524e-06,
"loss": 0.6258,
"step": 1290
},
{
"epoch": 0.1888162672476398,
"grad_norm": 1.3941700458180073,
"learning_rate": 9.762746984324743e-06,
"loss": 0.6191,
"step": 1300
},
{
"epoch": 0.19026870007262164,
"grad_norm": 1.3152403546822757,
"learning_rate": 9.754969118435043e-06,
"loss": 0.6446,
"step": 1310
},
{
"epoch": 0.19172113289760348,
"grad_norm": 1.3013626770341264,
"learning_rate": 9.747069009630347e-06,
"loss": 0.6312,
"step": 1320
},
{
"epoch": 0.19317356572258534,
"grad_norm": 1.3966383885583535,
"learning_rate": 9.739046861010255e-06,
"loss": 0.6207,
"step": 1330
},
{
"epoch": 0.19462599854756718,
"grad_norm": 1.1439991746974036,
"learning_rate": 9.730902878811825e-06,
"loss": 0.6144,
"step": 1340
},
{
"epoch": 0.19607843137254902,
"grad_norm": 1.3540894709055364,
"learning_rate": 9.722637272404263e-06,
"loss": 0.6044,
"step": 1350
},
{
"epoch": 0.19753086419753085,
"grad_norm": 1.100639588271217,
"learning_rate": 9.71425025428355e-06,
"loss": 0.6036,
"step": 1360
},
{
"epoch": 0.19898329702251272,
"grad_norm": 1.1874319432290736,
"learning_rate": 9.705742040066977e-06,
"loss": 0.6039,
"step": 1370
},
{
"epoch": 0.20043572984749455,
"grad_norm": 1.1767671647303808,
"learning_rate": 9.697112848487591e-06,
"loss": 0.6376,
"step": 1380
},
{
"epoch": 0.2018881626724764,
"grad_norm": 1.135879944041461,
"learning_rate": 9.688362901388586e-06,
"loss": 0.6035,
"step": 1390
},
{
"epoch": 0.20334059549745825,
"grad_norm": 1.2315910796359388,
"learning_rate": 9.679492423717596e-06,
"loss": 0.6098,
"step": 1400
},
{
"epoch": 0.2047930283224401,
"grad_norm": 1.4949408462288012,
"learning_rate": 9.670501643520904e-06,
"loss": 0.6203,
"step": 1410
},
{
"epoch": 0.20624546114742193,
"grad_norm": 1.3180181445795711,
"learning_rate": 9.66139079193759e-06,
"loss": 0.6286,
"step": 1420
},
{
"epoch": 0.20769789397240376,
"grad_norm": 1.2616556885045909,
"learning_rate": 9.652160103193583e-06,
"loss": 0.6274,
"step": 1430
},
{
"epoch": 0.20915032679738563,
"grad_norm": 1.3174449455574337,
"learning_rate": 9.642809814595637e-06,
"loss": 0.6136,
"step": 1440
},
{
"epoch": 0.21060275962236746,
"grad_norm": 1.296735377133819,
"learning_rate": 9.633340166525238e-06,
"loss": 0.6145,
"step": 1450
},
{
"epoch": 0.2120551924473493,
"grad_norm": 1.2502497833244608,
"learning_rate": 9.62375140243242e-06,
"loss": 0.6031,
"step": 1460
},
{
"epoch": 0.21350762527233116,
"grad_norm": 1.2288830705505374,
"learning_rate": 9.6140437688295e-06,
"loss": 0.6128,
"step": 1470
},
{
"epoch": 0.214960058097313,
"grad_norm": 1.1119473380240397,
"learning_rate": 9.604217515284753e-06,
"loss": 0.6171,
"step": 1480
},
{
"epoch": 0.21641249092229484,
"grad_norm": 1.2070397164389806,
"learning_rate": 9.594272894415986e-06,
"loss": 0.6238,
"step": 1490
},
{
"epoch": 0.2178649237472767,
"grad_norm": 1.3345637205372078,
"learning_rate": 9.584210161884049e-06,
"loss": 0.6163,
"step": 1500
},
{
"epoch": 0.21931735657225854,
"grad_norm": 1.1385043759036517,
"learning_rate": 9.57402957638626e-06,
"loss": 0.6083,
"step": 1510
},
{
"epoch": 0.22076978939724037,
"grad_norm": 1.1936988121465326,
"learning_rate": 9.563731399649756e-06,
"loss": 0.5992,
"step": 1520
},
{
"epoch": 0.2222222222222222,
"grad_norm": 1.4103572503621762,
"learning_rate": 9.553315896424758e-06,
"loss": 0.6054,
"step": 1530
},
{
"epoch": 0.22367465504720407,
"grad_norm": 1.3209719950503893,
"learning_rate": 9.54278333447778e-06,
"loss": 0.596,
"step": 1540
},
{
"epoch": 0.2251270878721859,
"grad_norm": 1.1693016501696898,
"learning_rate": 9.532133984584721e-06,
"loss": 0.6323,
"step": 1550
},
{
"epoch": 0.22657952069716775,
"grad_norm": 1.1691510921859125,
"learning_rate": 9.521368120523931e-06,
"loss": 0.6027,
"step": 1560
},
{
"epoch": 0.2280319535221496,
"grad_norm": 1.2114364957172101,
"learning_rate": 9.510486019069154e-06,
"loss": 0.6245,
"step": 1570
},
{
"epoch": 0.22948438634713145,
"grad_norm": 1.265123327235345,
"learning_rate": 9.499487959982415e-06,
"loss": 0.6189,
"step": 1580
},
{
"epoch": 0.23093681917211328,
"grad_norm": 1.3773059483594046,
"learning_rate": 9.488374226006836e-06,
"loss": 0.6106,
"step": 1590
},
{
"epoch": 0.23238925199709515,
"grad_norm": 1.2737618179619303,
"learning_rate": 9.477145102859357e-06,
"loss": 0.6115,
"step": 1600
},
{
"epoch": 0.23384168482207698,
"grad_norm": 1.3066121502077,
"learning_rate": 9.4658008792234e-06,
"loss": 0.609,
"step": 1610
},
{
"epoch": 0.23529411764705882,
"grad_norm": 1.242518893517758,
"learning_rate": 9.45434184674144e-06,
"loss": 0.6,
"step": 1620
},
{
"epoch": 0.23674655047204066,
"grad_norm": 1.2493334973003818,
"learning_rate": 9.442768300007511e-06,
"loss": 0.6144,
"step": 1630
},
{
"epoch": 0.23819898329702252,
"grad_norm": 1.2775874117960886,
"learning_rate": 9.431080536559631e-06,
"loss": 0.6245,
"step": 1640
},
{
"epoch": 0.23965141612200436,
"grad_norm": 1.247039996382283,
"learning_rate": 9.419278856872154e-06,
"loss": 0.6279,
"step": 1650
},
{
"epoch": 0.2411038489469862,
"grad_norm": 1.302601682600637,
"learning_rate": 9.407363564348047e-06,
"loss": 0.5933,
"step": 1660
},
{
"epoch": 0.24255628177196806,
"grad_norm": 1.431347455463815,
"learning_rate": 9.39533496531108e-06,
"loss": 0.6171,
"step": 1670
},
{
"epoch": 0.2440087145969499,
"grad_norm": 1.2527655662771335,
"learning_rate": 9.38319336899797e-06,
"loss": 0.6099,
"step": 1680
},
{
"epoch": 0.24546114742193173,
"grad_norm": 1.205551788839019,
"learning_rate": 9.370939087550407e-06,
"loss": 0.6077,
"step": 1690
},
{
"epoch": 0.24691358024691357,
"grad_norm": 1.332981320431861,
"learning_rate": 9.358572436007052e-06,
"loss": 0.6126,
"step": 1700
},
{
"epoch": 0.24836601307189543,
"grad_norm": 1.2112905977700383,
"learning_rate": 9.346093732295422e-06,
"loss": 0.6141,
"step": 1710
},
{
"epoch": 0.24981844589687727,
"grad_norm": 1.1741115783770129,
"learning_rate": 9.333503297223725e-06,
"loss": 0.5977,
"step": 1720
},
{
"epoch": 0.2512708787218591,
"grad_norm": 1.2308239868942004,
"learning_rate": 9.320801454472607e-06,
"loss": 0.6213,
"step": 1730
},
{
"epoch": 0.25272331154684097,
"grad_norm": 1.3933258283474292,
"learning_rate": 9.30798853058684e-06,
"loss": 0.6217,
"step": 1740
},
{
"epoch": 0.2541757443718228,
"grad_norm": 1.2467959691205432,
"learning_rate": 9.29506485496691e-06,
"loss": 0.6089,
"step": 1750
},
{
"epoch": 0.25562817719680464,
"grad_norm": 1.106847677662664,
"learning_rate": 9.282030759860566e-06,
"loss": 0.6113,
"step": 1760
},
{
"epoch": 0.2570806100217865,
"grad_norm": 1.225606521070107,
"learning_rate": 9.268886580354272e-06,
"loss": 0.6041,
"step": 1770
},
{
"epoch": 0.2585330428467683,
"grad_norm": 1.1249241718792773,
"learning_rate": 9.255632654364591e-06,
"loss": 0.6112,
"step": 1780
},
{
"epoch": 0.2599854756717502,
"grad_norm": 1.2347205288363368,
"learning_rate": 9.242269322629494e-06,
"loss": 0.6003,
"step": 1790
},
{
"epoch": 0.26143790849673204,
"grad_norm": 1.3040805105750026,
"learning_rate": 9.228796928699613e-06,
"loss": 0.6187,
"step": 1800
},
{
"epoch": 0.26289034132171385,
"grad_norm": 1.4585670240799034,
"learning_rate": 9.215215818929392e-06,
"loss": 0.612,
"step": 1810
},
{
"epoch": 0.2643427741466957,
"grad_norm": 1.0974130075617774,
"learning_rate": 9.201526342468202e-06,
"loss": 0.6124,
"step": 1820
},
{
"epoch": 0.2657952069716776,
"grad_norm": 1.2918051377461068,
"learning_rate": 9.18772885125134e-06,
"loss": 0.6055,
"step": 1830
},
{
"epoch": 0.2672476397966594,
"grad_norm": 1.199609927095931,
"learning_rate": 9.17382369999101e-06,
"loss": 0.6086,
"step": 1840
},
{
"epoch": 0.26870007262164125,
"grad_norm": 1.2736244478450063,
"learning_rate": 9.159811246167182e-06,
"loss": 0.6111,
"step": 1850
},
{
"epoch": 0.2701525054466231,
"grad_norm": 1.2484696326393374,
"learning_rate": 9.14569185001841e-06,
"loss": 0.5951,
"step": 1860
},
{
"epoch": 0.2716049382716049,
"grad_norm": 1.3221301583704237,
"learning_rate": 9.131465874532568e-06,
"loss": 0.5861,
"step": 1870
},
{
"epoch": 0.2730573710965868,
"grad_norm": 1.2578322361866867,
"learning_rate": 9.117133685437524e-06,
"loss": 0.6073,
"step": 1880
},
{
"epoch": 0.27450980392156865,
"grad_norm": 1.3260698149158467,
"learning_rate": 9.102695651191737e-06,
"loss": 0.5838,
"step": 1890
},
{
"epoch": 0.27596223674655046,
"grad_norm": 1.2373193794097532,
"learning_rate": 9.088152142974771e-06,
"loss": 0.6013,
"step": 1900
},
{
"epoch": 0.2774146695715323,
"grad_norm": 1.1997047870357698,
"learning_rate": 9.073503534677773e-06,
"loss": 0.6219,
"step": 1910
},
{
"epoch": 0.2788671023965142,
"grad_norm": 1.2769112952981858,
"learning_rate": 9.058750202893844e-06,
"loss": 0.6052,
"step": 1920
},
{
"epoch": 0.280319535221496,
"grad_norm": 1.2302296498321919,
"learning_rate": 9.04389252690837e-06,
"loss": 0.6124,
"step": 1930
},
{
"epoch": 0.28177196804647786,
"grad_norm": 1.2009594091858158,
"learning_rate": 9.02893088868926e-06,
"loss": 0.604,
"step": 1940
},
{
"epoch": 0.28322440087145967,
"grad_norm": 1.0539872600155336,
"learning_rate": 9.013865672877133e-06,
"loss": 0.6052,
"step": 1950
},
{
"epoch": 0.28467683369644153,
"grad_norm": 1.2561895098497668,
"learning_rate": 8.998697266775433e-06,
"loss": 0.6077,
"step": 1960
},
{
"epoch": 0.2861292665214234,
"grad_norm": 1.2763583417414128,
"learning_rate": 8.98342606034046e-06,
"loss": 0.6059,
"step": 1970
},
{
"epoch": 0.2875816993464052,
"grad_norm": 1.1463184995763767,
"learning_rate": 8.96805244617135e-06,
"loss": 0.6183,
"step": 1980
},
{
"epoch": 0.28903413217138707,
"grad_norm": 1.1421597790792624,
"learning_rate": 8.952576819499998e-06,
"loss": 0.602,
"step": 1990
},
{
"epoch": 0.29048656499636893,
"grad_norm": 1.3046866547593934,
"learning_rate": 8.93699957818087e-06,
"loss": 0.5925,
"step": 2000
},
{
"epoch": 0.29193899782135074,
"grad_norm": 1.27239619384718,
"learning_rate": 8.921321122680789e-06,
"loss": 0.6037,
"step": 2010
},
{
"epoch": 0.2933914306463326,
"grad_norm": 1.3073284462474046,
"learning_rate": 8.905541856068641e-06,
"loss": 0.6077,
"step": 2020
},
{
"epoch": 0.29484386347131447,
"grad_norm": 1.2694028140938955,
"learning_rate": 8.889662184005007e-06,
"loss": 0.6076,
"step": 2030
},
{
"epoch": 0.2962962962962963,
"grad_norm": 1.1075058528848678,
"learning_rate": 8.873682514731746e-06,
"loss": 0.5986,
"step": 2040
},
{
"epoch": 0.29774872912127814,
"grad_norm": 1.25011183641691,
"learning_rate": 8.85760325906148e-06,
"loss": 0.5911,
"step": 2050
},
{
"epoch": 0.29920116194626,
"grad_norm": 1.230690665069067,
"learning_rate": 8.841424830367051e-06,
"loss": 0.5918,
"step": 2060
},
{
"epoch": 0.3006535947712418,
"grad_norm": 1.2143851276582127,
"learning_rate": 8.82514764457088e-06,
"loss": 0.6026,
"step": 2070
},
{
"epoch": 0.3021060275962237,
"grad_norm": 1.1711415813258073,
"learning_rate": 8.808772120134286e-06,
"loss": 0.6208,
"step": 2080
},
{
"epoch": 0.30355846042120554,
"grad_norm": 1.2105658122447378,
"learning_rate": 8.79229867804672e-06,
"loss": 0.6178,
"step": 2090
},
{
"epoch": 0.30501089324618735,
"grad_norm": 1.260614604486508,
"learning_rate": 8.775727741814945e-06,
"loss": 0.6033,
"step": 2100
},
{
"epoch": 0.3064633260711692,
"grad_norm": 1.1949196588242055,
"learning_rate": 8.75905973745215e-06,
"loss": 0.5954,
"step": 2110
},
{
"epoch": 0.307915758896151,
"grad_norm": 1.2358431757504627,
"learning_rate": 8.742295093466993e-06,
"loss": 0.5929,
"step": 2120
},
{
"epoch": 0.3093681917211329,
"grad_norm": 1.1788915626896657,
"learning_rate": 8.725434240852586e-06,
"loss": 0.6014,
"step": 2130
},
{
"epoch": 0.31082062454611475,
"grad_norm": 1.2899429468502281,
"learning_rate": 8.708477613075422e-06,
"loss": 0.588,
"step": 2140
},
{
"epoch": 0.31227305737109656,
"grad_norm": 1.0436767601630443,
"learning_rate": 8.691425646064222e-06,
"loss": 0.6128,
"step": 2150
},
{
"epoch": 0.3137254901960784,
"grad_norm": 1.1823668694466984,
"learning_rate": 8.674278778198731e-06,
"loss": 0.5939,
"step": 2160
},
{
"epoch": 0.3151779230210603,
"grad_norm": 1.2287777612088193,
"learning_rate": 8.657037450298449e-06,
"loss": 0.5942,
"step": 2170
},
{
"epoch": 0.3166303558460421,
"grad_norm": 1.1210160142803036,
"learning_rate": 8.6397021056113e-06,
"loss": 0.6068,
"step": 2180
},
{
"epoch": 0.31808278867102396,
"grad_norm": 1.176574092958882,
"learning_rate": 8.622273189802231e-06,
"loss": 0.6099,
"step": 2190
},
{
"epoch": 0.31953522149600583,
"grad_norm": 1.2276623152067967,
"learning_rate": 8.604751150941758e-06,
"loss": 0.598,
"step": 2200
},
{
"epoch": 0.32098765432098764,
"grad_norm": 1.2049029589388036,
"learning_rate": 8.58713643949445e-06,
"loss": 0.5934,
"step": 2210
},
{
"epoch": 0.3224400871459695,
"grad_norm": 1.2650704032924422,
"learning_rate": 8.569429508307345e-06,
"loss": 0.6039,
"step": 2220
},
{
"epoch": 0.32389251997095136,
"grad_norm": 1.088534753663297,
"learning_rate": 8.551630812598303e-06,
"loss": 0.6038,
"step": 2230
},
{
"epoch": 0.3253449527959332,
"grad_norm": 1.1678210415173849,
"learning_rate": 8.533740809944317e-06,
"loss": 0.6084,
"step": 2240
},
{
"epoch": 0.32679738562091504,
"grad_norm": 1.251355519441971,
"learning_rate": 8.515759960269731e-06,
"loss": 0.5975,
"step": 2250
},
{
"epoch": 0.3282498184458969,
"grad_norm": 1.1662322522769242,
"learning_rate": 8.497688725834432e-06,
"loss": 0.6106,
"step": 2260
},
{
"epoch": 0.3297022512708787,
"grad_norm": 1.336372713961502,
"learning_rate": 8.479527571221957e-06,
"loss": 0.6224,
"step": 2270
},
{
"epoch": 0.3311546840958606,
"grad_norm": 1.148371532122775,
"learning_rate": 8.461276963327555e-06,
"loss": 0.607,
"step": 2280
},
{
"epoch": 0.33260711692084244,
"grad_norm": 1.3691981401078914,
"learning_rate": 8.442937371346174e-06,
"loss": 0.6001,
"step": 2290
},
{
"epoch": 0.33405954974582425,
"grad_norm": 1.3343569533197541,
"learning_rate": 8.424509266760413e-06,
"loss": 0.6009,
"step": 2300
},
{
"epoch": 0.3355119825708061,
"grad_norm": 1.0903008241967769,
"learning_rate": 8.405993123328388e-06,
"loss": 0.5852,
"step": 2310
},
{
"epoch": 0.3369644153957879,
"grad_norm": 1.2770798153391716,
"learning_rate": 8.387389417071565e-06,
"loss": 0.5967,
"step": 2320
},
{
"epoch": 0.3384168482207698,
"grad_norm": 1.1893611624135727,
"learning_rate": 8.368698626262506e-06,
"loss": 0.5906,
"step": 2330
},
{
"epoch": 0.33986928104575165,
"grad_norm": 1.1182656055274527,
"learning_rate": 8.349921231412588e-06,
"loss": 0.6144,
"step": 2340
},
{
"epoch": 0.34132171387073346,
"grad_norm": 1.1569225334439495,
"learning_rate": 8.331057715259643e-06,
"loss": 0.5945,
"step": 2350
},
{
"epoch": 0.3427741466957153,
"grad_norm": 1.0553585361032343,
"learning_rate": 8.312108562755547e-06,
"loss": 0.6012,
"step": 2360
},
{
"epoch": 0.3442265795206972,
"grad_norm": 1.0429439932782214,
"learning_rate": 8.29307426105376e-06,
"loss": 0.602,
"step": 2370
},
{
"epoch": 0.345679012345679,
"grad_norm": 1.0397368512389722,
"learning_rate": 8.273955299496787e-06,
"loss": 0.5932,
"step": 2380
},
{
"epoch": 0.34713144517066086,
"grad_norm": 1.0989788243486265,
"learning_rate": 8.254752169603614e-06,
"loss": 0.5987,
"step": 2390
},
{
"epoch": 0.3485838779956427,
"grad_norm": 1.2513128657031618,
"learning_rate": 8.235465365057067e-06,
"loss": 0.597,
"step": 2400
},
{
"epoch": 0.35003631082062453,
"grad_norm": 1.2696804086094644,
"learning_rate": 8.21609538169111e-06,
"loss": 0.5962,
"step": 2410
},
{
"epoch": 0.3514887436456064,
"grad_norm": 1.3765675743894579,
"learning_rate": 8.196642717478113e-06,
"loss": 0.6083,
"step": 2420
},
{
"epoch": 0.35294117647058826,
"grad_norm": 1.1525716644685924,
"learning_rate": 8.177107872516041e-06,
"loss": 0.5912,
"step": 2430
},
{
"epoch": 0.35439360929557007,
"grad_norm": 1.1930516036081553,
"learning_rate": 8.157491349015599e-06,
"loss": 0.601,
"step": 2440
},
{
"epoch": 0.35584604212055193,
"grad_norm": 1.3453249916774477,
"learning_rate": 8.137793651287317e-06,
"loss": 0.62,
"step": 2450
},
{
"epoch": 0.3572984749455338,
"grad_norm": 1.216543063547056,
"learning_rate": 8.118015285728598e-06,
"loss": 0.6037,
"step": 2460
},
{
"epoch": 0.3587509077705156,
"grad_norm": 1.129394528084983,
"learning_rate": 8.098156760810683e-06,
"loss": 0.598,
"step": 2470
},
{
"epoch": 0.36020334059549747,
"grad_norm": 1.124156367954234,
"learning_rate": 8.078218587065589e-06,
"loss": 0.5813,
"step": 2480
},
{
"epoch": 0.3616557734204793,
"grad_norm": 1.2039082584679666,
"learning_rate": 8.058201277072981e-06,
"loss": 0.5876,
"step": 2490
},
{
"epoch": 0.36310820624546114,
"grad_norm": 1.1919842026488203,
"learning_rate": 8.038105345446994e-06,
"loss": 0.6115,
"step": 2500
},
{
"epoch": 0.364560639070443,
"grad_norm": 1.2851968482663827,
"learning_rate": 8.017931308823006e-06,
"loss": 0.592,
"step": 2510
},
{
"epoch": 0.3660130718954248,
"grad_norm": 1.1538243634302991,
"learning_rate": 7.997679685844353e-06,
"loss": 0.5867,
"step": 2520
},
{
"epoch": 0.3674655047204067,
"grad_norm": 1.0704432112589999,
"learning_rate": 7.977350997148994e-06,
"loss": 0.6007,
"step": 2530
},
{
"epoch": 0.36891793754538854,
"grad_norm": 1.2707334756597408,
"learning_rate": 7.956945765356133e-06,
"loss": 0.5746,
"step": 2540
},
{
"epoch": 0.37037037037037035,
"grad_norm": 1.2061421625898763,
"learning_rate": 7.936464515052776e-06,
"loss": 0.601,
"step": 2550
},
{
"epoch": 0.3718228031953522,
"grad_norm": 1.318015728266432,
"learning_rate": 7.915907772780244e-06,
"loss": 0.6081,
"step": 2560
},
{
"epoch": 0.3732752360203341,
"grad_norm": 1.253197445356757,
"learning_rate": 7.89527606702065e-06,
"loss": 0.6046,
"step": 2570
},
{
"epoch": 0.3747276688453159,
"grad_norm": 1.190199765539676,
"learning_rate": 7.87456992818329e-06,
"loss": 0.5986,
"step": 2580
},
{
"epoch": 0.37618010167029775,
"grad_norm": 1.193398450040499,
"learning_rate": 7.853789888591032e-06,
"loss": 0.5889,
"step": 2590
},
{
"epoch": 0.3776325344952796,
"grad_norm": 1.035053671117003,
"learning_rate": 7.832936482466612e-06,
"loss": 0.5934,
"step": 2600
},
{
"epoch": 0.3790849673202614,
"grad_norm": 1.1386993400574172,
"learning_rate": 7.812010245918903e-06,
"loss": 0.586,
"step": 2610
},
{
"epoch": 0.3805374001452433,
"grad_norm": 1.1022458257608025,
"learning_rate": 7.79101171692914e-06,
"loss": 0.5806,
"step": 2620
},
{
"epoch": 0.38198983297022515,
"grad_norm": 1.1758543851880188,
"learning_rate": 7.769941435337083e-06,
"loss": 0.5618,
"step": 2630
},
{
"epoch": 0.38344226579520696,
"grad_norm": 1.2426818455480244,
"learning_rate": 7.748799942827147e-06,
"loss": 0.6012,
"step": 2640
},
{
"epoch": 0.3848946986201888,
"grad_norm": 1.0718204571931684,
"learning_rate": 7.72758778291446e-06,
"loss": 0.5887,
"step": 2650
},
{
"epoch": 0.3863471314451707,
"grad_norm": 1.0289005823465374,
"learning_rate": 7.706305500930909e-06,
"loss": 0.6037,
"step": 2660
},
{
"epoch": 0.3877995642701525,
"grad_norm": 1.2478985029233107,
"learning_rate": 7.684953644011103e-06,
"loss": 0.584,
"step": 2670
},
{
"epoch": 0.38925199709513436,
"grad_norm": 1.1066991243562059,
"learning_rate": 7.66353276107832e-06,
"loss": 0.6007,
"step": 2680
},
{
"epoch": 0.39070442992011617,
"grad_norm": 1.2345614999374477,
"learning_rate": 7.64204340283039e-06,
"loss": 0.6033,
"step": 2690
},
{
"epoch": 0.39215686274509803,
"grad_norm": 1.0798799696274017,
"learning_rate": 7.620486121725536e-06,
"loss": 0.59,
"step": 2700
},
{
"epoch": 0.3936092955700799,
"grad_norm": 1.1600968806836478,
"learning_rate": 7.598861471968174e-06,
"loss": 0.5948,
"step": 2710
},
{
"epoch": 0.3950617283950617,
"grad_norm": 1.1860847221048887,
"learning_rate": 7.577170009494665e-06,
"loss": 0.5981,
"step": 2720
},
{
"epoch": 0.39651416122004357,
"grad_norm": 1.0670434364146835,
"learning_rate": 7.555412291959018e-06,
"loss": 0.5772,
"step": 2730
},
{
"epoch": 0.39796659404502543,
"grad_norm": 1.1865817610815497,
"learning_rate": 7.533588878718561e-06,
"loss": 0.584,
"step": 2740
},
{
"epoch": 0.39941902687000724,
"grad_norm": 1.2092053148497965,
"learning_rate": 7.511700330819556e-06,
"loss": 0.5832,
"step": 2750
},
{
"epoch": 0.4008714596949891,
"grad_norm": 1.1770338237370501,
"learning_rate": 7.489747210982777e-06,
"loss": 0.5984,
"step": 2760
},
{
"epoch": 0.40232389251997097,
"grad_norm": 1.1434774901575833,
"learning_rate": 7.4677300835890424e-06,
"loss": 0.5755,
"step": 2770
},
{
"epoch": 0.4037763253449528,
"grad_norm": 1.0366368031771818,
"learning_rate": 7.445649514664703e-06,
"loss": 0.5886,
"step": 2780
},
{
"epoch": 0.40522875816993464,
"grad_norm": 1.2729396302065998,
"learning_rate": 7.423506071867101e-06,
"loss": 0.6134,
"step": 2790
},
{
"epoch": 0.4066811909949165,
"grad_norm": 1.0518352889412923,
"learning_rate": 7.401300324469961e-06,
"loss": 0.5737,
"step": 2800
},
{
"epoch": 0.4081336238198983,
"grad_norm": 1.2001944481237583,
"learning_rate": 7.3790328433487665e-06,
"loss": 0.5874,
"step": 2810
},
{
"epoch": 0.4095860566448802,
"grad_norm": 1.250231920993964,
"learning_rate": 7.3567042009660786e-06,
"loss": 0.5862,
"step": 2820
},
{
"epoch": 0.41103848946986205,
"grad_norm": 1.1512872210708966,
"learning_rate": 7.3343149713568215e-06,
"loss": 0.593,
"step": 2830
},
{
"epoch": 0.41249092229484385,
"grad_norm": 1.1605256860138091,
"learning_rate": 7.311865730113525e-06,
"loss": 0.5939,
"step": 2840
},
{
"epoch": 0.4139433551198257,
"grad_norm": 1.3940208410225592,
"learning_rate": 7.2893570543715174e-06,
"loss": 0.6028,
"step": 2850
},
{
"epoch": 0.4153957879448075,
"grad_norm": 1.1976078557092422,
"learning_rate": 7.266789522794104e-06,
"loss": 0.6065,
"step": 2860
},
{
"epoch": 0.4168482207697894,
"grad_norm": 1.035110243445679,
"learning_rate": 7.244163715557683e-06,
"loss": 0.5915,
"step": 2870
},
{
"epoch": 0.41830065359477125,
"grad_norm": 1.1865073190747897,
"learning_rate": 7.2214802143368225e-06,
"loss": 0.5961,
"step": 2880
},
{
"epoch": 0.41975308641975306,
"grad_norm": 1.0991372561424138,
"learning_rate": 7.1987396022893216e-06,
"loss": 0.5857,
"step": 2890
},
{
"epoch": 0.4212055192447349,
"grad_norm": 1.0801243737112538,
"learning_rate": 7.175942464041209e-06,
"loss": 0.5829,
"step": 2900
},
{
"epoch": 0.4226579520697168,
"grad_norm": 1.3295568712189132,
"learning_rate": 7.15308938567171e-06,
"loss": 0.5869,
"step": 2910
},
{
"epoch": 0.4241103848946986,
"grad_norm": 1.0402363831702612,
"learning_rate": 7.130180954698187e-06,
"loss": 0.5842,
"step": 2920
},
{
"epoch": 0.42556281771968046,
"grad_norm": 1.1031276144488775,
"learning_rate": 7.107217760061036e-06,
"loss": 0.5923,
"step": 2930
},
{
"epoch": 0.42701525054466233,
"grad_norm": 1.183086396688286,
"learning_rate": 7.0842003921085376e-06,
"loss": 0.6053,
"step": 2940
},
{
"epoch": 0.42846768336964414,
"grad_norm": 1.244303339507363,
"learning_rate": 7.061129442581685e-06,
"loss": 0.5924,
"step": 2950
},
{
"epoch": 0.429920116194626,
"grad_norm": 1.2478572360385807,
"learning_rate": 7.038005504598975e-06,
"loss": 0.5922,
"step": 2960
},
{
"epoch": 0.43137254901960786,
"grad_norm": 1.0447681879549313,
"learning_rate": 7.0148291726411486e-06,
"loss": 0.5825,
"step": 2970
},
{
"epoch": 0.4328249818445897,
"grad_norm": 1.1025428022026995,
"learning_rate": 6.9916010425359214e-06,
"loss": 0.5956,
"step": 2980
},
{
"epoch": 0.43427741466957154,
"grad_norm": 1.329010163267056,
"learning_rate": 6.968321711442658e-06,
"loss": 0.5772,
"step": 2990
},
{
"epoch": 0.4357298474945534,
"grad_norm": 1.2330587975332181,
"learning_rate": 6.9449917778370216e-06,
"loss": 0.5933,
"step": 3000
},
{
"epoch": 0.4371822803195352,
"grad_norm": 1.1656344009683823,
"learning_rate": 6.921611841495584e-06,
"loss": 0.5922,
"step": 3010
},
{
"epoch": 0.4386347131445171,
"grad_norm": 1.2709734185927093,
"learning_rate": 6.898182503480414e-06,
"loss": 0.5911,
"step": 3020
},
{
"epoch": 0.4400871459694989,
"grad_norm": 1.269770194129687,
"learning_rate": 6.8747043661236215e-06,
"loss": 0.6103,
"step": 3030
},
{
"epoch": 0.44153957879448075,
"grad_norm": 1.106713465551905,
"learning_rate": 6.851178033011869e-06,
"loss": 0.5997,
"step": 3040
},
{
"epoch": 0.4429920116194626,
"grad_norm": 1.1985970638971495,
"learning_rate": 6.82760410897086e-06,
"loss": 0.5727,
"step": 3050
},
{
"epoch": 0.4444444444444444,
"grad_norm": 1.1259472634689607,
"learning_rate": 6.8039832000497865e-06,
"loss": 0.5983,
"step": 3060
},
{
"epoch": 0.4458968772694263,
"grad_norm": 1.212189906596056,
"learning_rate": 6.78031591350575e-06,
"loss": 0.5958,
"step": 3070
},
{
"epoch": 0.44734931009440815,
"grad_norm": 1.0999728539824523,
"learning_rate": 6.756602857788148e-06,
"loss": 0.5717,
"step": 3080
},
{
"epoch": 0.44880174291938996,
"grad_norm": 1.1130187014726358,
"learning_rate": 6.732844642523032e-06,
"loss": 0.5793,
"step": 3090
},
{
"epoch": 0.4502541757443718,
"grad_norm": 1.075132513625087,
"learning_rate": 6.70904187849744e-06,
"loss": 0.562,
"step": 3100
},
{
"epoch": 0.4517066085693537,
"grad_norm": 1.2147850552839328,
"learning_rate": 6.685195177643684e-06,
"loss": 0.5978,
"step": 3110
},
{
"epoch": 0.4531590413943355,
"grad_norm": 1.2836246837826484,
"learning_rate": 6.661305153023628e-06,
"loss": 0.5912,
"step": 3120
},
{
"epoch": 0.45461147421931736,
"grad_norm": 1.1766776836268427,
"learning_rate": 6.637372418812921e-06,
"loss": 0.586,
"step": 3130
},
{
"epoch": 0.4560639070442992,
"grad_norm": 1.3613669267848012,
"learning_rate": 6.613397590285211e-06,
"loss": 0.5998,
"step": 3140
},
{
"epoch": 0.45751633986928103,
"grad_norm": 1.2051701552338834,
"learning_rate": 6.589381283796325e-06,
"loss": 0.5812,
"step": 3150
},
{
"epoch": 0.4589687726942629,
"grad_norm": 1.1519365736041338,
"learning_rate": 6.565324116768428e-06,
"loss": 0.583,
"step": 3160
},
{
"epoch": 0.46042120551924476,
"grad_norm": 1.1475917123110242,
"learning_rate": 6.54122670767414e-06,
"loss": 0.5765,
"step": 3170
},
{
"epoch": 0.46187363834422657,
"grad_norm": 1.088676956077236,
"learning_rate": 6.517089676020648e-06,
"loss": 0.5997,
"step": 3180
},
{
"epoch": 0.46332607116920843,
"grad_norm": 1.1195203213303881,
"learning_rate": 6.492913642333768e-06,
"loss": 0.565,
"step": 3190
},
{
"epoch": 0.4647785039941903,
"grad_norm": 1.0927178103796473,
"learning_rate": 6.468699228142004e-06,
"loss": 0.5988,
"step": 3200
},
{
"epoch": 0.4662309368191721,
"grad_norm": 1.1180323598233408,
"learning_rate": 6.444447055960559e-06,
"loss": 0.6034,
"step": 3210
},
{
"epoch": 0.46768336964415397,
"grad_norm": 1.1581218721076667,
"learning_rate": 6.420157749275341e-06,
"loss": 0.5792,
"step": 3220
},
{
"epoch": 0.4691358024691358,
"grad_norm": 1.2355006071990586,
"learning_rate": 6.395831932526924e-06,
"loss": 0.5914,
"step": 3230
},
{
"epoch": 0.47058823529411764,
"grad_norm": 1.2628642644632941,
"learning_rate": 6.371470231094498e-06,
"loss": 0.5972,
"step": 3240
},
{
"epoch": 0.4720406681190995,
"grad_norm": 1.30372441555249,
"learning_rate": 6.3470732712798e-06,
"loss": 0.5943,
"step": 3250
},
{
"epoch": 0.4734931009440813,
"grad_norm": 1.2732465621842586,
"learning_rate": 6.322641680290997e-06,
"loss": 0.59,
"step": 3260
},
{
"epoch": 0.4749455337690632,
"grad_norm": 1.1957460012906904,
"learning_rate": 6.298176086226577e-06,
"loss": 0.5908,
"step": 3270
},
{
"epoch": 0.47639796659404504,
"grad_norm": 1.2666436895215651,
"learning_rate": 6.273677118059192e-06,
"loss": 0.579,
"step": 3280
},
{
"epoch": 0.47785039941902685,
"grad_norm": 1.1740612442844354,
"learning_rate": 6.24914540561949e-06,
"loss": 0.5849,
"step": 3290
},
{
"epoch": 0.4793028322440087,
"grad_norm": 1.170368029656733,
"learning_rate": 6.2245815795799235e-06,
"loss": 0.5914,
"step": 3300
},
{
"epoch": 0.4807552650689906,
"grad_norm": 1.060432274782722,
"learning_rate": 6.199986271438536e-06,
"loss": 0.5692,
"step": 3310
},
{
"epoch": 0.4822076978939724,
"grad_norm": 1.133481629336483,
"learning_rate": 6.17536011350273e-06,
"loss": 0.5789,
"step": 3320
},
{
"epoch": 0.48366013071895425,
"grad_norm": 1.0779584839433474,
"learning_rate": 6.150703738873004e-06,
"loss": 0.5815,
"step": 3330
},
{
"epoch": 0.4851125635439361,
"grad_norm": 1.138478981177591,
"learning_rate": 6.1260177814266855e-06,
"loss": 0.5754,
"step": 3340
},
{
"epoch": 0.4865649963689179,
"grad_norm": 1.1290987276585867,
"learning_rate": 6.101302875801628e-06,
"loss": 0.5778,
"step": 3350
},
{
"epoch": 0.4880174291938998,
"grad_norm": 1.1468009205478524,
"learning_rate": 6.0765596573798994e-06,
"loss": 0.5689,
"step": 3360
},
{
"epoch": 0.48946986201888165,
"grad_norm": 1.0683998313181482,
"learning_rate": 6.051788762271442e-06,
"loss": 0.5692,
"step": 3370
},
{
"epoch": 0.49092229484386346,
"grad_norm": 1.1889646870467425,
"learning_rate": 6.0269908272977295e-06,
"loss": 0.5808,
"step": 3380
},
{
"epoch": 0.4923747276688453,
"grad_norm": 1.2529890364621932,
"learning_rate": 6.002166489975385e-06,
"loss": 0.5772,
"step": 3390
},
{
"epoch": 0.49382716049382713,
"grad_norm": 1.1925487080641164,
"learning_rate": 5.977316388499794e-06,
"loss": 0.5862,
"step": 3400
},
{
"epoch": 0.495279593318809,
"grad_norm": 1.1372201366075154,
"learning_rate": 5.952441161728701e-06,
"loss": 0.5662,
"step": 3410
},
{
"epoch": 0.49673202614379086,
"grad_norm": 1.2981299245914195,
"learning_rate": 5.927541449165783e-06,
"loss": 0.5682,
"step": 3420
},
{
"epoch": 0.49818445896877267,
"grad_norm": 1.1198285033650917,
"learning_rate": 5.902617890944207e-06,
"loss": 0.5894,
"step": 3430
},
{
"epoch": 0.49963689179375453,
"grad_norm": 1.1442459802118357,
"learning_rate": 5.8776711278101765e-06,
"loss": 0.5735,
"step": 3440
},
{
"epoch": 0.5010893246187363,
"grad_norm": 1.10045421098352,
"learning_rate": 5.852701801106458e-06,
"loss": 0.5838,
"step": 3450
},
{
"epoch": 0.5025417574437182,
"grad_norm": 1.1675311387395517,
"learning_rate": 5.82771055275589e-06,
"loss": 0.5847,
"step": 3460
},
{
"epoch": 0.5039941902687001,
"grad_norm": 1.0028532762834719,
"learning_rate": 5.802698025244886e-06,
"loss": 0.5656,
"step": 3470
},
{
"epoch": 0.5054466230936819,
"grad_norm": 1.028656973511835,
"learning_rate": 5.777664861606912e-06,
"loss": 0.5871,
"step": 3480
},
{
"epoch": 0.5068990559186638,
"grad_norm": 1.2007383871296113,
"learning_rate": 5.752611705405957e-06,
"loss": 0.5895,
"step": 3490
},
{
"epoch": 0.5083514887436456,
"grad_norm": 1.1281898149999334,
"learning_rate": 5.7275392007199896e-06,
"loss": 0.573,
"step": 3500
},
{
"epoch": 0.5098039215686274,
"grad_norm": 1.282146433020574,
"learning_rate": 5.702447992124394e-06,
"loss": 0.57,
"step": 3510
},
{
"epoch": 0.5112563543936093,
"grad_norm": 1.05801689608913,
"learning_rate": 5.677338724675406e-06,
"loss": 0.5751,
"step": 3520
},
{
"epoch": 0.5127087872185911,
"grad_norm": 1.2511793245069922,
"learning_rate": 5.652212043893528e-06,
"loss": 0.5805,
"step": 3530
},
{
"epoch": 0.514161220043573,
"grad_norm": 1.2496537928999953,
"learning_rate": 5.627068595746931e-06,
"loss": 0.5734,
"step": 3540
},
{
"epoch": 0.5156136528685549,
"grad_norm": 1.0586939290192166,
"learning_rate": 5.601909026634846e-06,
"loss": 0.573,
"step": 3550
},
{
"epoch": 0.5170660856935366,
"grad_norm": 1.2135072197108623,
"learning_rate": 5.576733983370955e-06,
"loss": 0.5696,
"step": 3560
},
{
"epoch": 0.5185185185185185,
"grad_norm": 1.096951604322022,
"learning_rate": 5.551544113166752e-06,
"loss": 0.5764,
"step": 3570
},
{
"epoch": 0.5199709513435004,
"grad_norm": 1.067656908278471,
"learning_rate": 5.5263400636149104e-06,
"loss": 0.5945,
"step": 3580
},
{
"epoch": 0.5214233841684822,
"grad_norm": 1.2528345132805765,
"learning_rate": 5.50112248267263e-06,
"loss": 0.5698,
"step": 3590
},
{
"epoch": 0.5228758169934641,
"grad_norm": 1.153586426579592,
"learning_rate": 5.475892018644989e-06,
"loss": 0.5939,
"step": 3600
},
{
"epoch": 0.524328249818446,
"grad_norm": 1.321281822598792,
"learning_rate": 5.450649320168263e-06,
"loss": 0.5764,
"step": 3610
},
{
"epoch": 0.5257806826434277,
"grad_norm": 1.1546247883125684,
"learning_rate": 5.4253950361932565e-06,
"loss": 0.5698,
"step": 3620
},
{
"epoch": 0.5272331154684096,
"grad_norm": 1.3090075714265825,
"learning_rate": 5.400129815968623e-06,
"loss": 0.58,
"step": 3630
},
{
"epoch": 0.5286855482933914,
"grad_norm": 1.3546772950978652,
"learning_rate": 5.374854309024167e-06,
"loss": 0.5906,
"step": 3640
},
{
"epoch": 0.5301379811183733,
"grad_norm": 1.0728126839197956,
"learning_rate": 5.349569165154153e-06,
"loss": 0.5617,
"step": 3650
},
{
"epoch": 0.5315904139433552,
"grad_norm": 1.0481388119854531,
"learning_rate": 5.32427503440059e-06,
"loss": 0.5752,
"step": 3660
},
{
"epoch": 0.533042846768337,
"grad_norm": 1.251734474368655,
"learning_rate": 5.29897256703653e-06,
"loss": 0.577,
"step": 3670
},
{
"epoch": 0.5344952795933188,
"grad_norm": 1.1273771235496188,
"learning_rate": 5.2736624135493465e-06,
"loss": 0.5604,
"step": 3680
},
{
"epoch": 0.5359477124183006,
"grad_norm": 1.1728285082039356,
"learning_rate": 5.248345224624007e-06,
"loss": 0.5799,
"step": 3690
},
{
"epoch": 0.5374001452432825,
"grad_norm": 1.1207082347004158,
"learning_rate": 5.223021651126356e-06,
"loss": 0.5792,
"step": 3700
},
{
"epoch": 0.5388525780682644,
"grad_norm": 1.096111126610637,
"learning_rate": 5.197692344086369e-06,
"loss": 0.582,
"step": 3710
},
{
"epoch": 0.5403050108932462,
"grad_norm": 1.1432895144261512,
"learning_rate": 5.172357954681427e-06,
"loss": 0.5669,
"step": 3720
},
{
"epoch": 0.541757443718228,
"grad_norm": 1.2795186578480655,
"learning_rate": 5.147019134219569e-06,
"loss": 0.5727,
"step": 3730
},
{
"epoch": 0.5432098765432098,
"grad_norm": 1.1497619263404009,
"learning_rate": 5.121676534122746e-06,
"loss": 0.5665,
"step": 3740
},
{
"epoch": 0.5446623093681917,
"grad_norm": 1.053760679670929,
"learning_rate": 5.096330805910085e-06,
"loss": 0.5758,
"step": 3750
},
{
"epoch": 0.5461147421931736,
"grad_norm": 1.2455461930319618,
"learning_rate": 5.0709826011811246e-06,
"loss": 0.5715,
"step": 3760
},
{
"epoch": 0.5475671750181554,
"grad_norm": 1.2714142743729588,
"learning_rate": 5.045632571599076e-06,
"loss": 0.5764,
"step": 3770
},
{
"epoch": 0.5490196078431373,
"grad_norm": 1.2596602396359573,
"learning_rate": 5.020281368874063e-06,
"loss": 0.5777,
"step": 3780
},
{
"epoch": 0.5504720406681191,
"grad_norm": 1.096076072807335,
"learning_rate": 4.994929644746366e-06,
"loss": 0.5752,
"step": 3790
},
{
"epoch": 0.5519244734931009,
"grad_norm": 1.1180419407959938,
"learning_rate": 4.969578050969675e-06,
"loss": 0.5783,
"step": 3800
},
{
"epoch": 0.5533769063180828,
"grad_norm": 1.1457632992717688,
"learning_rate": 4.944227239294327e-06,
"loss": 0.5706,
"step": 3810
},
{
"epoch": 0.5548293391430646,
"grad_norm": 1.0431686309314605,
"learning_rate": 4.918877861450553e-06,
"loss": 0.5629,
"step": 3820
},
{
"epoch": 0.5562817719680465,
"grad_norm": 1.1033442319502207,
"learning_rate": 4.893530569131716e-06,
"loss": 0.5611,
"step": 3830
},
{
"epoch": 0.5577342047930284,
"grad_norm": 1.1929600913303742,
"learning_rate": 4.8681860139775745e-06,
"loss": 0.568,
"step": 3840
},
{
"epoch": 0.5591866376180101,
"grad_norm": 1.281488846532093,
"learning_rate": 4.842844847557508e-06,
"loss": 0.5882,
"step": 3850
},
{
"epoch": 0.560639070442992,
"grad_norm": 1.1195048036816224,
"learning_rate": 4.817507721353785e-06,
"loss": 0.596,
"step": 3860
},
{
"epoch": 0.5620915032679739,
"grad_norm": 1.1077419816516767,
"learning_rate": 4.792175286744802e-06,
"loss": 0.5747,
"step": 3870
},
{
"epoch": 0.5635439360929557,
"grad_norm": 1.3502747193694702,
"learning_rate": 4.766848194988344e-06,
"loss": 0.5915,
"step": 3880
},
{
"epoch": 0.5649963689179376,
"grad_norm": 1.001203957804234,
"learning_rate": 4.741527097204837e-06,
"loss": 0.5732,
"step": 3890
},
{
"epoch": 0.5664488017429193,
"grad_norm": 1.1428305709772093,
"learning_rate": 4.7162126443606145e-06,
"loss": 0.5682,
"step": 3900
},
{
"epoch": 0.5679012345679012,
"grad_norm": 1.220191866232699,
"learning_rate": 4.690905487251174e-06,
"loss": 0.5695,
"step": 3910
},
{
"epoch": 0.5693536673928831,
"grad_norm": 1.0555952997249456,
"learning_rate": 4.665606276484455e-06,
"loss": 0.5684,
"step": 3920
},
{
"epoch": 0.5708061002178649,
"grad_norm": 1.1675138439049109,
"learning_rate": 4.6403156624641085e-06,
"loss": 0.5876,
"step": 3930
},
{
"epoch": 0.5722585330428468,
"grad_norm": 1.2418849374572543,
"learning_rate": 4.615034295372777e-06,
"loss": 0.5838,
"step": 3940
},
{
"epoch": 0.5737109658678287,
"grad_norm": 1.0616817293128535,
"learning_rate": 4.589762825155374e-06,
"loss": 0.57,
"step": 3950
},
{
"epoch": 0.5751633986928104,
"grad_norm": 1.2414737852232787,
"learning_rate": 4.564501901502386e-06,
"loss": 0.5521,
"step": 3960
},
{
"epoch": 0.5766158315177923,
"grad_norm": 1.0962764476368352,
"learning_rate": 4.5392521738331585e-06,
"loss": 0.5761,
"step": 3970
},
{
"epoch": 0.5780682643427741,
"grad_norm": 1.2445755051746221,
"learning_rate": 4.514014291279208e-06,
"loss": 0.5612,
"step": 3980
},
{
"epoch": 0.579520697167756,
"grad_norm": 1.1248791169953434,
"learning_rate": 4.488788902667534e-06,
"loss": 0.5651,
"step": 3990
},
{
"epoch": 0.5809731299927379,
"grad_norm": 1.1052395709597995,
"learning_rate": 4.463576656503927e-06,
"loss": 0.5624,
"step": 4000
},
{
"epoch": 0.5824255628177197,
"grad_norm": 1.0979993545936089,
"learning_rate": 4.438378200956318e-06,
"loss": 0.5747,
"step": 4010
},
{
"epoch": 0.5838779956427015,
"grad_norm": 1.1585156096079503,
"learning_rate": 4.413194183838091e-06,
"loss": 0.5757,
"step": 4020
},
{
"epoch": 0.5853304284676834,
"grad_norm": 1.0657343307419072,
"learning_rate": 4.388025252591448e-06,
"loss": 0.5826,
"step": 4030
},
{
"epoch": 0.5867828612926652,
"grad_norm": 1.1584399941372348,
"learning_rate": 4.362872054270753e-06,
"loss": 0.561,
"step": 4040
},
{
"epoch": 0.5882352941176471,
"grad_norm": 1.1136815017444102,
"learning_rate": 4.337735235525904e-06,
"loss": 0.5801,
"step": 4050
},
{
"epoch": 0.5896877269426289,
"grad_norm": 1.2048049573288624,
"learning_rate": 4.312615442585699e-06,
"loss": 0.5748,
"step": 4060
},
{
"epoch": 0.5911401597676107,
"grad_norm": 1.106968794623351,
"learning_rate": 4.287513321241237e-06,
"loss": 0.5665,
"step": 4070
},
{
"epoch": 0.5925925925925926,
"grad_norm": 1.0773536810915454,
"learning_rate": 4.262429516829299e-06,
"loss": 0.5739,
"step": 4080
},
{
"epoch": 0.5940450254175744,
"grad_norm": 1.2780512286596586,
"learning_rate": 4.237364674215774e-06,
"loss": 0.573,
"step": 4090
},
{
"epoch": 0.5954974582425563,
"grad_norm": 1.015175880325257,
"learning_rate": 4.212319437779066e-06,
"loss": 0.5637,
"step": 4100
},
{
"epoch": 0.5969498910675382,
"grad_norm": 1.1403330329394572,
"learning_rate": 4.187294451393541e-06,
"loss": 0.5807,
"step": 4110
},
{
"epoch": 0.59840232389252,
"grad_norm": 1.1083139371642667,
"learning_rate": 4.162290358412962e-06,
"loss": 0.5704,
"step": 4120
},
{
"epoch": 0.5998547567175018,
"grad_norm": 1.1372343052927192,
"learning_rate": 4.1373078016539535e-06,
"loss": 0.5559,
"step": 4130
},
{
"epoch": 0.6013071895424836,
"grad_norm": 1.2137905963682751,
"learning_rate": 4.1123474233794845e-06,
"loss": 0.5588,
"step": 4140
},
{
"epoch": 0.6027596223674655,
"grad_norm": 1.2130103389722957,
"learning_rate": 4.087409865282341e-06,
"loss": 0.5776,
"step": 4150
},
{
"epoch": 0.6042120551924474,
"grad_norm": 1.21914550825707,
"learning_rate": 4.062495768468646e-06,
"loss": 0.5618,
"step": 4160
},
{
"epoch": 0.6056644880174292,
"grad_norm": 1.1540562248868875,
"learning_rate": 4.03760577344136e-06,
"loss": 0.5784,
"step": 4170
},
{
"epoch": 0.6071169208424111,
"grad_norm": 1.214796762228358,
"learning_rate": 4.012740520083832e-06,
"loss": 0.5814,
"step": 4180
},
{
"epoch": 0.6085693536673928,
"grad_norm": 1.157806370832285,
"learning_rate": 3.987900647643334e-06,
"loss": 0.5791,
"step": 4190
},
{
"epoch": 0.6100217864923747,
"grad_norm": 1.1517956672556253,
"learning_rate": 3.963086794714639e-06,
"loss": 0.5652,
"step": 4200
},
{
"epoch": 0.6114742193173566,
"grad_norm": 1.1605789001720612,
"learning_rate": 3.9382995992235955e-06,
"loss": 0.5728,
"step": 4210
},
{
"epoch": 0.6129266521423384,
"grad_norm": 1.0630436480054268,
"learning_rate": 3.913539698410734e-06,
"loss": 0.5684,
"step": 4220
},
{
"epoch": 0.6143790849673203,
"grad_norm": 1.175513347812724,
"learning_rate": 3.888807728814874e-06,
"loss": 0.5664,
"step": 4230
},
{
"epoch": 0.615831517792302,
"grad_norm": 1.1583525329647688,
"learning_rate": 3.864104326256775e-06,
"loss": 0.5805,
"step": 4240
},
{
"epoch": 0.6172839506172839,
"grad_norm": 1.1058170223844426,
"learning_rate": 3.8394301258227756e-06,
"loss": 0.5622,
"step": 4250
},
{
"epoch": 0.6187363834422658,
"grad_norm": 1.2295319541574912,
"learning_rate": 3.814785761848475e-06,
"loss": 0.5583,
"step": 4260
},
{
"epoch": 0.6201888162672476,
"grad_norm": 1.092280135001415,
"learning_rate": 3.790171867902426e-06,
"loss": 0.5755,
"step": 4270
},
{
"epoch": 0.6216412490922295,
"grad_norm": 1.274653674496685,
"learning_rate": 3.7655890767698384e-06,
"loss": 0.5729,
"step": 4280
},
{
"epoch": 0.6230936819172114,
"grad_norm": 1.2166924621577075,
"learning_rate": 3.741038020436323e-06,
"loss": 0.5572,
"step": 4290
},
{
"epoch": 0.6245461147421931,
"grad_norm": 1.0296689666125658,
"learning_rate": 3.7165193300716297e-06,
"loss": 0.5664,
"step": 4300
},
{
"epoch": 0.625998547567175,
"grad_norm": 1.0530929308425294,
"learning_rate": 3.6920336360134378e-06,
"loss": 0.5679,
"step": 4310
},
{
"epoch": 0.6274509803921569,
"grad_norm": 1.1137539642969592,
"learning_rate": 3.6675815677511382e-06,
"loss": 0.5607,
"step": 4320
},
{
"epoch": 0.6289034132171387,
"grad_norm": 1.0875536687719785,
"learning_rate": 3.6431637539096565e-06,
"loss": 0.5691,
"step": 4330
},
{
"epoch": 0.6303558460421206,
"grad_norm": 1.1268225507247402,
"learning_rate": 3.6187808222332852e-06,
"loss": 0.5668,
"step": 4340
},
{
"epoch": 0.6318082788671024,
"grad_norm": 1.1757316218974525,
"learning_rate": 3.594433399569559e-06,
"loss": 0.5551,
"step": 4350
},
{
"epoch": 0.6332607116920842,
"grad_norm": 1.1554119314408926,
"learning_rate": 3.5701221118531195e-06,
"loss": 0.5785,
"step": 4360
},
{
"epoch": 0.6347131445170661,
"grad_norm": 1.0947128171930913,
"learning_rate": 3.5458475840896434e-06,
"loss": 0.5677,
"step": 4370
},
{
"epoch": 0.6361655773420479,
"grad_norm": 1.2477952532418557,
"learning_rate": 3.5216104403397623e-06,
"loss": 0.5504,
"step": 4380
},
{
"epoch": 0.6376180101670298,
"grad_norm": 1.1149755483280817,
"learning_rate": 3.4974113037030257e-06,
"loss": 0.5753,
"step": 4390
},
{
"epoch": 0.6390704429920117,
"grad_norm": 1.214526641921585,
"learning_rate": 3.473250796301874e-06,
"loss": 0.5669,
"step": 4400
},
{
"epoch": 0.6405228758169934,
"grad_norm": 1.1149175312128623,
"learning_rate": 3.4491295392656497e-06,
"loss": 0.5604,
"step": 4410
},
{
"epoch": 0.6419753086419753,
"grad_norm": 1.1763746140746527,
"learning_rate": 3.425048152714635e-06,
"loss": 0.5651,
"step": 4420
},
{
"epoch": 0.6434277414669571,
"grad_norm": 1.169802661186734,
"learning_rate": 3.4010072557440967e-06,
"loss": 0.5685,
"step": 4430
},
{
"epoch": 0.644880174291939,
"grad_norm": 1.1404701148865375,
"learning_rate": 3.3770074664083827e-06,
"loss": 0.577,
"step": 4440
},
{
"epoch": 0.6463326071169209,
"grad_norm": 1.2951511455390947,
"learning_rate": 3.353049401705022e-06,
"loss": 0.5546,
"step": 4450
},
{
"epoch": 0.6477850399419027,
"grad_norm": 1.2188858191779428,
"learning_rate": 3.329133677558873e-06,
"loss": 0.5697,
"step": 4460
},
{
"epoch": 0.6492374727668845,
"grad_norm": 1.1239635889524127,
"learning_rate": 3.3052609088062767e-06,
"loss": 0.5901,
"step": 4470
},
{
"epoch": 0.6506899055918663,
"grad_norm": 1.0931476283773633,
"learning_rate": 3.281431709179264e-06,
"loss": 0.566,
"step": 4480
},
{
"epoch": 0.6521423384168482,
"grad_norm": 1.4718901865939953,
"learning_rate": 3.2576466912897674e-06,
"loss": 0.5761,
"step": 4490
},
{
"epoch": 0.6535947712418301,
"grad_norm": 1.2062192465520678,
"learning_rate": 3.2339064666138783e-06,
"loss": 0.5757,
"step": 4500
},
{
"epoch": 0.6550472040668119,
"grad_norm": 1.2732571104572175,
"learning_rate": 3.2102116454761168e-06,
"loss": 0.5615,
"step": 4510
},
{
"epoch": 0.6564996368917938,
"grad_norm": 1.198522063919598,
"learning_rate": 3.1865628370337575e-06,
"loss": 0.5632,
"step": 4520
},
{
"epoch": 0.6579520697167756,
"grad_norm": 1.208764455797361,
"learning_rate": 3.162960649261152e-06,
"loss": 0.5472,
"step": 4530
},
{
"epoch": 0.6594045025417574,
"grad_norm": 1.2300085896818644,
"learning_rate": 3.1394056889341086e-06,
"loss": 0.5737,
"step": 4540
},
{
"epoch": 0.6608569353667393,
"grad_norm": 1.2362227883984134,
"learning_rate": 3.1158985616142944e-06,
"loss": 0.5467,
"step": 4550
},
{
"epoch": 0.6623093681917211,
"grad_norm": 1.2577141886691818,
"learning_rate": 3.092439871633658e-06,
"loss": 0.5652,
"step": 4560
},
{
"epoch": 0.663761801016703,
"grad_norm": 1.2246719550977323,
"learning_rate": 3.0690302220789036e-06,
"loss": 0.564,
"step": 4570
},
{
"epoch": 0.6652142338416849,
"grad_norm": 0.952770111510269,
"learning_rate": 3.0456702147759797e-06,
"loss": 0.5538,
"step": 4580
},
{
"epoch": 0.6666666666666666,
"grad_norm": 1.2114290005968387,
"learning_rate": 3.0223604502746097e-06,
"loss": 0.5624,
"step": 4590
},
{
"epoch": 0.6681190994916485,
"grad_norm": 1.2379634249474247,
"learning_rate": 2.999101527832849e-06,
"loss": 0.5581,
"step": 4600
},
{
"epoch": 0.6695715323166304,
"grad_norm": 1.2432970361649818,
"learning_rate": 2.9758940454016893e-06,
"loss": 0.5519,
"step": 4610
},
{
"epoch": 0.6710239651416122,
"grad_norm": 1.1827840525798392,
"learning_rate": 2.9527385996096702e-06,
"loss": 0.5512,
"step": 4620
},
{
"epoch": 0.6724763979665941,
"grad_norm": 1.1313263342846276,
"learning_rate": 2.929635785747558e-06,
"loss": 0.5615,
"step": 4630
},
{
"epoch": 0.6739288307915758,
"grad_norm": 1.0718626125088186,
"learning_rate": 2.9065861977530263e-06,
"loss": 0.5577,
"step": 4640
},
{
"epoch": 0.6753812636165577,
"grad_norm": 1.2058366328226908,
"learning_rate": 2.8835904281953984e-06,
"loss": 0.5543,
"step": 4650
},
{
"epoch": 0.6768336964415396,
"grad_norm": 1.2044090066060698,
"learning_rate": 2.8606490682604083e-06,
"loss": 0.563,
"step": 4660
},
{
"epoch": 0.6782861292665214,
"grad_norm": 1.2440783490748353,
"learning_rate": 2.837762707734999e-06,
"loss": 0.5678,
"step": 4670
},
{
"epoch": 0.6797385620915033,
"grad_norm": 1.1447619754452882,
"learning_rate": 2.8149319349921678e-06,
"loss": 0.5443,
"step": 4680
},
{
"epoch": 0.6811909949164852,
"grad_norm": 1.0682059420594845,
"learning_rate": 2.7921573369758344e-06,
"loss": 0.5548,
"step": 4690
},
{
"epoch": 0.6826434277414669,
"grad_norm": 1.0786981942796325,
"learning_rate": 2.769439499185752e-06,
"loss": 0.557,
"step": 4700
},
{
"epoch": 0.6840958605664488,
"grad_norm": 1.1021974391300458,
"learning_rate": 2.7467790056624565e-06,
"loss": 0.5641,
"step": 4710
},
{
"epoch": 0.6855482933914306,
"grad_norm": 1.172642324603278,
"learning_rate": 2.7241764389722536e-06,
"loss": 0.5579,
"step": 4720
},
{
"epoch": 0.6870007262164125,
"grad_norm": 1.1739344769196898,
"learning_rate": 2.7016323801922327e-06,
"loss": 0.5426,
"step": 4730
},
{
"epoch": 0.6884531590413944,
"grad_norm": 1.0908808031509236,
"learning_rate": 2.679147408895349e-06,
"loss": 0.5667,
"step": 4740
},
{
"epoch": 0.6899055918663762,
"grad_norm": 1.1345661062696517,
"learning_rate": 2.6567221031354907e-06,
"loss": 0.5639,
"step": 4750
},
{
"epoch": 0.691358024691358,
"grad_norm": 1.0249096917283105,
"learning_rate": 2.634357039432656e-06,
"loss": 0.5648,
"step": 4760
},
{
"epoch": 0.6928104575163399,
"grad_norm": 1.1583880032183098,
"learning_rate": 2.612052792758095e-06,
"loss": 0.5651,
"step": 4770
},
{
"epoch": 0.6942628903413217,
"grad_norm": 1.069684864764473,
"learning_rate": 2.5898099365195626e-06,
"loss": 0.5722,
"step": 4780
},
{
"epoch": 0.6957153231663036,
"grad_norm": 1.0867414593247826,
"learning_rate": 2.5676290425465496e-06,
"loss": 0.5664,
"step": 4790
},
{
"epoch": 0.6971677559912854,
"grad_norm": 1.1375716473128172,
"learning_rate": 2.5455106810755957e-06,
"loss": 0.5585,
"step": 4800
},
{
"epoch": 0.6986201888162672,
"grad_norm": 1.034623153574018,
"learning_rate": 2.5234554207356266e-06,
"loss": 0.5722,
"step": 4810
},
{
"epoch": 0.7000726216412491,
"grad_norm": 1.0654655922639538,
"learning_rate": 2.5014638285333357e-06,
"loss": 0.5643,
"step": 4820
},
{
"epoch": 0.7015250544662309,
"grad_norm": 1.0988829596394427,
"learning_rate": 2.479536469838606e-06,
"loss": 0.5635,
"step": 4830
},
{
"epoch": 0.7029774872912128,
"grad_norm": 1.050301540250255,
"learning_rate": 2.4576739083699764e-06,
"loss": 0.55,
"step": 4840
},
{
"epoch": 0.7044299201161947,
"grad_norm": 1.3185971209726384,
"learning_rate": 2.43587670618015e-06,
"loss": 0.5686,
"step": 4850
},
{
"epoch": 0.7058823529411765,
"grad_norm": 1.1036440984293434,
"learning_rate": 2.4141454236415428e-06,
"loss": 0.5617,
"step": 4860
},
{
"epoch": 0.7073347857661583,
"grad_norm": 1.0669150287420783,
"learning_rate": 2.392480619431879e-06,
"loss": 0.5416,
"step": 4870
},
{
"epoch": 0.7087872185911401,
"grad_norm": 1.0472161733755885,
"learning_rate": 2.3708828505198265e-06,
"loss": 0.5777,
"step": 4880
},
{
"epoch": 0.710239651416122,
"grad_norm": 1.1252884484776227,
"learning_rate": 2.349352672150681e-06,
"loss": 0.5535,
"step": 4890
},
{
"epoch": 0.7116920842411039,
"grad_norm": 1.1423409076437527,
"learning_rate": 2.3278906378320854e-06,
"loss": 0.5598,
"step": 4900
},
{
"epoch": 0.7131445170660857,
"grad_norm": 0.9801237939355479,
"learning_rate": 2.306497299319814e-06,
"loss": 0.5551,
"step": 4910
},
{
"epoch": 0.7145969498910676,
"grad_norm": 1.0526887175825372,
"learning_rate": 2.285173206603564e-06,
"loss": 0.5683,
"step": 4920
},
{
"epoch": 0.7160493827160493,
"grad_norm": 1.1758853714133906,
"learning_rate": 2.2639189078928453e-06,
"loss": 0.5581,
"step": 4930
},
{
"epoch": 0.7175018155410312,
"grad_norm": 1.107044757903735,
"learning_rate": 2.242734949602856e-06,
"loss": 0.5448,
"step": 4940
},
{
"epoch": 0.7189542483660131,
"grad_norm": 1.2037164103649114,
"learning_rate": 2.2216218763404647e-06,
"loss": 0.5531,
"step": 4950
},
{
"epoch": 0.7204066811909949,
"grad_norm": 1.0588992084011324,
"learning_rate": 2.200580230890188e-06,
"loss": 0.5501,
"step": 4960
},
{
"epoch": 0.7218591140159768,
"grad_norm": 1.2543824405997601,
"learning_rate": 2.17961055420024e-06,
"loss": 0.5769,
"step": 4970
},
{
"epoch": 0.7233115468409586,
"grad_norm": 1.1899069770329052,
"learning_rate": 2.1587133853686422e-06,
"loss": 0.5683,
"step": 4980
},
{
"epoch": 0.7247639796659404,
"grad_norm": 1.144536370052011,
"learning_rate": 2.137889261629334e-06,
"loss": 0.5648,
"step": 4990
},
{
"epoch": 0.7262164124909223,
"grad_norm": 1.1936078152653293,
"learning_rate": 2.1171387183383936e-06,
"loss": 0.5646,
"step": 5000
},
{
"epoch": 0.7276688453159041,
"grad_norm": 1.26324013915445,
"learning_rate": 2.096462288960251e-06,
"loss": 0.5682,
"step": 5010
},
{
"epoch": 0.729121278140886,
"grad_norm": 1.1381437228179463,
"learning_rate": 2.0758605050539836e-06,
"loss": 0.5571,
"step": 5020
},
{
"epoch": 0.7305737109658679,
"grad_norm": 1.3500933515295954,
"learning_rate": 2.0553338962596492e-06,
"loss": 0.5716,
"step": 5030
},
{
"epoch": 0.7320261437908496,
"grad_norm": 1.0940717331908218,
"learning_rate": 2.03488299028467e-06,
"loss": 0.5626,
"step": 5040
},
{
"epoch": 0.7334785766158315,
"grad_norm": 1.1116999445105729,
"learning_rate": 2.0145083128902647e-06,
"loss": 0.5625,
"step": 5050
},
{
"epoch": 0.7349310094408134,
"grad_norm": 1.144025480175903,
"learning_rate": 1.9942103878779335e-06,
"loss": 0.5601,
"step": 5060
},
{
"epoch": 0.7363834422657952,
"grad_norm": 1.0557283567612936,
"learning_rate": 1.9739897370759886e-06,
"loss": 0.5523,
"step": 5070
},
{
"epoch": 0.7378358750907771,
"grad_norm": 1.243995372081041,
"learning_rate": 1.9538468803261514e-06,
"loss": 0.5521,
"step": 5080
},
{
"epoch": 0.739288307915759,
"grad_norm": 1.1122614530495916,
"learning_rate": 1.9337823354701617e-06,
"loss": 0.5615,
"step": 5090
},
{
"epoch": 0.7407407407407407,
"grad_norm": 1.012804702506735,
"learning_rate": 1.913796618336499e-06,
"loss": 0.5514,
"step": 5100
},
{
"epoch": 0.7421931735657226,
"grad_norm": 1.1487569184157758,
"learning_rate": 1.8938902427270905e-06,
"loss": 0.5595,
"step": 5110
},
{
"epoch": 0.7436456063907044,
"grad_norm": 1.222308594990331,
"learning_rate": 1.8740637204041195e-06,
"loss": 0.5645,
"step": 5120
},
{
"epoch": 0.7450980392156863,
"grad_norm": 1.1354476091482255,
"learning_rate": 1.8543175610768715e-06,
"loss": 0.5607,
"step": 5130
},
{
"epoch": 0.7465504720406682,
"grad_norm": 1.2205544178436005,
"learning_rate": 1.83465227238861e-06,
"loss": 0.542,
"step": 5140
},
{
"epoch": 0.7480029048656499,
"grad_norm": 1.2462160753237452,
"learning_rate": 1.8150683599035517e-06,
"loss": 0.5606,
"step": 5150
},
{
"epoch": 0.7494553376906318,
"grad_norm": 1.1396860492016365,
"learning_rate": 1.7955663270938501e-06,
"loss": 0.5689,
"step": 5160
},
{
"epoch": 0.7509077705156136,
"grad_norm": 1.1228524828818305,
"learning_rate": 1.7761466753266598e-06,
"loss": 0.5625,
"step": 5170
},
{
"epoch": 0.7523602033405955,
"grad_norm": 1.1360291736903685,
"learning_rate": 1.7568099038512466e-06,
"loss": 0.5724,
"step": 5180
},
{
"epoch": 0.7538126361655774,
"grad_norm": 1.226701284666325,
"learning_rate": 1.7375565097861518e-06,
"loss": 0.5653,
"step": 5190
},
{
"epoch": 0.7552650689905592,
"grad_norm": 1.1971595467490777,
"learning_rate": 1.7183869881064125e-06,
"loss": 0.5681,
"step": 5200
},
{
"epoch": 0.756717501815541,
"grad_norm": 1.003433379963408,
"learning_rate": 1.6993018316308351e-06,
"loss": 0.5497,
"step": 5210
},
{
"epoch": 0.7581699346405228,
"grad_norm": 1.0677706687056256,
"learning_rate": 1.6803015310093286e-06,
"loss": 0.5663,
"step": 5220
},
{
"epoch": 0.7596223674655047,
"grad_norm": 1.1960572257973088,
"learning_rate": 1.6613865747102876e-06,
"loss": 0.5566,
"step": 5230
},
{
"epoch": 0.7610748002904866,
"grad_norm": 1.1110041512712467,
"learning_rate": 1.6425574490080355e-06,
"loss": 0.5474,
"step": 5240
},
{
"epoch": 0.7625272331154684,
"grad_norm": 1.1953866183465143,
"learning_rate": 1.6238146379703257e-06,
"loss": 0.5602,
"step": 5250
},
{
"epoch": 0.7639796659404503,
"grad_norm": 1.184221410195916,
"learning_rate": 1.6051586234458932e-06,
"loss": 0.558,
"step": 5260
},
{
"epoch": 0.7654320987654321,
"grad_norm": 1.1917994670950118,
"learning_rate": 1.5865898850520671e-06,
"loss": 0.573,
"step": 5270
},
{
"epoch": 0.7668845315904139,
"grad_norm": 1.205079091727242,
"learning_rate": 1.5681089001624488e-06,
"loss": 0.5565,
"step": 5280
},
{
"epoch": 0.7683369644153958,
"grad_norm": 1.0590014592765518,
"learning_rate": 1.5497161438946218e-06,
"loss": 0.5537,
"step": 5290
},
{
"epoch": 0.7697893972403776,
"grad_norm": 1.3045355829406655,
"learning_rate": 1.5314120890979596e-06,
"loss": 0.5608,
"step": 5300
},
{
"epoch": 0.7712418300653595,
"grad_norm": 1.227226173650366,
"learning_rate": 1.5131972063414451e-06,
"loss": 0.563,
"step": 5310
},
{
"epoch": 0.7726942628903414,
"grad_norm": 1.1505400844326525,
"learning_rate": 1.4950719639015987e-06,
"loss": 0.5618,
"step": 5320
},
{
"epoch": 0.7741466957153231,
"grad_norm": 1.1971910791582392,
"learning_rate": 1.4770368277504183e-06,
"loss": 0.5559,
"step": 5330
},
{
"epoch": 0.775599128540305,
"grad_norm": 1.1465426761189066,
"learning_rate": 1.45909226154341e-06,
"loss": 0.5757,
"step": 5340
},
{
"epoch": 0.7770515613652869,
"grad_norm": 1.0530342043982832,
"learning_rate": 1.4412387266076677e-06,
"loss": 0.5699,
"step": 5350
},
{
"epoch": 0.7785039941902687,
"grad_norm": 1.1921772808125664,
"learning_rate": 1.4234766819300106e-06,
"loss": 0.5592,
"step": 5360
},
{
"epoch": 0.7799564270152506,
"grad_norm": 1.1969217401024441,
"learning_rate": 1.4058065841451856e-06,
"loss": 0.5658,
"step": 5370
},
{
"epoch": 0.7814088598402323,
"grad_norm": 1.1371738180522346,
"learning_rate": 1.3882288875241262e-06,
"loss": 0.5523,
"step": 5380
},
{
"epoch": 0.7828612926652142,
"grad_norm": 1.119312116230787,
"learning_rate": 1.3707440439622754e-06,
"loss": 0.5501,
"step": 5390
},
{
"epoch": 0.7843137254901961,
"grad_norm": 1.200972988458609,
"learning_rate": 1.353352502967966e-06,
"loss": 0.5393,
"step": 5400
},
{
"epoch": 0.7857661583151779,
"grad_norm": 1.005244568846047,
"learning_rate": 1.336054711650867e-06,
"loss": 0.5552,
"step": 5410
},
{
"epoch": 0.7872185911401598,
"grad_norm": 0.9811514201367332,
"learning_rate": 1.3188511147104882e-06,
"loss": 0.5615,
"step": 5420
},
{
"epoch": 0.7886710239651417,
"grad_norm": 1.2124333619418073,
"learning_rate": 1.3017421544247466e-06,
"loss": 0.5731,
"step": 5430
},
{
"epoch": 0.7901234567901234,
"grad_norm": 1.0164638888045425,
"learning_rate": 1.2847282706385962e-06,
"loss": 0.5449,
"step": 5440
},
{
"epoch": 0.7915758896151053,
"grad_norm": 1.0692055130184748,
"learning_rate": 1.267809900752725e-06,
"loss": 0.5581,
"step": 5450
},
{
"epoch": 0.7930283224400871,
"grad_norm": 1.2243966381535343,
"learning_rate": 1.2509874797122983e-06,
"loss": 0.5694,
"step": 5460
},
{
"epoch": 0.794480755265069,
"grad_norm": 1.1192058071022615,
"learning_rate": 1.2342614399957952e-06,
"loss": 0.5601,
"step": 5470
},
{
"epoch": 0.7959331880900509,
"grad_norm": 1.210664779695526,
"learning_rate": 1.217632211603868e-06,
"loss": 0.5383,
"step": 5480
},
{
"epoch": 0.7973856209150327,
"grad_norm": 1.2306429782422048,
"learning_rate": 1.2011002220483099e-06,
"loss": 0.5503,
"step": 5490
},
{
"epoch": 0.7988380537400145,
"grad_norm": 1.1449496150562748,
"learning_rate": 1.1846658963410472e-06,
"loss": 0.561,
"step": 5500
},
{
"epoch": 0.8002904865649964,
"grad_norm": 1.1809146975647171,
"learning_rate": 1.168329656983222e-06,
"loss": 0.5489,
"step": 5510
},
{
"epoch": 0.8017429193899782,
"grad_norm": 1.1865786985653701,
"learning_rate": 1.1520919239543272e-06,
"loss": 0.5443,
"step": 5520
},
{
"epoch": 0.8031953522149601,
"grad_norm": 1.2819514449232758,
"learning_rate": 1.1359531147014102e-06,
"loss": 0.5784,
"step": 5530
},
{
"epoch": 0.8046477850399419,
"grad_norm": 1.140249494732679,
"learning_rate": 1.11991364412834e-06,
"loss": 0.5472,
"step": 5540
},
{
"epoch": 0.8061002178649237,
"grad_norm": 1.0963574239357976,
"learning_rate": 1.1039739245851426e-06,
"loss": 0.5614,
"step": 5550
},
{
"epoch": 0.8075526506899056,
"grad_norm": 1.1963836912036798,
"learning_rate": 1.088134365857399e-06,
"loss": 0.5516,
"step": 5560
},
{
"epoch": 0.8090050835148874,
"grad_norm": 1.320400739555157,
"learning_rate": 1.0723953751557098e-06,
"loss": 0.5643,
"step": 5570
},
{
"epoch": 0.8104575163398693,
"grad_norm": 1.2261172403861758,
"learning_rate": 1.0567573571052265e-06,
"loss": 0.545,
"step": 5580
},
{
"epoch": 0.8119099491648512,
"grad_norm": 1.1363072652624087,
"learning_rate": 1.0412207137352504e-06,
"loss": 0.5562,
"step": 5590
},
{
"epoch": 0.813362381989833,
"grad_norm": 1.0696753091917897,
"learning_rate": 1.0257858444688968e-06,
"loss": 0.5584,
"step": 5600
},
{
"epoch": 0.8148148148148148,
"grad_norm": 1.092336652561905,
"learning_rate": 1.0104531461128224e-06,
"loss": 0.5509,
"step": 5610
},
{
"epoch": 0.8162672476397966,
"grad_norm": 1.2190453226296554,
"learning_rate": 9.952230128470358e-07,
"loss": 0.5552,
"step": 5620
},
{
"epoch": 0.8177196804647785,
"grad_norm": 1.1756174285580154,
"learning_rate": 9.800958362147433e-07,
"loss": 0.5611,
"step": 5630
},
{
"epoch": 0.8191721132897604,
"grad_norm": 1.050298389841538,
"learning_rate": 9.65072005112308e-07,
"loss": 0.5536,
"step": 5640
},
{
"epoch": 0.8206245461147422,
"grad_norm": 1.2990174959407426,
"learning_rate": 9.501519057792275e-07,
"loss": 0.5495,
"step": 5650
},
{
"epoch": 0.8220769789397241,
"grad_norm": 1.1318695700100998,
"learning_rate": 9.353359217882241e-07,
"loss": 0.5557,
"step": 5660
},
{
"epoch": 0.8235294117647058,
"grad_norm": 1.1818056539247317,
"learning_rate": 9.206244340353732e-07,
"loss": 0.5703,
"step": 5670
},
{
"epoch": 0.8249818445896877,
"grad_norm": 1.191491253002993,
"learning_rate": 9.060178207303077e-07,
"loss": 0.5543,
"step": 5680
},
{
"epoch": 0.8264342774146696,
"grad_norm": 1.2775803771232788,
"learning_rate": 8.915164573865109e-07,
"loss": 0.5673,
"step": 5690
},
{
"epoch": 0.8278867102396514,
"grad_norm": 1.0993365384271814,
"learning_rate": 8.771207168116407e-07,
"loss": 0.5526,
"step": 5700
},
{
"epoch": 0.8293391430646333,
"grad_norm": 1.2010857578242673,
"learning_rate": 8.628309690979658e-07,
"loss": 0.5465,
"step": 5710
},
{
"epoch": 0.830791575889615,
"grad_norm": 1.1363204888828164,
"learning_rate": 8.486475816128376e-07,
"loss": 0.5522,
"step": 5720
},
{
"epoch": 0.8322440087145969,
"grad_norm": 1.237168492535083,
"learning_rate": 8.345709189892504e-07,
"loss": 0.5377,
"step": 5730
},
{
"epoch": 0.8336964415395788,
"grad_norm": 1.1890926723132464,
"learning_rate": 8.206013431164683e-07,
"loss": 0.5613,
"step": 5740
},
{
"epoch": 0.8351488743645606,
"grad_norm": 1.2611972496063513,
"learning_rate": 8.0673921313072e-07,
"loss": 0.5562,
"step": 5750
},
{
"epoch": 0.8366013071895425,
"grad_norm": 1.1453681982727373,
"learning_rate": 7.929848854059663e-07,
"loss": 0.5469,
"step": 5760
},
{
"epoch": 0.8380537400145244,
"grad_norm": 1.1161546893459802,
"learning_rate": 7.793387135447372e-07,
"loss": 0.5688,
"step": 5770
},
{
"epoch": 0.8395061728395061,
"grad_norm": 1.242951008236561,
"learning_rate": 7.658010483690431e-07,
"loss": 0.5516,
"step": 5780
},
{
"epoch": 0.840958605664488,
"grad_norm": 1.1291848404892897,
"learning_rate": 7.52372237911358e-07,
"loss": 0.5558,
"step": 5790
},
{
"epoch": 0.8424110384894699,
"grad_norm": 1.1344340429459099,
"learning_rate": 7.390526274056625e-07,
"loss": 0.5368,
"step": 5800
},
{
"epoch": 0.8438634713144517,
"grad_norm": 1.2369341276497008,
"learning_rate": 7.25842559278584e-07,
"loss": 0.5438,
"step": 5810
},
{
"epoch": 0.8453159041394336,
"grad_norm": 1.161564478717058,
"learning_rate": 7.127423731405747e-07,
"loss": 0.5524,
"step": 5820
},
{
"epoch": 0.8467683369644154,
"grad_norm": 1.3389378618000198,
"learning_rate": 6.997524057771964e-07,
"loss": 0.5411,
"step": 5830
},
{
"epoch": 0.8482207697893972,
"grad_norm": 1.2324708082947882,
"learning_rate": 6.868729911404582e-07,
"loss": 0.5594,
"step": 5840
},
{
"epoch": 0.8496732026143791,
"grad_norm": 1.0931906751127958,
"learning_rate": 6.741044603402214e-07,
"loss": 0.5394,
"step": 5850
},
{
"epoch": 0.8511256354393609,
"grad_norm": 1.1045798920330345,
"learning_rate": 6.614471416357055e-07,
"loss": 0.5517,
"step": 5860
},
{
"epoch": 0.8525780682643428,
"grad_norm": 1.1003308882789462,
"learning_rate": 6.489013604270277e-07,
"loss": 0.5432,
"step": 5870
},
{
"epoch": 0.8540305010893247,
"grad_norm": 1.1511825195957979,
"learning_rate": 6.364674392468578e-07,
"loss": 0.5543,
"step": 5880
},
{
"epoch": 0.8554829339143064,
"grad_norm": 1.1016772920186344,
"learning_rate": 6.241456977521115e-07,
"loss": 0.5511,
"step": 5890
},
{
"epoch": 0.8569353667392883,
"grad_norm": 1.2345711604547172,
"learning_rate": 6.119364527157401e-07,
"loss": 0.5546,
"step": 5900
},
{
"epoch": 0.8583877995642701,
"grad_norm": 1.1026866190660687,
"learning_rate": 5.998400180185838e-07,
"loss": 0.5534,
"step": 5910
},
{
"epoch": 0.859840232389252,
"grad_norm": 1.0696348901565953,
"learning_rate": 5.878567046413025e-07,
"loss": 0.5431,
"step": 5920
},
{
"epoch": 0.8612926652142339,
"grad_norm": 1.074925388402079,
"learning_rate": 5.759868206563834e-07,
"loss": 0.5564,
"step": 5930
},
{
"epoch": 0.8627450980392157,
"grad_norm": 1.1892355845709555,
"learning_rate": 5.642306712202183e-07,
"loss": 0.56,
"step": 5940
},
{
"epoch": 0.8641975308641975,
"grad_norm": 1.1714018297678883,
"learning_rate": 5.525885585652591e-07,
"loss": 0.5477,
"step": 5950
},
{
"epoch": 0.8656499636891793,
"grad_norm": 1.2243789216177572,
"learning_rate": 5.410607819922481e-07,
"loss": 0.5561,
"step": 5960
},
{
"epoch": 0.8671023965141612,
"grad_norm": 1.158429282768604,
"learning_rate": 5.296476378625237e-07,
"loss": 0.5246,
"step": 5970
},
{
"epoch": 0.8685548293391431,
"grad_norm": 1.2064879125921322,
"learning_rate": 5.183494195904015e-07,
"loss": 0.5434,
"step": 5980
},
{
"epoch": 0.8700072621641249,
"grad_norm": 1.0370084252960212,
"learning_rate": 5.071664176356294e-07,
"loss": 0.556,
"step": 5990
},
{
"epoch": 0.8714596949891068,
"grad_norm": 1.1529022886105922,
"learning_rate": 4.960989194959225e-07,
"loss": 0.5349,
"step": 6000
},
{
"epoch": 0.8729121278140886,
"grad_norm": 1.0702466803229502,
"learning_rate": 4.851472096995741e-07,
"loss": 0.5641,
"step": 6010
},
{
"epoch": 0.8743645606390704,
"grad_norm": 1.195504112892932,
"learning_rate": 4.7431156979813097e-07,
"loss": 0.5627,
"step": 6020
},
{
"epoch": 0.8758169934640523,
"grad_norm": 1.0424744381436926,
"learning_rate": 4.6359227835916954e-07,
"loss": 0.5457,
"step": 6030
},
{
"epoch": 0.8772694262890341,
"grad_norm": 1.136106426677912,
"learning_rate": 4.529896109591203e-07,
"loss": 0.5536,
"step": 6040
},
{
"epoch": 0.878721859114016,
"grad_norm": 1.1941194023099557,
"learning_rate": 4.425038401761961e-07,
"loss": 0.5512,
"step": 6050
},
{
"epoch": 0.8801742919389978,
"grad_norm": 1.1005592964409183,
"learning_rate": 4.3213523558337354e-07,
"loss": 0.5522,
"step": 6060
},
{
"epoch": 0.8816267247639796,
"grad_norm": 1.3046172497671011,
"learning_rate": 4.218840637414695e-07,
"loss": 0.5389,
"step": 6070
},
{
"epoch": 0.8830791575889615,
"grad_norm": 1.2050786337197097,
"learning_rate": 4.117505881922856e-07,
"loss": 0.5637,
"step": 6080
},
{
"epoch": 0.8845315904139434,
"grad_norm": 1.1086711189663023,
"learning_rate": 4.0173506945183295e-07,
"loss": 0.5637,
"step": 6090
},
{
"epoch": 0.8859840232389252,
"grad_norm": 1.142760086036647,
"learning_rate": 3.9183776500363593e-07,
"loss": 0.5639,
"step": 6100
},
{
"epoch": 0.8874364560639071,
"grad_norm": 1.211597985547058,
"learning_rate": 3.8205892929211175e-07,
"loss": 0.5534,
"step": 6110
},
{
"epoch": 0.8888888888888888,
"grad_norm": 1.125094111731544,
"learning_rate": 3.7239881371603005e-07,
"loss": 0.5514,
"step": 6120
},
{
"epoch": 0.8903413217138707,
"grad_norm": 1.1253410539349802,
"learning_rate": 3.6285766662204735e-07,
"loss": 0.5593,
"step": 6130
},
{
"epoch": 0.8917937545388526,
"grad_norm": 1.076054931723469,
"learning_rate": 3.534357332983257e-07,
"loss": 0.5494,
"step": 6140
},
{
"epoch": 0.8932461873638344,
"grad_norm": 1.2433138382241562,
"learning_rate": 3.441332559682242e-07,
"loss": 0.5507,
"step": 6150
},
{
"epoch": 0.8946986201888163,
"grad_norm": 1.172111145318429,
"learning_rate": 3.349504737840742e-07,
"loss": 0.5632,
"step": 6160
},
{
"epoch": 0.8961510530137982,
"grad_norm": 1.2018077073853302,
"learning_rate": 3.258876228210267e-07,
"loss": 0.5381,
"step": 6170
},
{
"epoch": 0.8976034858387799,
"grad_norm": 1.1218901853415595,
"learning_rate": 3.169449360709914e-07,
"loss": 0.5651,
"step": 6180
},
{
"epoch": 0.8990559186637618,
"grad_norm": 1.075452696669577,
"learning_rate": 3.0812264343663467e-07,
"loss": 0.5518,
"step": 6190
},
{
"epoch": 0.9005083514887436,
"grad_norm": 1.2898875627777047,
"learning_rate": 2.99420971725482e-07,
"loss": 0.5535,
"step": 6200
},
{
"epoch": 0.9019607843137255,
"grad_norm": 1.064409341720963,
"learning_rate": 2.9084014464407837e-07,
"loss": 0.551,
"step": 6210
},
{
"epoch": 0.9034132171387074,
"grad_norm": 1.1430289990560287,
"learning_rate": 2.8238038279224e-07,
"loss": 0.5351,
"step": 6220
},
{
"epoch": 0.9048656499636892,
"grad_norm": 1.0942084433621513,
"learning_rate": 2.740419036573844e-07,
"loss": 0.5628,
"step": 6230
},
{
"epoch": 0.906318082788671,
"grad_norm": 1.1827726416299507,
"learning_rate": 2.6582492160893536e-07,
"loss": 0.5698,
"step": 6240
},
{
"epoch": 0.9077705156136529,
"grad_norm": 1.0512203056975564,
"learning_rate": 2.5772964789281593e-07,
"loss": 0.539,
"step": 6250
},
{
"epoch": 0.9092229484386347,
"grad_norm": 1.177449766279641,
"learning_rate": 2.4975629062601534e-07,
"loss": 0.5475,
"step": 6260
},
{
"epoch": 0.9106753812636166,
"grad_norm": 1.2124754199233574,
"learning_rate": 2.419050547912388e-07,
"loss": 0.541,
"step": 6270
},
{
"epoch": 0.9121278140885984,
"grad_norm": 1.3580937630552576,
"learning_rate": 2.3417614223163908e-07,
"loss": 0.5588,
"step": 6280
},
{
"epoch": 0.9135802469135802,
"grad_norm": 1.1170472146222037,
"learning_rate": 2.26569751645625e-07,
"loss": 0.5436,
"step": 6290
},
{
"epoch": 0.9150326797385621,
"grad_norm": 1.1184802548299553,
"learning_rate": 2.1908607858175612e-07,
"loss": 0.5377,
"step": 6300
},
{
"epoch": 0.9164851125635439,
"grad_norm": 1.1396702009546613,
"learning_rate": 2.117253154337118e-07,
"loss": 0.5683,
"step": 6310
},
{
"epoch": 0.9179375453885258,
"grad_norm": 1.2119088736658123,
"learning_rate": 2.0448765143534942e-07,
"loss": 0.5668,
"step": 6320
},
{
"epoch": 0.9193899782135077,
"grad_norm": 1.0448734314632342,
"learning_rate": 1.973732726558364e-07,
"loss": 0.5437,
"step": 6330
},
{
"epoch": 0.9208424110384895,
"grad_norm": 1.2851112602098311,
"learning_rate": 1.9038236199486693e-07,
"loss": 0.5622,
"step": 6340
},
{
"epoch": 0.9222948438634713,
"grad_norm": 1.1700640178574329,
"learning_rate": 1.8351509917796218e-07,
"loss": 0.542,
"step": 6350
},
{
"epoch": 0.9237472766884531,
"grad_norm": 1.1416778336018678,
"learning_rate": 1.7677166075184548e-07,
"loss": 0.5529,
"step": 6360
},
{
"epoch": 0.925199709513435,
"grad_norm": 1.1230308913216087,
"learning_rate": 1.7015222007990883e-07,
"loss": 0.5559,
"step": 6370
},
{
"epoch": 0.9266521423384169,
"grad_norm": 1.1568250466964043,
"learning_rate": 1.6365694733775305e-07,
"loss": 0.5507,
"step": 6380
},
{
"epoch": 0.9281045751633987,
"grad_norm": 1.1602815569402067,
"learning_rate": 1.572860095088108e-07,
"loss": 0.552,
"step": 6390
},
{
"epoch": 0.9295570079883806,
"grad_norm": 1.0423401424679095,
"learning_rate": 1.5103957038005935e-07,
"loss": 0.5446,
"step": 6400
},
{
"epoch": 0.9310094408133623,
"grad_norm": 1.1374874233890928,
"learning_rate": 1.4491779053780298e-07,
"loss": 0.5473,
"step": 6410
},
{
"epoch": 0.9324618736383442,
"grad_norm": 1.1755709384042587,
"learning_rate": 1.3892082736355283e-07,
"loss": 0.5486,
"step": 6420
},
{
"epoch": 0.9339143064633261,
"grad_norm": 1.1744643775241368,
"learning_rate": 1.3304883502997133e-07,
"loss": 0.5518,
"step": 6430
},
{
"epoch": 0.9353667392883079,
"grad_norm": 1.1216236591765696,
"learning_rate": 1.2730196449691756e-07,
"loss": 0.5492,
"step": 6440
},
{
"epoch": 0.9368191721132898,
"grad_norm": 1.1470393369010776,
"learning_rate": 1.2168036350755975e-07,
"loss": 0.5322,
"step": 6450
},
{
"epoch": 0.9382716049382716,
"grad_norm": 1.1985354195876317,
"learning_rate": 1.1618417658458003e-07,
"loss": 0.5616,
"step": 6460
},
{
"epoch": 0.9397240377632534,
"grad_norm": 1.1475497479759824,
"learning_rate": 1.1081354502645913e-07,
"loss": 0.5531,
"step": 6470
},
{
"epoch": 0.9411764705882353,
"grad_norm": 1.1396353932104606,
"learning_rate": 1.0556860690384252e-07,
"loss": 0.5472,
"step": 6480
},
{
"epoch": 0.9426289034132171,
"grad_norm": 1.1215848254083782,
"learning_rate": 1.0044949705599216e-07,
"loss": 0.5429,
"step": 6490
},
{
"epoch": 0.944081336238199,
"grad_norm": 1.005591582016032,
"learning_rate": 9.545634708731988e-08,
"loss": 0.5418,
"step": 6500
},
{
"epoch": 0.9455337690631809,
"grad_norm": 1.215225242394237,
"learning_rate": 9.058928536400058e-08,
"loss": 0.5578,
"step": 6510
},
{
"epoch": 0.9469862018881626,
"grad_norm": 1.152537711229488,
"learning_rate": 8.584843701067935e-08,
"loss": 0.5404,
"step": 6520
},
{
"epoch": 0.9484386347131445,
"grad_norm": 1.175848365037797,
"learning_rate": 8.123392390724682e-08,
"loss": 0.5522,
"step": 6530
},
{
"epoch": 0.9498910675381264,
"grad_norm": 1.0183498527962453,
"learning_rate": 7.674586468570999e-08,
"loss": 0.5564,
"step": 6540
},
{
"epoch": 0.9513435003631082,
"grad_norm": 1.2151729065782833,
"learning_rate": 7.238437472714466e-08,
"loss": 0.5561,
"step": 6550
},
{
"epoch": 0.9527959331880901,
"grad_norm": 1.1402236462651618,
"learning_rate": 6.81495661587217e-08,
"loss": 0.5411,
"step": 6560
},
{
"epoch": 0.954248366013072,
"grad_norm": 1.1521868862152016,
"learning_rate": 6.404154785083383e-08,
"loss": 0.5539,
"step": 6570
},
{
"epoch": 0.9557007988380537,
"grad_norm": 1.1258302178296054,
"learning_rate": 6.006042541428669e-08,
"loss": 0.5532,
"step": 6580
},
{
"epoch": 0.9571532316630356,
"grad_norm": 1.173412519187008,
"learning_rate": 5.6206301197594404e-08,
"loss": 0.5505,
"step": 6590
},
{
"epoch": 0.9586056644880174,
"grad_norm": 1.136513704911577,
"learning_rate": 5.247927428433885e-08,
"loss": 0.5435,
"step": 6600
},
{
"epoch": 0.9600580973129993,
"grad_norm": 1.1972723133655234,
"learning_rate": 4.887944049062843e-08,
"loss": 0.548,
"step": 6610
},
{
"epoch": 0.9615105301379812,
"grad_norm": 1.240930781464282,
"learning_rate": 4.5406892362632185e-08,
"loss": 0.5538,
"step": 6620
},
{
"epoch": 0.9629629629629629,
"grad_norm": 1.2645184421648727,
"learning_rate": 4.206171917420121e-08,
"loss": 0.5616,
"step": 6630
},
{
"epoch": 0.9644153957879448,
"grad_norm": 1.1619344530688336,
"learning_rate": 3.884400692457435e-08,
"loss": 0.5578,
"step": 6640
},
{
"epoch": 0.9658678286129266,
"grad_norm": 1.0415045949293107,
"learning_rate": 3.575383833616497e-08,
"loss": 0.536,
"step": 6650
},
{
"epoch": 0.9673202614379085,
"grad_norm": 1.1707683296063809,
"learning_rate": 3.2791292852437096e-08,
"loss": 0.5444,
"step": 6660
},
{
"epoch": 0.9687726942628904,
"grad_norm": 0.9579807050337852,
"learning_rate": 2.99564466358615e-08,
"loss": 0.5604,
"step": 6670
},
{
"epoch": 0.9702251270878722,
"grad_norm": 1.155540906901066,
"learning_rate": 2.7249372565957277e-08,
"loss": 0.5495,
"step": 6680
},
{
"epoch": 0.971677559912854,
"grad_norm": 1.0959456715901421,
"learning_rate": 2.4670140237419428e-08,
"loss": 0.5483,
"step": 6690
},
{
"epoch": 0.9731299927378358,
"grad_norm": 1.0366185075689953,
"learning_rate": 2.2218815958329754e-08,
"loss": 0.5497,
"step": 6700
},
{
"epoch": 0.9745824255628177,
"grad_norm": 1.0759294981597065,
"learning_rate": 1.9895462748450444e-08,
"loss": 0.5634,
"step": 6710
},
{
"epoch": 0.9760348583877996,
"grad_norm": 1.1209995693338786,
"learning_rate": 1.770014033760592e-08,
"loss": 0.5508,
"step": 6720
},
{
"epoch": 0.9774872912127814,
"grad_norm": 1.210238366549934,
"learning_rate": 1.5632905164145173e-08,
"loss": 0.5813,
"step": 6730
},
{
"epoch": 0.9789397240377633,
"grad_norm": 1.15542524575641,
"learning_rate": 1.3693810373494598e-08,
"loss": 0.5421,
"step": 6740
},
{
"epoch": 0.9803921568627451,
"grad_norm": 1.194050906215969,
"learning_rate": 1.188290581678575e-08,
"loss": 0.5586,
"step": 6750
},
{
"epoch": 0.9818445896877269,
"grad_norm": 1.1566645017111077,
"learning_rate": 1.0200238049580258e-08,
"loss": 0.5632,
"step": 6760
},
{
"epoch": 0.9832970225127088,
"grad_norm": 1.0710546930410338,
"learning_rate": 8.645850330668559e-09,
"loss": 0.5368,
"step": 6770
},
{
"epoch": 0.9847494553376906,
"grad_norm": 1.175731861197897,
"learning_rate": 7.219782620958571e-09,
"loss": 0.5388,
"step": 6780
},
{
"epoch": 0.9862018881626725,
"grad_norm": 1.0791848418311811,
"learning_rate": 5.922071582449285e-09,
"loss": 0.5585,
"step": 6790
},
{
"epoch": 0.9876543209876543,
"grad_norm": 1.21651622954666,
"learning_rate": 4.752750577288745e-09,
"loss": 0.5603,
"step": 6800
},
{
"epoch": 0.9891067538126361,
"grad_norm": 1.294701087862953,
"learning_rate": 3.711849666914735e-09,
"loss": 0.5713,
"step": 6810
},
{
"epoch": 0.990559186637618,
"grad_norm": 1.100757408335571,
"learning_rate": 2.799395611281508e-09,
"loss": 0.5587,
"step": 6820
},
{
"epoch": 0.9920116194625999,
"grad_norm": 1.282263624241459,
"learning_rate": 2.0154118681753322e-09,
"loss": 0.5588,
"step": 6830
},
{
"epoch": 0.9934640522875817,
"grad_norm": 1.0975199346392859,
"learning_rate": 1.3599185926072012e-09,
"loss": 0.5724,
"step": 6840
},
{
"epoch": 0.9949164851125636,
"grad_norm": 1.1620574281790235,
"learning_rate": 8.329326362976897e-10,
"loss": 0.5621,
"step": 6850
},
{
"epoch": 0.9963689179375453,
"grad_norm": 1.1717561623715795,
"learning_rate": 4.34467547242301e-10,
"loss": 0.5506,
"step": 6860
},
{
"epoch": 0.9978213507625272,
"grad_norm": 1.155270191238308,
"learning_rate": 1.645335693623018e-10,
"loss": 0.5533,
"step": 6870
},
{
"epoch": 0.9992737835875091,
"grad_norm": 1.240301119345841,
"learning_rate": 2.3137642244375202e-11,
"loss": 0.5538,
"step": 6880
},
{
"epoch": 1.0,
"step": 6885,
"total_flos": 1942329112002560.0,
"train_loss": 0.5927019230420812,
"train_runtime": 56356.5973,
"train_samples_per_second": 1.955,
"train_steps_per_second": 0.122
}
],
"logging_steps": 10,
"max_steps": 6885,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1942329112002560.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}