Training in progress, step 16300, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +704 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e34874bc5d610e79a943aa178d84773a68b730b29bd1b8283a91a8c53b1970ff
 size 340808816

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad22cbf1fcb8571501c12f895012cee7abbe6eee1012214b48c079a3d8a18d24
 size 340808816

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b656ff4c57cb328eab480566bbf3e4113649555f2693d5ea3c499adf2b39a470
 size 173247691

 version https://git-lfs.github.com/spec/v1
+oid sha256:a469a6f0e3e6af8ccbac47588ca951c29baef9062f65ae65783830c17aa52835
 size 173247691

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:64f901e3dd85487be972809864a9546772f7d63c315505a0aa325ee6514a89c3
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:7108ab94f8617d52d176b95881a14724face9a9e73a376ff8d0ba2273d9232f4
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 19.87730061349693,
   "eval_steps": 500,
-  "global_step": 16200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -113408,6 +113408,706 @@
       "learning_rate": 5.139489109923234e-09,
       "loss": 0.7659933567047119,
       "step": 16200
     }
   ],
   "logging_steps": 1,
@@ -113422,12 +114122,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.531283788994445e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 20.0,
   "eval_steps": 500,
+  "global_step": 16300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 5.139489109923234e-09,
       "loss": 0.7659933567047119,
       "step": 16200
+    },
+    {
+      "epoch": 19.878527607361963,
+      "grad_norm": 0.31444302201271057,
+      "learning_rate": 5.038224271980419e-09,
+      "loss": 0.674436092376709,
+      "step": 16201
+    },
+    {
+      "epoch": 19.879754601226995,
+      "grad_norm": 0.2814277112483978,
+      "learning_rate": 4.937966909657199e-09,
+      "loss": 0.6168652772903442,
+      "step": 16202
+    },
+    {
+      "epoch": 19.880981595092024,
+      "grad_norm": 0.29662981629371643,
+      "learning_rate": 4.838717026997563e-09,
+      "loss": 0.5741356015205383,
+      "step": 16203
+    },
+    {
+      "epoch": 19.882208588957056,
+      "grad_norm": 0.25058355927467346,
+      "learning_rate": 4.74047462800109e-09,
+      "loss": 0.5623442530632019,
+      "step": 16204
+    },
+    {
+      "epoch": 19.883435582822084,
+      "grad_norm": 0.3032751977443695,
+      "learning_rate": 4.6432397166285e-09,
+      "loss": 0.7177292108535767,
+      "step": 16205
+    },
+    {
+      "epoch": 19.884662576687116,
+      "grad_norm": 0.2613092064857483,
+      "learning_rate": 4.547012296796105e-09,
+      "loss": 0.705600380897522,
+      "step": 16206
+    },
+    {
+      "epoch": 19.88588957055215,
+      "grad_norm": 0.25566813349723816,
+      "learning_rate": 4.4517923723841335e-09,
+      "loss": 0.5601306557655334,
+      "step": 16207
+    },
+    {
+      "epoch": 19.887116564417177,
+      "grad_norm": 0.23225629329681396,
+      "learning_rate": 4.357579947233959e-09,
+      "loss": 0.539310097694397,
+      "step": 16208
+    },
+    {
+      "epoch": 19.88834355828221,
+      "grad_norm": 0.26849034428596497,
+      "learning_rate": 4.26437502513699e-09,
+      "loss": 0.6267234683036804,
+      "step": 16209
+    },
+    {
+      "epoch": 19.88957055214724,
+      "grad_norm": 0.29642733931541443,
+      "learning_rate": 4.172177609854111e-09,
+      "loss": 0.7983720302581787,
+      "step": 16210
+    },
+    {
+      "epoch": 19.89079754601227,
+      "grad_norm": 0.3397233784198761,
+      "learning_rate": 4.080987705099016e-09,
+      "loss": 0.4387090802192688,
+      "step": 16211
+    },
+    {
+      "epoch": 19.8920245398773,
+      "grad_norm": 0.24321438372135162,
+      "learning_rate": 3.990805314549318e-09,
+      "loss": 0.44809988141059875,
+      "step": 16212
+    },
+    {
+      "epoch": 19.89325153374233,
+      "grad_norm": 0.3020300567150116,
+      "learning_rate": 3.901630441840998e-09,
+      "loss": 0.6333901882171631,
+      "step": 16213
+    },
+    {
+      "epoch": 19.89447852760736,
+      "grad_norm": 0.23879368603229523,
+      "learning_rate": 3.8134630905656274e-09,
+      "loss": 0.3145845830440521,
+      "step": 16214
+    },
+    {
+      "epoch": 19.895705521472394,
+      "grad_norm": 0.281195729970932,
+      "learning_rate": 3.726303264278696e-09,
+      "loss": 0.4267631769180298,
+      "step": 16215
+    },
+    {
+      "epoch": 19.896932515337422,
+      "grad_norm": 0.2915148437023163,
+      "learning_rate": 3.6401509664912848e-09,
+      "loss": 0.7021991014480591,
+      "step": 16216
+    },
+    {
+      "epoch": 19.898159509202454,
+      "grad_norm": 0.2826932966709137,
+      "learning_rate": 3.5550062006811656e-09,
+      "loss": 0.6565602421760559,
+      "step": 16217
+    },
+    {
+      "epoch": 19.899386503067486,
+      "grad_norm": 0.2678978443145752,
+      "learning_rate": 3.4708689702733776e-09,
+      "loss": 0.49907374382019043,
+      "step": 16218
+    },
+    {
+      "epoch": 19.900613496932515,
+      "grad_norm": 0.2627774178981781,
+      "learning_rate": 3.3877392786624273e-09,
+      "loss": 0.5407178401947021,
+      "step": 16219
+    },
+    {
+      "epoch": 19.901840490797547,
+      "grad_norm": 0.28935369849205017,
+      "learning_rate": 3.3056171292011882e-09,
+      "loss": 0.6748589873313904,
+      "step": 16220
+    },
+    {
+      "epoch": 19.903067484662575,
+      "grad_norm": 0.27842453122138977,
+      "learning_rate": 3.2245025251953496e-09,
+      "loss": 0.7646443843841553,
+      "step": 16221
+    },
+    {
+      "epoch": 19.904294478527607,
+      "grad_norm": 0.2733169496059418,
+      "learning_rate": 3.1443954699200694e-09,
+      "loss": 0.6724434494972229,
+      "step": 16222
+    },
+    {
+      "epoch": 19.90552147239264,
+      "grad_norm": 0.29514604806900024,
+      "learning_rate": 3.06529596659777e-09,
+      "loss": 0.5396702289581299,
+      "step": 16223
+    },
+    {
+      "epoch": 19.906748466257667,
+      "grad_norm": 0.2790028750896454,
+      "learning_rate": 2.987204018420342e-09,
+      "loss": 0.736526608467102,
+      "step": 16224
+    },
+    {
+      "epoch": 19.9079754601227,
+      "grad_norm": 0.2966996729373932,
+      "learning_rate": 2.9101196285352684e-09,
+      "loss": 0.8032228946685791,
+      "step": 16225
+    },
+    {
+      "epoch": 19.90920245398773,
+      "grad_norm": 0.24839043617248535,
+      "learning_rate": 2.834042800051173e-09,
+      "loss": 0.37957412004470825,
+      "step": 16226
+    },
+    {
+      "epoch": 19.91042944785276,
+      "grad_norm": 0.2736720144748688,
+      "learning_rate": 2.7589735360322724e-09,
+      "loss": 0.7692750096321106,
+      "step": 16227
+    },
+    {
+      "epoch": 19.911656441717792,
+      "grad_norm": 0.252001017332077,
+      "learning_rate": 2.684911839503923e-09,
+      "loss": 0.5340638160705566,
+      "step": 16228
+    },
+    {
+      "epoch": 19.91288343558282,
+      "grad_norm": 0.26210591197013855,
+      "learning_rate": 2.6118577134498503e-09,
+      "loss": 0.5110554695129395,
+      "step": 16229
+    },
+    {
+      "epoch": 19.914110429447852,
+      "grad_norm": 0.30329135060310364,
+      "learning_rate": 2.5398111608204713e-09,
+      "loss": 0.7488691806793213,
+      "step": 16230
+    },
+    {
+      "epoch": 19.915337423312884,
+      "grad_norm": 0.28727856278419495,
+      "learning_rate": 2.4687721845162437e-09,
+      "loss": 0.6793022155761719,
+      "step": 16231
+    },
+    {
+      "epoch": 19.916564417177913,
+      "grad_norm": 0.2557753026485443,
+      "learning_rate": 2.3987407873987675e-09,
+      "loss": 0.4969423711299896,
+      "step": 16232
+    },
+    {
+      "epoch": 19.917791411042945,
+      "grad_norm": 0.25132516026496887,
+      "learning_rate": 2.329716972293561e-09,
+      "loss": 0.5655540823936462,
+      "step": 16233
+    },
+    {
+      "epoch": 19.919018404907977,
+      "grad_norm": 0.3251507580280304,
+      "learning_rate": 2.2617007419817313e-09,
+      "loss": 0.7593796253204346,
+      "step": 16234
+    },
+    {
+      "epoch": 19.920245398773005,
+      "grad_norm": 0.2656572759151459,
+      "learning_rate": 2.1946920992027553e-09,
+      "loss": 0.6801402568817139,
+      "step": 16235
+    },
+    {
+      "epoch": 19.921472392638037,
+      "grad_norm": 0.26032915711402893,
+      "learning_rate": 2.1286910466628006e-09,
+      "loss": 0.5409698486328125,
+      "step": 16236
+    },
+    {
+      "epoch": 19.92269938650307,
+      "grad_norm": 0.25649771094322205,
+      "learning_rate": 2.0636975870180764e-09,
+      "loss": 0.42548850178718567,
+      "step": 16237
+    },
+    {
+      "epoch": 19.923926380368098,
+      "grad_norm": 0.28882917761802673,
+      "learning_rate": 1.999711722891484e-09,
+      "loss": 0.6696641445159912,
+      "step": 16238
+    },
+    {
+      "epoch": 19.92515337423313,
+      "grad_norm": 0.25472167134284973,
+      "learning_rate": 1.936733456855966e-09,
+      "loss": 0.5762321949005127,
+      "step": 16239
+    },
+    {
+      "epoch": 19.926380368098158,
+      "grad_norm": 0.24897408485412598,
+      "learning_rate": 1.8747627914567077e-09,
+      "loss": 0.606791615486145,
+      "step": 16240
+    },
+    {
+      "epoch": 19.92760736196319,
+      "grad_norm": 0.30643516778945923,
+      "learning_rate": 1.8137997291889363e-09,
+      "loss": 0.605108380317688,
+      "step": 16241
+    },
+    {
+      "epoch": 19.928834355828222,
+      "grad_norm": 0.3065126836299896,
+      "learning_rate": 1.7538442725090198e-09,
+      "loss": 0.6616383790969849,
+      "step": 16242
+    },
+    {
+      "epoch": 19.93006134969325,
+      "grad_norm": 0.27115145325660706,
+      "learning_rate": 1.694896423834469e-09,
+      "loss": 0.7380319237709045,
+      "step": 16243
+    },
+    {
+      "epoch": 19.931288343558283,
+      "grad_norm": 0.2318820357322693,
+      "learning_rate": 1.6369561855411608e-09,
+      "loss": 0.47997498512268066,
+      "step": 16244
+    },
+    {
+      "epoch": 19.93251533742331,
+      "grad_norm": 0.2823431193828583,
+      "learning_rate": 1.580023559966115e-09,
+      "loss": 0.7115047574043274,
+      "step": 16245
+    },
+    {
+      "epoch": 19.933742331288343,
+      "grad_norm": 0.30200645327568054,
+      "learning_rate": 1.5240985493991667e-09,
+      "loss": 0.7860561609268188,
+      "step": 16246
+    },
+    {
+      "epoch": 19.934969325153375,
+      "grad_norm": 0.2997346520423889,
+      "learning_rate": 1.4691811560996193e-09,
+      "loss": 0.6527193784713745,
+      "step": 16247
+    },
+    {
+      "epoch": 19.936196319018403,
+      "grad_norm": 0.24736203253269196,
+      "learning_rate": 1.4152713822795927e-09,
+      "loss": 0.5359556674957275,
+      "step": 16248
+    },
+    {
+      "epoch": 19.937423312883435,
+      "grad_norm": 0.26944491267204285,
+      "learning_rate": 1.362369230112348e-09,
+      "loss": 0.5682604312896729,
+      "step": 16249
+    },
+    {
+      "epoch": 19.938650306748468,
+      "grad_norm": 0.2371739149093628,
+      "learning_rate": 1.3104747017295139e-09,
+      "loss": 0.5160844326019287,
+      "step": 16250
+    },
+    {
+      "epoch": 19.939877300613496,
+      "grad_norm": 0.28414422273635864,
+      "learning_rate": 1.2595877992238603e-09,
+      "loss": 0.6134630441665649,
+      "step": 16251
+    },
+    {
+      "epoch": 19.941104294478528,
+      "grad_norm": 0.2545883059501648,
+      "learning_rate": 1.209708524643749e-09,
+      "loss": 0.3812911808490753,
+      "step": 16252
+    },
+    {
+      "epoch": 19.94233128834356,
+      "grad_norm": 0.24534358084201813,
+      "learning_rate": 1.160836880001459e-09,
+      "loss": 0.4503448009490967,
+      "step": 16253
+    },
+    {
+      "epoch": 19.94355828220859,
+      "grad_norm": 0.3093093931674957,
+      "learning_rate": 1.1129728672676364e-09,
+      "loss": 0.614890456199646,
+      "step": 16254
+    },
+    {
+      "epoch": 19.94478527607362,
+      "grad_norm": 0.2969963550567627,
+      "learning_rate": 1.0661164883712937e-09,
+      "loss": 0.6617600917816162,
+      "step": 16255
+    },
+    {
+      "epoch": 19.94601226993865,
+      "grad_norm": 0.24619616568088531,
+      "learning_rate": 1.02026774519981e-09,
+      "loss": 0.61571204662323,
+      "step": 16256
+    },
+    {
+      "epoch": 19.94723926380368,
+      "grad_norm": 0.2435264140367508,
+      "learning_rate": 9.754266396017065e-10,
+      "loss": 0.44566264748573303,
+      "step": 16257
+    },
+    {
+      "epoch": 19.948466257668713,
+      "grad_norm": 0.290048748254776,
+      "learning_rate": 9.315931733866467e-10,
+      "loss": 0.7629603743553162,
+      "step": 16258
+    },
+    {
+      "epoch": 19.94969325153374,
+      "grad_norm": 0.2462032437324524,
+      "learning_rate": 8.887673483171099e-10,
+      "loss": 0.3848365545272827,
+      "step": 16259
+    },
+    {
+      "epoch": 19.950920245398773,
+      "grad_norm": 0.2963328957557678,
+      "learning_rate": 8.469491661222684e-10,
+      "loss": 0.7475588321685791,
+      "step": 16260
+    },
+    {
+      "epoch": 19.952147239263805,
+      "grad_norm": 0.27099505066871643,
+      "learning_rate": 8.06138628489661e-10,
+      "loss": 0.49332553148269653,
+      "step": 16261
+    },
+    {
+      "epoch": 19.953374233128834,
+      "grad_norm": 0.25060924887657166,
+      "learning_rate": 7.663357370596425e-10,
+      "loss": 0.5212326049804688,
+      "step": 16262
+    },
+    {
+      "epoch": 19.954601226993866,
+      "grad_norm": 0.2802629768848419,
+      "learning_rate": 7.275404934364849e-10,
+      "loss": 0.6265736222267151,
+      "step": 16263
+    },
+    {
+      "epoch": 19.955828220858894,
+      "grad_norm": 0.24711941182613373,
+      "learning_rate": 6.897528991883784e-10,
+      "loss": 0.6305452585220337,
+      "step": 16264
+    },
+    {
+      "epoch": 19.957055214723926,
+      "grad_norm": 0.2828010320663452,
+      "learning_rate": 6.529729558363284e-10,
+      "loss": 0.5041950941085815,
+      "step": 16265
+    },
+    {
+      "epoch": 19.958282208588958,
+      "grad_norm": 0.26520174741744995,
+      "learning_rate": 6.172006648624828e-10,
+      "loss": 0.7630486488342285,
+      "step": 16266
+    },
+    {
+      "epoch": 19.959509202453987,
+      "grad_norm": 0.22848939895629883,
+      "learning_rate": 5.824360277073559e-10,
+      "loss": 0.39715874195098877,
+      "step": 16267
+    },
+    {
+      "epoch": 19.96073619631902,
+      "grad_norm": 0.25177448987960815,
+      "learning_rate": 5.486790457753799e-10,
+      "loss": 0.4023863971233368,
+      "step": 16268
+    },
+    {
+      "epoch": 19.96196319018405,
+      "grad_norm": 0.29539355635643005,
+      "learning_rate": 5.159297204238023e-10,
+      "loss": 0.5298452377319336,
+      "step": 16269
+    },
+    {
+      "epoch": 19.96319018404908,
+      "grad_norm": 0.2694055438041687,
+      "learning_rate": 4.841880529765641e-10,
+      "loss": 0.45743635296821594,
+      "step": 16270
+    },
+    {
+      "epoch": 19.96441717791411,
+      "grad_norm": 0.25338271260261536,
+      "learning_rate": 4.534540447076463e-10,
+      "loss": 0.5844091176986694,
+      "step": 16271
+    },
+    {
+      "epoch": 19.96564417177914,
+      "grad_norm": 0.2750682234764099,
+      "learning_rate": 4.2372769686049863e-10,
+      "loss": 0.4653807282447815,
+      "step": 16272
+    },
+    {
+      "epoch": 19.96687116564417,
+      "grad_norm": 0.23543284833431244,
+      "learning_rate": 3.9500901063416194e-10,
+      "loss": 0.5624647736549377,
+      "step": 16273
+    },
+    {
+      "epoch": 19.968098159509204,
+      "grad_norm": 0.28273266553878784,
+      "learning_rate": 3.672979871804927e-10,
+      "loss": 0.6415307521820068,
+      "step": 16274
+    },
+    {
+      "epoch": 19.969325153374232,
+      "grad_norm": 0.26589664816856384,
+      "learning_rate": 3.4059462762359164e-10,
+      "loss": 0.629359781742096,
+      "step": 16275
+    },
+    {
+      "epoch": 19.970552147239264,
+      "grad_norm": 0.30133673548698425,
+      "learning_rate": 3.1489893303204843e-10,
+      "loss": 0.6743714213371277,
+      "step": 16276
+    },
+    {
+      "epoch": 19.971779141104296,
+      "grad_norm": 0.24149096012115479,
+      "learning_rate": 2.902109044466972e-10,
+      "loss": 0.5655030012130737,
+      "step": 16277
+    },
+    {
+      "epoch": 19.973006134969324,
+      "grad_norm": 0.2459070384502411,
+      "learning_rate": 2.665305428639631e-10,
+      "loss": 0.5063390731811523,
+      "step": 16278
+    },
+    {
+      "epoch": 19.974233128834356,
+      "grad_norm": 0.27514341473579407,
+      "learning_rate": 2.4385784923308674e-10,
+      "loss": 0.6695590615272522,
+      "step": 16279
+    },
+    {
+      "epoch": 19.975460122699385,
+      "grad_norm": 0.29874908924102783,
+      "learning_rate": 2.2219282447277778e-10,
+      "loss": 0.34940165281295776,
+      "step": 16280
+    },
+    {
+      "epoch": 19.976687116564417,
+      "grad_norm": 0.27747642993927,
+      "learning_rate": 2.015354694517857e-10,
+      "loss": 0.6391558647155762,
+      "step": 16281
+    },
+    {
+      "epoch": 19.97791411042945,
+      "grad_norm": 0.2599896788597107,
+      "learning_rate": 1.8188578500832888e-10,
+      "loss": 0.5198867321014404,
+      "step": 16282
+    },
+    {
+      "epoch": 19.979141104294477,
+      "grad_norm": 0.291477769613266,
+      "learning_rate": 1.6324377193066565e-10,
+      "loss": 0.6670821905136108,
+      "step": 16283
+    },
+    {
+      "epoch": 19.98036809815951,
+      "grad_norm": 0.2764919102191925,
+      "learning_rate": 1.4560943096819658e-10,
+      "loss": 0.5779229402542114,
+      "step": 16284
+    },
+    {
+      "epoch": 19.98159509202454,
+      "grad_norm": 0.2672038972377777,
+      "learning_rate": 1.289827628370155e-10,
+      "loss": 0.5859079957008362,
+      "step": 16285
+    },
+    {
+      "epoch": 19.98282208588957,
+      "grad_norm": 0.25981295108795166,
+      "learning_rate": 1.1336376820325623e-10,
+      "loss": 0.6229197978973389,
+      "step": 16286
+    },
+    {
+      "epoch": 19.9840490797546,
+      "grad_norm": 0.2849527895450592,
+      "learning_rate": 9.87524476997459e-11,
+      "loss": 0.7990140914916992,
+      "step": 16287
+    },
+    {
+      "epoch": 19.98527607361963,
+      "grad_norm": 0.27847468852996826,
+      "learning_rate": 8.514880191212715e-11,
+      "loss": 0.5412949323654175,
+      "step": 16288
+    },
+    {
+      "epoch": 19.986503067484662,
+      "grad_norm": 0.23352737724781036,
+      "learning_rate": 7.255283138996039e-11,
+      "loss": 0.3678314983844757,
+      "step": 16289
+    },
+    {
+      "epoch": 19.987730061349694,
+      "grad_norm": 0.2763413190841675,
+      "learning_rate": 6.096453664117263e-11,
+      "loss": 0.6856487989425659,
+      "step": 16290
+    },
+    {
+      "epoch": 19.988957055214723,
+      "grad_norm": 0.25349316000938416,
+      "learning_rate": 5.038391813483312e-11,
+      "loss": 0.5396376848220825,
+      "step": 16291
+    },
+    {
+      "epoch": 19.990184049079755,
+      "grad_norm": 0.25586146116256714,
+      "learning_rate": 4.0810976292826597e-11,
+      "loss": 0.6023741960525513,
+      "step": 16292
+    },
+    {
+      "epoch": 19.991411042944787,
+      "grad_norm": 0.8770232200622559,
+      "learning_rate": 3.224571150373112e-11,
+      "loss": 0.4764806628227234,
+      "step": 16293
+    },
+    {
+      "epoch": 19.992638036809815,
+      "grad_norm": 0.3072080910205841,
+      "learning_rate": 2.4688124114491397e-11,
+      "loss": 0.6098132133483887,
+      "step": 16294
+    },
+    {
+      "epoch": 19.993865030674847,
+      "grad_norm": 0.2564147114753723,
+      "learning_rate": 1.8138214427643185e-11,
+      "loss": 0.5958019495010376,
+      "step": 16295
+    },
+    {
+      "epoch": 19.99509202453988,
+      "grad_norm": 0.25169652700424194,
+      "learning_rate": 1.2595982706864461e-11,
+      "loss": 0.42747119069099426,
+      "step": 16296
+    },
+    {
+      "epoch": 19.996319018404908,
+      "grad_norm": 0.25083857774734497,
+      "learning_rate": 8.061429176975388e-12,
+      "loss": 0.5245164632797241,
+      "step": 16297
+    },
+    {
+      "epoch": 19.99754601226994,
+      "grad_norm": 0.24573828279972076,
+      "learning_rate": 4.5345540183872046e-12,
+      "loss": 0.4817044734954834,
+      "step": 16298
+    },
+    {
+      "epoch": 19.998773006134968,
+      "grad_norm": 0.2476826310157776,
+      "learning_rate": 2.0153573754289058e-12,
+      "loss": 0.5343716144561768,
+      "step": 16299
+    },
+    {
+      "epoch": 20.0,
+      "grad_norm": 0.27893704175949097,
+      "learning_rate": 5.038393480205628e-13,
+      "loss": 0.3585406243801117,
+      "step": 16300
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 4.559192932598415e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null