Training in progress, step 400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b915467464f8b803b33ca073fb0597dc5f8e94a3a3d27e6062c9ed0c6919b583
 size 859942080

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee5b1434a373cdcc5dad1a7067047ecfac41dd752c5091ac03443cfcd45e757f
 size 859942080

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8d109f981c5c1df0d534dcc8ba8b77ece29c425af0989248f8edec0257953d1e
 size 90187222

 version https://git-lfs.github.com/spec/v1
+oid sha256:b25555b38342e1ba69cdf11c523aea0de6238361604dd97d0958ab7cd0805b0d
 size 90187222

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d25e1211c9046b57a33acc5132889e25842980cf549f2f259dad9359efcc4211
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:6bede8cf68b594f047239405eb697b986cf2eee2f6054fce463ec7f666ac8d8a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f23c15d28ca8e830a4a7dce196129a0e658718ff1073df92eaf612f2c3f31ab6
 size 2080

 version https://git-lfs.github.com/spec/v1
+oid sha256:92be4570eef35209dec51037fce376384799ea8aa047398b6b8d22e11ae0faf5
 size 2080

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.802311658859253,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
-  "epoch": 0.0021845031347619984,
   "eval_steps": 100,
-  "global_step": 300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2139,6 +2139,714 @@
       "eval_samples_per_second": 5.711,
       "eval_steps_per_second": 1.904,
       "step": 300
     }
   ],
   "logging_steps": 1,
@@ -2153,7 +2861,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -2162,12 +2870,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.98915152412672e+16,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.802311658859253,
   "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 0.0029126708463493313,
   "eval_steps": 100,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 5.711,
       "eval_steps_per_second": 1.904,
       "step": 300
+    },
+    {
+      "epoch": 0.002191784811877872,
+      "grad_norm": 8.730939865112305,
+      "learning_rate": 0.0001999997766688466,
+      "loss": 1.4397,
+      "step": 301
+    },
+    {
+      "epoch": 0.002199066488993745,
+      "grad_norm": 11.153186798095703,
+      "learning_rate": 0.0001999997766688466,
+      "loss": 1.4227,
+      "step": 302
+    },
+    {
+      "epoch": 0.002206348166109618,
+      "grad_norm": 17.12929344177246,
+      "learning_rate": 0.0001999997766688466,
+      "loss": 2.2076,
+      "step": 303
+    },
+    {
+      "epoch": 0.0022136298432254915,
+      "grad_norm": 25.37128257751465,
+      "learning_rate": 0.0001999997766688466,
+      "loss": 1.7579,
+      "step": 304
+    },
+    {
+      "epoch": 0.002220911520341365,
+      "grad_norm": 12.106339454650879,
+      "learning_rate": 0.0001999997766688466,
+      "loss": 1.8253,
+      "step": 305
+    },
+    {
+      "epoch": 0.0022281931974572383,
+      "grad_norm": 27.969057083129883,
+      "learning_rate": 0.00019999976211693138,
+      "loss": 1.8772,
+      "step": 306
+    },
+    {
+      "epoch": 0.0022354748745731117,
+      "grad_norm": 22.067020416259766,
+      "learning_rate": 0.00019999976211693138,
+      "loss": 2.8781,
+      "step": 307
+    },
+    {
+      "epoch": 0.002242756551688985,
+      "grad_norm": 9.046490669250488,
+      "learning_rate": 0.00019999976211693138,
+      "loss": 1.531,
+      "step": 308
+    },
+    {
+      "epoch": 0.0022500382288048585,
+      "grad_norm": 7.611325740814209,
+      "learning_rate": 0.00019999976211693138,
+      "loss": 1.7938,
+      "step": 309
+    },
+    {
+      "epoch": 0.0022573199059207315,
+      "grad_norm": 14.951436996459961,
+      "learning_rate": 0.00019999976211693138,
+      "loss": 1.4902,
+      "step": 310
+    },
+    {
+      "epoch": 0.002264601583036605,
+      "grad_norm": 15.602734565734863,
+      "learning_rate": 0.00019999976211693138,
+      "loss": 1.874,
+      "step": 311
+    },
+    {
+      "epoch": 0.0022718832601524783,
+      "grad_norm": 8.054279327392578,
+      "learning_rate": 0.00019999976211693138,
+      "loss": 1.3136,
+      "step": 312
+    },
+    {
+      "epoch": 0.0022791649372683517,
+      "grad_norm": 7.248758316040039,
+      "learning_rate": 0.00019999976211693138,
+      "loss": 2.018,
+      "step": 313
+    },
+    {
+      "epoch": 0.002286446614384225,
+      "grad_norm": 13.146790504455566,
+      "learning_rate": 0.00019999976211693138,
+      "loss": 2.1679,
+      "step": 314
+    },
+    {
+      "epoch": 0.0022937282915000985,
+      "grad_norm": 13.931244850158691,
+      "learning_rate": 0.00019999976211693138,
+      "loss": 2.2164,
+      "step": 315
+    },
+    {
+      "epoch": 0.002301009968615972,
+      "grad_norm": 15.501871109008789,
+      "learning_rate": 0.00019999976211693138,
+      "loss": 2.2128,
+      "step": 316
+    },
+    {
+      "epoch": 0.002308291645731845,
+      "grad_norm": 11.91884708404541,
+      "learning_rate": 0.00019999976211693138,
+      "loss": 1.908,
+      "step": 317
+    },
+    {
+      "epoch": 0.002315573322847718,
+      "grad_norm": 8.88620662689209,
+      "learning_rate": 0.00019999976211693138,
+      "loss": 1.7827,
+      "step": 318
+    },
+    {
+      "epoch": 0.0023228549999635916,
+      "grad_norm": 8.506556510925293,
+      "learning_rate": 0.00019999976211693138,
+      "loss": 1.6667,
+      "step": 319
+    },
+    {
+      "epoch": 0.002330136677079465,
+      "grad_norm": 7.398971080780029,
+      "learning_rate": 0.00019999974756501615,
+      "loss": 1.7337,
+      "step": 320
+    },
+    {
+      "epoch": 0.0023374183541953384,
+      "grad_norm": 14.629541397094727,
+      "learning_rate": 0.00019999974756501615,
+      "loss": 2.8259,
+      "step": 321
+    },
+    {
+      "epoch": 0.0023447000313112118,
+      "grad_norm": 10.868115425109863,
+      "learning_rate": 0.00019999974756501615,
+      "loss": 1.6023,
+      "step": 322
+    },
+    {
+      "epoch": 0.0023519817084270847,
+      "grad_norm": 6.996133327484131,
+      "learning_rate": 0.00019999974756501615,
+      "loss": 1.479,
+      "step": 323
+    },
+    {
+      "epoch": 0.002359263385542958,
+      "grad_norm": 12.882672309875488,
+      "learning_rate": 0.00019999974756501615,
+      "loss": 1.9858,
+      "step": 324
+    },
+    {
+      "epoch": 0.0023665450626588315,
+      "grad_norm": 15.347146987915039,
+      "learning_rate": 0.00019999974756501615,
+      "loss": 2.4452,
+      "step": 325
+    },
+    {
+      "epoch": 0.002373826739774705,
+      "grad_norm": 12.391621589660645,
+      "learning_rate": 0.00019999974756501615,
+      "loss": 1.6006,
+      "step": 326
+    },
+    {
+      "epoch": 0.0023811084168905783,
+      "grad_norm": 9.05659294128418,
+      "learning_rate": 0.00019999973301310092,
+      "loss": 1.5926,
+      "step": 327
+    },
+    {
+      "epoch": 0.0023883900940064517,
+      "grad_norm": 17.410442352294922,
+      "learning_rate": 0.00019999973301310092,
+      "loss": 1.8012,
+      "step": 328
+    },
+    {
+      "epoch": 0.002395671771122325,
+      "grad_norm": 10.399604797363281,
+      "learning_rate": 0.00019999973301310092,
+      "loss": 1.7668,
+      "step": 329
+    },
+    {
+      "epoch": 0.002402953448238198,
+      "grad_norm": 10.19865894317627,
+      "learning_rate": 0.00019999973301310092,
+      "loss": 1.2579,
+      "step": 330
+    },
+    {
+      "epoch": 0.0024102351253540715,
+      "grad_norm": 11.40501594543457,
+      "learning_rate": 0.00019999973301310092,
+      "loss": 2.3442,
+      "step": 331
+    },
+    {
+      "epoch": 0.002417516802469945,
+      "grad_norm": 157.9639892578125,
+      "learning_rate": 0.00019999973301310092,
+      "loss": 2.4881,
+      "step": 332
+    },
+    {
+      "epoch": 0.0024247984795858183,
+      "grad_norm": 38.14295196533203,
+      "learning_rate": 0.00019999973301310092,
+      "loss": 1.6426,
+      "step": 333
+    },
+    {
+      "epoch": 0.0024320801567016916,
+      "grad_norm": 81.4266357421875,
+      "learning_rate": 0.0001999997184611857,
+      "loss": 2.0941,
+      "step": 334
+    },
+    {
+      "epoch": 0.002439361833817565,
+      "grad_norm": 543.3828735351562,
+      "learning_rate": 0.0001999997184611857,
+      "loss": 3.5042,
+      "step": 335
+    },
+    {
+      "epoch": 0.002446643510933438,
+      "grad_norm": 136.7288360595703,
+      "learning_rate": 0.0001999997184611857,
+      "loss": 3.6408,
+      "step": 336
+    },
+    {
+      "epoch": 0.0024539251880493114,
+      "grad_norm": 351.3304443359375,
+      "learning_rate": 0.0001999997184611857,
+      "loss": 4.8935,
+      "step": 337
+    },
+    {
+      "epoch": 0.002461206865165185,
+      "grad_norm": 25.63802146911621,
+      "learning_rate": 0.0001999997184611857,
+      "loss": 1.973,
+      "step": 338
+    },
+    {
+      "epoch": 0.002468488542281058,
+      "grad_norm": 944.34716796875,
+      "learning_rate": 0.0001999997184611857,
+      "loss": 3.7135,
+      "step": 339
+    },
+    {
+      "epoch": 0.0024757702193969316,
+      "grad_norm": 53.539249420166016,
+      "learning_rate": 0.0001999997184611857,
+      "loss": 2.938,
+      "step": 340
+    },
+    {
+      "epoch": 0.002483051896512805,
+      "grad_norm": 30.831520080566406,
+      "learning_rate": 0.00019999970390927047,
+      "loss": 1.9902,
+      "step": 341
+    },
+    {
+      "epoch": 0.0024903335736286784,
+      "grad_norm": 22.046127319335938,
+      "learning_rate": 0.00019999970390927047,
+      "loss": 2.3988,
+      "step": 342
+    },
+    {
+      "epoch": 0.0024976152507445513,
+      "grad_norm": 10.293771743774414,
+      "learning_rate": 0.00019999970390927047,
+      "loss": 2.0854,
+      "step": 343
+    },
+    {
+      "epoch": 0.0025048969278604247,
+      "grad_norm": 9.61192512512207,
+      "learning_rate": 0.00019999970390927047,
+      "loss": 1.9325,
+      "step": 344
+    },
+    {
+      "epoch": 0.002512178604976298,
+      "grad_norm": 44.886131286621094,
+      "learning_rate": 0.00019999970390927047,
+      "loss": 2.0442,
+      "step": 345
+    },
+    {
+      "epoch": 0.0025194602820921715,
+      "grad_norm": 68.03099060058594,
+      "learning_rate": 0.00019999970390927047,
+      "loss": 2.3183,
+      "step": 346
+    },
+    {
+      "epoch": 0.002526741959208045,
+      "grad_norm": 10.313465118408203,
+      "learning_rate": 0.00019999970390927047,
+      "loss": 1.682,
+      "step": 347
+    },
+    {
+      "epoch": 0.0025340236363239183,
+      "grad_norm": 8.795594215393066,
+      "learning_rate": 0.00019999970390927047,
+      "loss": 1.7499,
+      "step": 348
+    },
+    {
+      "epoch": 0.0025413053134397913,
+      "grad_norm": 15.358119010925293,
+      "learning_rate": 0.00019999970390927047,
+      "loss": 2.5658,
+      "step": 349
+    },
+    {
+      "epoch": 0.0025485869905556647,
+      "grad_norm": 13.6449556350708,
+      "learning_rate": 0.00019999970390927047,
+      "loss": 2.0518,
+      "step": 350
+    },
+    {
+      "epoch": 0.002555868667671538,
+      "grad_norm": 9.879755020141602,
+      "learning_rate": 0.00019999970390927047,
+      "loss": 2.0147,
+      "step": 351
+    },
+    {
+      "epoch": 0.0025631503447874114,
+      "grad_norm": 36.2411003112793,
+      "learning_rate": 0.00019999970390927047,
+      "loss": 1.908,
+      "step": 352
+    },
+    {
+      "epoch": 0.002570432021903285,
+      "grad_norm": 10.988778114318848,
+      "learning_rate": 0.00019999970390927047,
+      "loss": 1.9842,
+      "step": 353
+    },
+    {
+      "epoch": 0.0025777136990191582,
+      "grad_norm": 46.944091796875,
+      "learning_rate": 0.00019999968935735524,
+      "loss": 1.9198,
+      "step": 354
+    },
+    {
+      "epoch": 0.0025849953761350316,
+      "grad_norm": 136.6161651611328,
+      "learning_rate": 0.00019999968935735524,
+      "loss": 6.4679,
+      "step": 355
+    },
+    {
+      "epoch": 0.0025922770532509046,
+      "grad_norm": 10.581056594848633,
+      "learning_rate": 0.00019999968935735524,
+      "loss": 1.8126,
+      "step": 356
+    },
+    {
+      "epoch": 0.002599558730366778,
+      "grad_norm": 13.883180618286133,
+      "learning_rate": 0.00019999968935735524,
+      "loss": 1.4649,
+      "step": 357
+    },
+    {
+      "epoch": 0.0026068404074826514,
+      "grad_norm": 10.422268867492676,
+      "learning_rate": 0.00019999968935735524,
+      "loss": 2.4318,
+      "step": 358
+    },
+    {
+      "epoch": 0.0026141220845985248,
+      "grad_norm": 7.1867194175720215,
+      "learning_rate": 0.00019999968935735524,
+      "loss": 1.2992,
+      "step": 359
+    },
+    {
+      "epoch": 0.002621403761714398,
+      "grad_norm": 10.631929397583008,
+      "learning_rate": 0.00019999967480544,
+      "loss": 1.3248,
+      "step": 360
+    },
+    {
+      "epoch": 0.0026286854388302716,
+      "grad_norm": 5.5126471519470215,
+      "learning_rate": 0.00019999967480544,
+      "loss": 1.244,
+      "step": 361
+    },
+    {
+      "epoch": 0.0026359671159461445,
+      "grad_norm": 13.727178573608398,
+      "learning_rate": 0.00019999967480544,
+      "loss": 1.7372,
+      "step": 362
+    },
+    {
+      "epoch": 0.002643248793062018,
+      "grad_norm": 11.092156410217285,
+      "learning_rate": 0.00019999967480544,
+      "loss": 1.9153,
+      "step": 363
+    },
+    {
+      "epoch": 0.0026505304701778913,
+      "grad_norm": 9.164811134338379,
+      "learning_rate": 0.00019999967480544,
+      "loss": 2.42,
+      "step": 364
+    },
+    {
+      "epoch": 0.0026578121472937647,
+      "grad_norm": 9.590070724487305,
+      "learning_rate": 0.00019999967480544,
+      "loss": 2.405,
+      "step": 365
+    },
+    {
+      "epoch": 0.002665093824409638,
+      "grad_norm": 15.919793128967285,
+      "learning_rate": 0.00019999966025352478,
+      "loss": 2.6192,
+      "step": 366
+    },
+    {
+      "epoch": 0.0026723755015255115,
+      "grad_norm": 13.85263442993164,
+      "learning_rate": 0.00019999966025352478,
+      "loss": 2.0706,
+      "step": 367
+    },
+    {
+      "epoch": 0.002679657178641385,
+      "grad_norm": 8.932114601135254,
+      "learning_rate": 0.00019999966025352478,
+      "loss": 1.2626,
+      "step": 368
+    },
+    {
+      "epoch": 0.002686938855757258,
+      "grad_norm": 13.381108283996582,
+      "learning_rate": 0.00019999966025352478,
+      "loss": 2.2716,
+      "step": 369
+    },
+    {
+      "epoch": 0.0026942205328731312,
+      "grad_norm": 10.97152042388916,
+      "learning_rate": 0.00019999966025352478,
+      "loss": 2.5749,
+      "step": 370
+    },
+    {
+      "epoch": 0.0027015022099890046,
+      "grad_norm": 11.363154411315918,
+      "learning_rate": 0.00019999966025352478,
+      "loss": 1.6513,
+      "step": 371
+    },
+    {
+      "epoch": 0.002708783887104878,
+      "grad_norm": 10.766716003417969,
+      "learning_rate": 0.00019999964570160955,
+      "loss": 1.9178,
+      "step": 372
+    },
+    {
+      "epoch": 0.0027160655642207514,
+      "grad_norm": 13.253247261047363,
+      "learning_rate": 0.00019999964570160955,
+      "loss": 1.9283,
+      "step": 373
+    },
+    {
+      "epoch": 0.002723347241336625,
+      "grad_norm": 10.64928150177002,
+      "learning_rate": 0.00019999964570160955,
+      "loss": 1.8352,
+      "step": 374
+    },
+    {
+      "epoch": 0.002730628918452498,
+      "grad_norm": 9.825387954711914,
+      "learning_rate": 0.00019999964570160955,
+      "loss": 1.9724,
+      "step": 375
+    },
+    {
+      "epoch": 0.002737910595568371,
+      "grad_norm": 9.247831344604492,
+      "learning_rate": 0.00019999964570160955,
+      "loss": 1.3778,
+      "step": 376
+    },
+    {
+      "epoch": 0.0027451922726842446,
+      "grad_norm": 32.254974365234375,
+      "learning_rate": 0.00019999964570160955,
+      "loss": 1.9512,
+      "step": 377
+    },
+    {
+      "epoch": 0.002752473949800118,
+      "grad_norm": 10.275064468383789,
+      "learning_rate": 0.00019999963114969432,
+      "loss": 1.8973,
+      "step": 378
+    },
+    {
+      "epoch": 0.0027597556269159914,
+      "grad_norm": 10.921187400817871,
+      "learning_rate": 0.00019999963114969432,
+      "loss": 1.935,
+      "step": 379
+    },
+    {
+      "epoch": 0.0027670373040318648,
+      "grad_norm": 8.636027336120605,
+      "learning_rate": 0.00019999963114969432,
+      "loss": 1.7587,
+      "step": 380
+    },
+    {
+      "epoch": 0.002774318981147738,
+      "grad_norm": 12.754554748535156,
+      "learning_rate": 0.00019999963114969432,
+      "loss": 1.3819,
+      "step": 381
+    },
+    {
+      "epoch": 0.002781600658263611,
+      "grad_norm": 12.138449668884277,
+      "learning_rate": 0.00019999963114969432,
+      "loss": 1.4523,
+      "step": 382
+    },
+    {
+      "epoch": 0.0027888823353794845,
+      "grad_norm": 10.390461921691895,
+      "learning_rate": 0.00019999963114969432,
+      "loss": 1.6211,
+      "step": 383
+    },
+    {
+      "epoch": 0.002796164012495358,
+      "grad_norm": 16.587440490722656,
+      "learning_rate": 0.00019999963114969432,
+      "loss": 2.1905,
+      "step": 384
+    },
+    {
+      "epoch": 0.0028034456896112313,
+      "grad_norm": 15.672696113586426,
+      "learning_rate": 0.00019999963114969432,
+      "loss": 1.5341,
+      "step": 385
+    },
+    {
+      "epoch": 0.0028107273667271047,
+      "grad_norm": 12.346440315246582,
+      "learning_rate": 0.00019999963114969432,
+      "loss": 2.43,
+      "step": 386
+    },
+    {
+      "epoch": 0.002818009043842978,
+      "grad_norm": 8.437786102294922,
+      "learning_rate": 0.00019999963114969432,
+      "loss": 1.1114,
+      "step": 387
+    },
+    {
+      "epoch": 0.002825290720958851,
+      "grad_norm": 12.213990211486816,
+      "learning_rate": 0.00019999963114969432,
+      "loss": 1.9927,
+      "step": 388
+    },
+    {
+      "epoch": 0.0028325723980747244,
+      "grad_norm": 8.3518648147583,
+      "learning_rate": 0.00019999963114969432,
+      "loss": 1.3671,
+      "step": 389
+    },
+    {
+      "epoch": 0.002839854075190598,
+      "grad_norm": 17.662803649902344,
+      "learning_rate": 0.0001999996165977791,
+      "loss": 2.1279,
+      "step": 390
+    },
+    {
+      "epoch": 0.0028471357523064712,
+      "grad_norm": 8.939593315124512,
+      "learning_rate": 0.0001999996165977791,
+      "loss": 1.543,
+      "step": 391
+    },
+    {
+      "epoch": 0.0028544174294223446,
+      "grad_norm": 11.475914001464844,
+      "learning_rate": 0.0001999996165977791,
+      "loss": 1.6749,
+      "step": 392
+    },
+    {
+      "epoch": 0.002861699106538218,
+      "grad_norm": 9.165367126464844,
+      "learning_rate": 0.0001999996165977791,
+      "loss": 1.7026,
+      "step": 393
+    },
+    {
+      "epoch": 0.0028689807836540914,
+      "grad_norm": 16.476228713989258,
+      "learning_rate": 0.0001999996165977791,
+      "loss": 2.8899,
+      "step": 394
+    },
+    {
+      "epoch": 0.0028762624607699644,
+      "grad_norm": 12.431415557861328,
+      "learning_rate": 0.0001999996165977791,
+      "loss": 2.3806,
+      "step": 395
+    },
+    {
+      "epoch": 0.0028835441378858378,
+      "grad_norm": 14.651118278503418,
+      "learning_rate": 0.00019999960204586387,
+      "loss": 2.7375,
+      "step": 396
+    },
+    {
+      "epoch": 0.002890825815001711,
+      "grad_norm": 11.111295700073242,
+      "learning_rate": 0.00019999960204586387,
+      "loss": 2.4693,
+      "step": 397
+    },
+    {
+      "epoch": 0.0028981074921175846,
+      "grad_norm": 7.806118011474609,
+      "learning_rate": 0.00019999960204586387,
+      "loss": 1.7636,
+      "step": 398
+    },
+    {
+      "epoch": 0.002905389169233458,
+      "grad_norm": 16.599353790283203,
+      "learning_rate": 0.00019999960204586387,
+      "loss": 2.8746,
+      "step": 399
+    },
+    {
+      "epoch": 0.0029126708463493313,
+      "grad_norm": 10.182330131530762,
+      "learning_rate": 0.00019999960204586387,
+      "loss": 1.6883,
+      "step": 400
+    },
+    {
+      "epoch": 0.0029126708463493313,
+      "eval_loss": 1.8433881998062134,
+      "eval_runtime": 36.2821,
+      "eval_samples_per_second": 5.705,
+      "eval_steps_per_second": 1.902,
+      "step": 400
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.65220203216896e+16,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null