Training in progress, step 600, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f62a3669267cf71d82d7613940fda8316817761a312976ab3eaf115e310acc5
 size 671149168

 version https://git-lfs.github.com/spec/v1
+oid sha256:ef365db25022057197b4b51b720031484568324974a69eac1519e50b0f27180e
 size 671149168

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:028cc8a1a5f704ba3fd8b0a0c40fff116ce068a6aeb95280523010390d01a4f7
 size 179316182

 version https://git-lfs.github.com/spec/v1
+oid sha256:c797a7af223641265261c12a7109ff441b5a98196dd5655fd2f5252583938e32
 size 179316182

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71352a8a29de772e57a41bb86fae428e2d7704c4a8210b01b9dc37d6bce4b251
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:566bad833abc8e24d98072c362038aaf0a56ffba2d3a483eddb4f69725e48c7a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b438af450d5f669b412dd3e9981bf7d3209f28ad248f243baf4956b744ebafc5
 size 2080

 version https://git-lfs.github.com/spec/v1
+oid sha256:51377c3c97ab55526aa4f5f8b0c3786821eafd572b923cffeba3b976830c2d0f
 size 2080

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.7760652303695679,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
-  "epoch": 0.03883570554768054,
   "eval_steps": 100,
-  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3555,6 +3555,714 @@
       "eval_samples_per_second": 9.359,
       "eval_steps_per_second": 9.359,
       "step": 500
     }
   ],
   "logging_steps": 1,
@@ -3569,7 +4277,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
@@ -3578,12 +4286,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.450601865596109e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.7760652303695679,
   "best_model_checkpoint": "miner_id_24/checkpoint-200",
+  "epoch": 0.046602846657216646,
   "eval_steps": 100,
+  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.359,
       "eval_steps_per_second": 9.359,
       "step": 500
+    },
+    {
+      "epoch": 0.0389133769587759,
+      "grad_norm": 1.7078206539154053,
+      "learning_rate": 0.00019933712610509247,
+      "loss": 1.8207,
+      "step": 501
+    },
+    {
+      "epoch": 0.03899104836987126,
+      "grad_norm": 1.6380209922790527,
+      "learning_rate": 0.00019933430303353816,
+      "loss": 1.7694,
+      "step": 502
+    },
+    {
+      "epoch": 0.03906871978096662,
+      "grad_norm": 1.7188258171081543,
+      "learning_rate": 0.00019933147996198386,
+      "loss": 1.7103,
+      "step": 503
+    },
+    {
+      "epoch": 0.03914639119206198,
+      "grad_norm": 1.780552625656128,
+      "learning_rate": 0.00019932865689042956,
+      "loss": 2.0916,
+      "step": 504
+    },
+    {
+      "epoch": 0.03922406260315734,
+      "grad_norm": 1.7240533828735352,
+      "learning_rate": 0.00019932583381887525,
+      "loss": 1.7221,
+      "step": 505
+    },
+    {
+      "epoch": 0.0393017340142527,
+      "grad_norm": 1.8025785684585571,
+      "learning_rate": 0.00019932299619540572,
+      "loss": 1.7756,
+      "step": 506
+    },
+    {
+      "epoch": 0.03937940542534806,
+      "grad_norm": 1.6106324195861816,
+      "learning_rate": 0.00019932014402002096,
+      "loss": 1.7496,
+      "step": 507
+    },
+    {
+      "epoch": 0.03945707683644343,
+      "grad_norm": 1.6666816473007202,
+      "learning_rate": 0.0001993172918446362,
+      "loss": 1.658,
+      "step": 508
+    },
+    {
+      "epoch": 0.03953474824753879,
+      "grad_norm": 1.9957692623138428,
+      "learning_rate": 0.00019931443966925144,
+      "loss": 1.8459,
+      "step": 509
+    },
+    {
+      "epoch": 0.03961241965863415,
+      "grad_norm": 1.49708890914917,
+      "learning_rate": 0.00019931157294195145,
+      "loss": 1.4847,
+      "step": 510
+    },
+    {
+      "epoch": 0.03969009106972951,
+      "grad_norm": 1.823359489440918,
+      "learning_rate": 0.00019930870621465147,
+      "loss": 1.7518,
+      "step": 511
+    },
+    {
+      "epoch": 0.03976776248082487,
+      "grad_norm": 1.6615630388259888,
+      "learning_rate": 0.00019930583948735148,
+      "loss": 1.6027,
+      "step": 512
+    },
+    {
+      "epoch": 0.03984543389192023,
+      "grad_norm": 2.001208782196045,
+      "learning_rate": 0.00019930295820813626,
+      "loss": 1.8741,
+      "step": 513
+    },
+    {
+      "epoch": 0.03992310530301559,
+      "grad_norm": 1.87227463722229,
+      "learning_rate": 0.00019930006237700582,
+      "loss": 1.9748,
+      "step": 514
+    },
+    {
+      "epoch": 0.04000077671411095,
+      "grad_norm": 1.8258823156356812,
+      "learning_rate": 0.00019929716654587537,
+      "loss": 1.8829,
+      "step": 515
+    },
+    {
+      "epoch": 0.04007844812520631,
+      "grad_norm": 2.002673387527466,
+      "learning_rate": 0.00019929427071474493,
+      "loss": 1.8528,
+      "step": 516
+    },
+    {
+      "epoch": 0.040156119536301674,
+      "grad_norm": 1.7360410690307617,
+      "learning_rate": 0.00019929137488361448,
+      "loss": 1.8319,
+      "step": 517
+    },
+    {
+      "epoch": 0.040233790947397034,
+      "grad_norm": 1.4678514003753662,
+      "learning_rate": 0.0001992884645005688,
+      "loss": 1.7465,
+      "step": 518
+    },
+    {
+      "epoch": 0.040311462358492395,
+      "grad_norm": 1.7761526107788086,
+      "learning_rate": 0.00019928555411752313,
+      "loss": 2.01,
+      "step": 519
+    },
+    {
+      "epoch": 0.04038913376958776,
+      "grad_norm": 1.6452142000198364,
+      "learning_rate": 0.00019928262918256223,
+      "loss": 1.7229,
+      "step": 520
+    },
+    {
+      "epoch": 0.04046680518068312,
+      "grad_norm": 1.6721060276031494,
+      "learning_rate": 0.0001992796896956861,
+      "loss": 1.8213,
+      "step": 521
+    },
+    {
+      "epoch": 0.04054447659177848,
+      "grad_norm": 1.492349624633789,
+      "learning_rate": 0.0001992767647607252,
+      "loss": 1.7843,
+      "step": 522
+    },
+    {
+      "epoch": 0.040622148002873844,
+      "grad_norm": 1.6278711557388306,
+      "learning_rate": 0.00019927382527384907,
+      "loss": 1.4859,
+      "step": 523
+    },
+    {
+      "epoch": 0.040699819413969204,
+      "grad_norm": 1.5856826305389404,
+      "learning_rate": 0.00019927088578697294,
+      "loss": 1.7912,
+      "step": 524
+    },
+    {
+      "epoch": 0.040777490825064565,
+      "grad_norm": 1.6515257358551025,
+      "learning_rate": 0.00019926793174818158,
+      "loss": 1.7693,
+      "step": 525
+    },
+    {
+      "epoch": 0.040855162236159925,
+      "grad_norm": 1.8710856437683105,
+      "learning_rate": 0.00019926497770939022,
+      "loss": 1.8219,
+      "step": 526
+    },
+    {
+      "epoch": 0.040932833647255286,
+      "grad_norm": 1.731695532798767,
+      "learning_rate": 0.00019926200911868364,
+      "loss": 1.7707,
+      "step": 527
+    },
+    {
+      "epoch": 0.041010505058350646,
+      "grad_norm": 1.7034029960632324,
+      "learning_rate": 0.00019925904052797705,
+      "loss": 1.5997,
+      "step": 528
+    },
+    {
+      "epoch": 0.04108817646944601,
+      "grad_norm": 1.6124377250671387,
+      "learning_rate": 0.00019925607193727046,
+      "loss": 1.657,
+      "step": 529
+    },
+    {
+      "epoch": 0.04116584788054137,
+      "grad_norm": 1.540908694267273,
+      "learning_rate": 0.00019925308879464865,
+      "loss": 1.7859,
+      "step": 530
+    },
+    {
+      "epoch": 0.04124351929163673,
+      "grad_norm": 1.5464683771133423,
+      "learning_rate": 0.0001992500911001116,
+      "loss": 1.7216,
+      "step": 531
+    },
+    {
+      "epoch": 0.04132119070273209,
+      "grad_norm": 1.6259061098098755,
+      "learning_rate": 0.0001992471079574898,
+      "loss": 1.7018,
+      "step": 532
+    },
+    {
+      "epoch": 0.041398862113827456,
+      "grad_norm": 1.667738914489746,
+      "learning_rate": 0.00019924411026295274,
+      "loss": 1.5638,
+      "step": 533
+    },
+    {
+      "epoch": 0.041476533524922816,
+      "grad_norm": 1.5789062976837158,
+      "learning_rate": 0.00019924109801650047,
+      "loss": 1.7942,
+      "step": 534
+    },
+    {
+      "epoch": 0.04155420493601818,
+      "grad_norm": 1.6071553230285645,
+      "learning_rate": 0.0001992380857700482,
+      "loss": 1.9271,
+      "step": 535
+    },
+    {
+      "epoch": 0.04163187634711354,
+      "grad_norm": 1.4592769145965576,
+      "learning_rate": 0.00019923507352359593,
+      "loss": 1.7467,
+      "step": 536
+    },
+    {
+      "epoch": 0.0417095477582089,
+      "grad_norm": 1.689941644668579,
+      "learning_rate": 0.00019923204672522843,
+      "loss": 1.936,
+      "step": 537
+    },
+    {
+      "epoch": 0.04178721916930426,
+      "grad_norm": 1.7450604438781738,
+      "learning_rate": 0.00019922901992686093,
+      "loss": 2.0821,
+      "step": 538
+    },
+    {
+      "epoch": 0.04186489058039962,
+      "grad_norm": 1.4920010566711426,
+      "learning_rate": 0.00019922599312849343,
+      "loss": 1.7365,
+      "step": 539
+    },
+    {
+      "epoch": 0.04194256199149498,
+      "grad_norm": 1.7665345668792725,
+      "learning_rate": 0.0001992229517782107,
+      "loss": 1.8057,
+      "step": 540
+    },
+    {
+      "epoch": 0.04202023340259034,
+      "grad_norm": 1.5312821865081787,
+      "learning_rate": 0.00019921989587601274,
+      "loss": 1.8117,
+      "step": 541
+    },
+    {
+      "epoch": 0.0420979048136857,
+      "grad_norm": 1.6132386922836304,
+      "learning_rate": 0.00019921685452573001,
+      "loss": 1.7944,
+      "step": 542
+    },
+    {
+      "epoch": 0.04217557622478106,
+      "grad_norm": 1.7471600770950317,
+      "learning_rate": 0.00019921379862353206,
+      "loss": 1.751,
+      "step": 543
+    },
+    {
+      "epoch": 0.04225324763587642,
+      "grad_norm": 1.647037148475647,
+      "learning_rate": 0.00019921072816941887,
+      "loss": 1.9872,
+      "step": 544
+    },
+    {
+      "epoch": 0.04233091904697179,
+      "grad_norm": 1.8728786706924438,
+      "learning_rate": 0.00019920765771530569,
+      "loss": 1.7143,
+      "step": 545
+    },
+    {
+      "epoch": 0.04240859045806715,
+      "grad_norm": 1.4720885753631592,
+      "learning_rate": 0.0001992045872611925,
+      "loss": 1.4329,
+      "step": 546
+    },
+    {
+      "epoch": 0.04248626186916251,
+      "grad_norm": 1.9787479639053345,
+      "learning_rate": 0.0001992015022551641,
+      "loss": 1.8716,
+      "step": 547
+    },
+    {
+      "epoch": 0.04256393328025787,
+      "grad_norm": 1.5727592706680298,
+      "learning_rate": 0.00019919840269722044,
+      "loss": 1.7307,
+      "step": 548
+    },
+    {
+      "epoch": 0.04264160469135323,
+      "grad_norm": 1.695572853088379,
+      "learning_rate": 0.00019919531769119203,
+      "loss": 1.8648,
+      "step": 549
+    },
+    {
+      "epoch": 0.04271927610244859,
+      "grad_norm": 1.5870908498764038,
+      "learning_rate": 0.0001991922181332484,
+      "loss": 1.8435,
+      "step": 550
+    },
+    {
+      "epoch": 0.04279694751354395,
+      "grad_norm": 1.7436878681182861,
+      "learning_rate": 0.00019918910402338952,
+      "loss": 1.9561,
+      "step": 551
+    },
+    {
+      "epoch": 0.04287461892463931,
+      "grad_norm": 1.8657782077789307,
+      "learning_rate": 0.00019918600446544588,
+      "loss": 1.9407,
+      "step": 552
+    },
+    {
+      "epoch": 0.04295229033573467,
+      "grad_norm": 1.8304105997085571,
+      "learning_rate": 0.000199182890355587,
+      "loss": 1.7994,
+      "step": 553
+    },
+    {
+      "epoch": 0.043029961746830034,
+      "grad_norm": 1.7628461122512817,
+      "learning_rate": 0.0001991797616938129,
+      "loss": 1.8482,
+      "step": 554
+    },
+    {
+      "epoch": 0.043107633157925394,
+      "grad_norm": 1.6865324974060059,
+      "learning_rate": 0.0001991766330320388,
+      "loss": 1.8126,
+      "step": 555
+    },
+    {
+      "epoch": 0.043185304569020755,
+      "grad_norm": 1.8172606229782104,
+      "learning_rate": 0.00019917348981834948,
+      "loss": 1.7682,
+      "step": 556
+    },
+    {
+      "epoch": 0.043262975980116115,
+      "grad_norm": 1.6614211797714233,
+      "learning_rate": 0.00019917036115657538,
+      "loss": 1.8664,
+      "step": 557
+    },
+    {
+      "epoch": 0.04334064739121148,
+      "grad_norm": 1.6559243202209473,
+      "learning_rate": 0.00019916721794288605,
+      "loss": 1.8261,
+      "step": 558
+    },
+    {
+      "epoch": 0.04341831880230684,
+      "grad_norm": 1.5907464027404785,
+      "learning_rate": 0.00019916404562536627,
+      "loss": 1.6371,
+      "step": 559
+    },
+    {
+      "epoch": 0.043495990213402204,
+      "grad_norm": 1.774848461151123,
+      "learning_rate": 0.00019916088785976171,
+      "loss": 1.8444,
+      "step": 560
+    },
+    {
+      "epoch": 0.043573661624497564,
+      "grad_norm": 1.6430023908615112,
+      "learning_rate": 0.00019915773009415716,
+      "loss": 1.8364,
+      "step": 561
+    },
+    {
+      "epoch": 0.043651333035592925,
+      "grad_norm": 1.5221737623214722,
+      "learning_rate": 0.00019915455777663738,
+      "loss": 1.8028,
+      "step": 562
+    },
+    {
+      "epoch": 0.043729004446688285,
+      "grad_norm": 1.879007339477539,
+      "learning_rate": 0.0001991513854591176,
+      "loss": 1.892,
+      "step": 563
+    },
+    {
+      "epoch": 0.043806675857783646,
+      "grad_norm": 1.5331951379776,
+      "learning_rate": 0.00019914819858968258,
+      "loss": 1.7295,
+      "step": 564
+    },
+    {
+      "epoch": 0.043884347268879006,
+      "grad_norm": 1.6735893487930298,
+      "learning_rate": 0.00019914501172024757,
+      "loss": 1.7931,
+      "step": 565
+    },
+    {
+      "epoch": 0.04396201867997437,
+      "grad_norm": 1.6903493404388428,
+      "learning_rate": 0.00019914182485081255,
+      "loss": 1.6104,
+      "step": 566
+    },
+    {
+      "epoch": 0.04403969009106973,
+      "grad_norm": 1.9910458326339722,
+      "learning_rate": 0.00019913860887754709,
+      "loss": 1.8777,
+      "step": 567
+    },
+    {
+      "epoch": 0.04411736150216509,
+      "grad_norm": 1.6240683794021606,
+      "learning_rate": 0.00019913540745619684,
+      "loss": 1.7052,
+      "step": 568
+    },
+    {
+      "epoch": 0.04419503291326045,
+      "grad_norm": 1.6185333728790283,
+      "learning_rate": 0.0001991322060348466,
+      "loss": 1.6544,
+      "step": 569
+    },
+    {
+      "epoch": 0.044272704324355816,
+      "grad_norm": 1.7077709436416626,
+      "learning_rate": 0.0001991289755096659,
+      "loss": 1.5459,
+      "step": 570
+    },
+    {
+      "epoch": 0.04435037573545118,
+      "grad_norm": 1.8203749656677246,
+      "learning_rate": 0.00019912575953640044,
+      "loss": 2.0252,
+      "step": 571
+    },
+    {
+      "epoch": 0.04442804714654654,
+      "grad_norm": 1.8006839752197266,
+      "learning_rate": 0.00019912252901121974,
+      "loss": 2.0242,
+      "step": 572
+    },
+    {
+      "epoch": 0.0445057185576419,
+      "grad_norm": 1.777665615081787,
+      "learning_rate": 0.00019911928393412381,
+      "loss": 1.8088,
+      "step": 573
+    },
+    {
+      "epoch": 0.04458338996873726,
+      "grad_norm": 1.7679201364517212,
+      "learning_rate": 0.00019911605340894312,
+      "loss": 1.849,
+      "step": 574
+    },
+    {
+      "epoch": 0.04466106137983262,
+      "grad_norm": 1.9602786302566528,
+      "learning_rate": 0.0001991128083318472,
+      "loss": 1.6431,
+      "step": 575
+    },
+    {
+      "epoch": 0.04473873279092798,
+      "grad_norm": 1.8283534049987793,
+      "learning_rate": 0.00019910954870283604,
+      "loss": 1.7402,
+      "step": 576
+    },
+    {
+      "epoch": 0.04481640420202334,
+      "grad_norm": 1.6526250839233398,
+      "learning_rate": 0.00019910628907382488,
+      "loss": 1.7082,
+      "step": 577
+    },
+    {
+      "epoch": 0.0448940756131187,
+      "grad_norm": 2.265866756439209,
+      "learning_rate": 0.0001991030148928985,
+      "loss": 1.7652,
+      "step": 578
+    },
+    {
+      "epoch": 0.04497174702421406,
+      "grad_norm": 1.7171560525894165,
+      "learning_rate": 0.00019909975526388735,
+      "loss": 1.8516,
+      "step": 579
+    },
+    {
+      "epoch": 0.04504941843530942,
+      "grad_norm": 1.8056362867355347,
+      "learning_rate": 0.00019909646653104573,
+      "loss": 1.8199,
+      "step": 580
+    },
+    {
+      "epoch": 0.04512708984640478,
+      "grad_norm": 1.7205848693847656,
+      "learning_rate": 0.00019909319235011935,
+      "loss": 1.6558,
+      "step": 581
+    },
+    {
+      "epoch": 0.04520476125750014,
+      "grad_norm": 1.722912311553955,
+      "learning_rate": 0.00019908990361727774,
+      "loss": 1.8511,
+      "step": 582
+    },
+    {
+      "epoch": 0.04528243266859551,
+      "grad_norm": 1.8617427349090576,
+      "learning_rate": 0.0001990866003325209,
+      "loss": 1.7757,
+      "step": 583
+    },
+    {
+      "epoch": 0.04536010407969087,
+      "grad_norm": 1.748336672782898,
+      "learning_rate": 0.00019908329704776406,
+      "loss": 1.7258,
+      "step": 584
+    },
+    {
+      "epoch": 0.04543777549078623,
+      "grad_norm": 1.6378040313720703,
+      "learning_rate": 0.00019907999376300722,
+      "loss": 1.7745,
+      "step": 585
+    },
+    {
+      "epoch": 0.04551544690188159,
+      "grad_norm": 1.869744062423706,
+      "learning_rate": 0.00019907669047825038,
+      "loss": 1.9481,
+      "step": 586
+    },
+    {
+      "epoch": 0.04559311831297695,
+      "grad_norm": 1.8933027982711792,
+      "learning_rate": 0.0001990733580896631,
+      "loss": 2.0266,
+      "step": 587
+    },
+    {
+      "epoch": 0.04567078972407231,
+      "grad_norm": 1.7739107608795166,
+      "learning_rate": 0.0001990700257010758,
+      "loss": 1.974,
+      "step": 588
+    },
+    {
+      "epoch": 0.04574846113516767,
+      "grad_norm": 1.6742160320281982,
+      "learning_rate": 0.00019906670786440372,
+      "loss": 1.7525,
+      "step": 589
+    },
+    {
+      "epoch": 0.04582613254626303,
+      "grad_norm": 1.7374258041381836,
+      "learning_rate": 0.0001990633609239012,
+      "loss": 1.6707,
+      "step": 590
+    },
+    {
+      "epoch": 0.045903803957358394,
+      "grad_norm": 1.5789968967437744,
+      "learning_rate": 0.0001990600285353139,
+      "loss": 1.7975,
+      "step": 591
+    },
+    {
+      "epoch": 0.045981475368453754,
+      "grad_norm": 1.609497308731079,
+      "learning_rate": 0.00019905668159481138,
+      "loss": 1.6833,
+      "step": 592
+    },
+    {
+      "epoch": 0.046059146779549115,
+      "grad_norm": 1.6820900440216064,
+      "learning_rate": 0.00019905332010239363,
+      "loss": 1.7528,
+      "step": 593
+    },
+    {
+      "epoch": 0.046136818190644475,
+      "grad_norm": 1.68113374710083,
+      "learning_rate": 0.00019904995860997587,
+      "loss": 1.8833,
+      "step": 594
+    },
+    {
+      "epoch": 0.04621448960173984,
+      "grad_norm": 1.8364982604980469,
+      "learning_rate": 0.00019904659711755812,
+      "loss": 1.851,
+      "step": 595
+    },
+    {
+      "epoch": 0.0462921610128352,
+      "grad_norm": 1.7463794946670532,
+      "learning_rate": 0.00019904322107322514,
+      "loss": 1.6327,
+      "step": 596
+    },
+    {
+      "epoch": 0.046369832423930564,
+      "grad_norm": 2.0899975299835205,
+      "learning_rate": 0.00019903984502889216,
+      "loss": 1.9714,
+      "step": 597
+    },
+    {
+      "epoch": 0.046447503835025925,
+      "grad_norm": 1.9922312498092651,
+      "learning_rate": 0.00019903646898455918,
+      "loss": 1.8302,
+      "step": 598
+    },
+    {
+      "epoch": 0.046525175246121285,
+      "grad_norm": 1.7716776132583618,
+      "learning_rate": 0.00019903306383639574,
+      "loss": 1.8257,
+      "step": 599
+    },
+    {
+      "epoch": 0.046602846657216646,
+      "grad_norm": 1.7818366289138794,
+      "learning_rate": 0.00019902967324014753,
+      "loss": 1.8584,
+      "step": 600
+    },
+    {
+      "epoch": 0.046602846657216646,
+      "eval_loss": 1.8018074035644531,
+      "eval_runtime": 22.0543,
+      "eval_samples_per_second": 9.386,
+      "eval_steps_per_second": 9.386,
+      "step": 600
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 4
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.1341547207078707e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null