Training in progress, step 600, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +371 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c3c33e8a4a8ed43dc3420bd1386f3c762dc8aac9f1b9d6abc028e43a8a65191
 size 13587864

 version https://git-lfs.github.com/spec/v1
+oid sha256:4899466220e60a7be496bec2b0702ec77d7b42f7c2abb9c89d5990de66858d42
 size 13587864

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df04fbad8806984f9879dd0372c5fd9dec6a52cd99cb407e7ce66b7b1b2d44db
 size 27273018

 version https://git-lfs.github.com/spec/v1
+oid sha256:04f03abccf9919cb7e6cb74b0991c90780af63ba8366b3ce90419ba802cfed1e
 size 27273018

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7950f08aabef52e7e1944935a980baf99b7b3f4d93fe07a39564c7e2a0b20d9
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3464f03cc2695b7e00d9a022811a7d39834c49f04402f78117f949150b3e4b68
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9a42c6236f225c51c1bc182b25f6928e0a7938d42d43dbe4f5aeb61527d641f0
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c10b97f44aaa8e8a1f8b6b752fbcc49ead2ee866b2143ac6a7831438a80daac
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.004960117861628532,
   "best_model_checkpoint": "miner_id_24/checkpoint-450",
-  "epoch": 1.8051282051282052,
   "eval_steps": 25,
-  "global_step": 550,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4041,6 +4041,372 @@
       "eval_samples_per_second": 47.487,
       "eval_steps_per_second": 47.487,
       "step": 550
     }
   ],
   "logging_steps": 1,
@@ -4055,7 +4421,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -4064,12 +4430,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.3508451401334784e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.004960117861628532,
   "best_model_checkpoint": "miner_id_24/checkpoint-450",
+  "epoch": 1.9692307692307693,
   "eval_steps": 25,
+  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 47.487,
       "eval_steps_per_second": 47.487,
       "step": 550
+    },
+    {
+      "epoch": 1.8084102564102564,
+      "grad_norm": 0.0034276428632438183,
+      "learning_rate": 2.3195015539906243e-05,
+      "loss": 0.0049,
+      "step": 551
+    },
+    {
+      "epoch": 1.8116923076923077,
+      "grad_norm": 0.0030685942620038986,
+      "learning_rate": 2.3066675633983865e-05,
+      "loss": 0.0048,
+      "step": 552
+    },
+    {
+      "epoch": 1.814974358974359,
+      "grad_norm": 0.0046894908882677555,
+      "learning_rate": 2.2940921796353956e-05,
+      "loss": 0.0047,
+      "step": 553
+    },
+    {
+      "epoch": 1.8182564102564103,
+      "grad_norm": 0.0035674276296049356,
+      "learning_rate": 2.2817757767906625e-05,
+      "loss": 0.0048,
+      "step": 554
+    },
+    {
+      "epoch": 1.8215384615384616,
+      "grad_norm": 0.005269620567560196,
+      "learning_rate": 2.2697187212491044e-05,
+      "loss": 0.0051,
+      "step": 555
+    },
+    {
+      "epoch": 1.8248205128205128,
+      "grad_norm": 0.008738451637327671,
+      "learning_rate": 2.2579213716806474e-05,
+      "loss": 0.0052,
+      "step": 556
+    },
+    {
+      "epoch": 1.828102564102564,
+      "grad_norm": 0.008472139947116375,
+      "learning_rate": 2.2463840790295566e-05,
+      "loss": 0.0051,
+      "step": 557
+    },
+    {
+      "epoch": 1.8313846153846154,
+      "grad_norm": 0.008605373091995716,
+      "learning_rate": 2.2351071865039974e-05,
+      "loss": 0.0051,
+      "step": 558
+    },
+    {
+      "epoch": 1.8346666666666667,
+      "grad_norm": 0.02175315096974373,
+      "learning_rate": 2.224091029565824e-05,
+      "loss": 0.0053,
+      "step": 559
+    },
+    {
+      "epoch": 1.837948717948718,
+      "grad_norm": 0.008465359918773174,
+      "learning_rate": 2.2133359359206e-05,
+      "loss": 0.0052,
+      "step": 560
+    },
+    {
+      "epoch": 1.8412307692307692,
+      "grad_norm": 0.007232977543026209,
+      "learning_rate": 2.2028422255078542e-05,
+      "loss": 0.0052,
+      "step": 561
+    },
+    {
+      "epoch": 1.8445128205128205,
+      "grad_norm": 0.007051311433315277,
+      "learning_rate": 2.1926102104915553e-05,
+      "loss": 0.0051,
+      "step": 562
+    },
+    {
+      "epoch": 1.8477948717948718,
+      "grad_norm": 0.006151077803224325,
+      "learning_rate": 2.182640195250835e-05,
+      "loss": 0.005,
+      "step": 563
+    },
+    {
+      "epoch": 1.851076923076923,
+      "grad_norm": 0.006573867984116077,
+      "learning_rate": 2.1729324763709264e-05,
+      "loss": 0.0051,
+      "step": 564
+    },
+    {
+      "epoch": 1.8543589743589743,
+      "grad_norm": 0.00678396737203002,
+      "learning_rate": 2.1634873426343427e-05,
+      "loss": 0.0049,
+      "step": 565
+    },
+    {
+      "epoch": 1.8576410256410256,
+      "grad_norm": 0.005578219890594482,
+      "learning_rate": 2.1543050750122902e-05,
+      "loss": 0.0048,
+      "step": 566
+    },
+    {
+      "epoch": 1.860923076923077,
+      "grad_norm": 0.0040833973325788975,
+      "learning_rate": 2.145385946656303e-05,
+      "loss": 0.0047,
+      "step": 567
+    },
+    {
+      "epoch": 1.8642051282051282,
+      "grad_norm": 0.004177347291260958,
+      "learning_rate": 2.1367302228901282e-05,
+      "loss": 0.0046,
+      "step": 568
+    },
+    {
+      "epoch": 1.8674871794871795,
+      "grad_norm": 0.0036663906648755074,
+      "learning_rate": 2.128338161201819e-05,
+      "loss": 0.0047,
+      "step": 569
+    },
+    {
+      "epoch": 1.8707692307692307,
+      "grad_norm": 0.003597427159547806,
+      "learning_rate": 2.1202100112360894e-05,
+      "loss": 0.0048,
+      "step": 570
+    },
+    {
+      "epoch": 1.874051282051282,
+      "grad_norm": 0.0029398370534181595,
+      "learning_rate": 2.1123460147868763e-05,
+      "loss": 0.0048,
+      "step": 571
+    },
+    {
+      "epoch": 1.8773333333333333,
+      "grad_norm": 0.003072077641263604,
+      "learning_rate": 2.1047464057901542e-05,
+      "loss": 0.0048,
+      "step": 572
+    },
+    {
+      "epoch": 1.8806153846153846,
+      "grad_norm": 0.002605011221021414,
+      "learning_rate": 2.0974114103169712e-05,
+      "loss": 0.0048,
+      "step": 573
+    },
+    {
+      "epoch": 1.8838974358974359,
+      "grad_norm": 0.002371675567701459,
+      "learning_rate": 2.0903412465667293e-05,
+      "loss": 0.0047,
+      "step": 574
+    },
+    {
+      "epoch": 1.8871794871794871,
+      "grad_norm": 0.002911495743319392,
+      "learning_rate": 2.0835361248606867e-05,
+      "loss": 0.0047,
+      "step": 575
+    },
+    {
+      "epoch": 1.8871794871794871,
+      "eval_loss": 0.0050178528763353825,
+      "eval_runtime": 1.0828,
+      "eval_samples_per_second": 46.176,
+      "eval_steps_per_second": 46.176,
+      "step": 575
+    },
+    {
+      "epoch": 1.8904615384615384,
+      "grad_norm": 0.0025259945541620255,
+      "learning_rate": 2.0769962476357068e-05,
+      "loss": 0.0047,
+      "step": 576
+    },
+    {
+      "epoch": 1.8937435897435897,
+      "grad_norm": 0.0023200158029794693,
+      "learning_rate": 2.070721809438233e-05,
+      "loss": 0.0047,
+      "step": 577
+    },
+    {
+      "epoch": 1.897025641025641,
+      "grad_norm": 0.0023292931728065014,
+      "learning_rate": 2.0647129969185046e-05,
+      "loss": 0.0048,
+      "step": 578
+    },
+    {
+      "epoch": 1.9003076923076923,
+      "grad_norm": 0.0025951117277145386,
+      "learning_rate": 2.058969988825001e-05,
+      "loss": 0.0047,
+      "step": 579
+    },
+    {
+      "epoch": 1.9035897435897438,
+      "grad_norm": 0.0026415924075990915,
+      "learning_rate": 2.0534929559991233e-05,
+      "loss": 0.0047,
+      "step": 580
+    },
+    {
+      "epoch": 1.9068717948717948,
+      "grad_norm": 0.0020874382462352514,
+      "learning_rate": 2.0482820613701192e-05,
+      "loss": 0.0046,
+      "step": 581
+    },
+    {
+      "epoch": 1.9101538461538463,
+      "grad_norm": 0.002052360912784934,
+      "learning_rate": 2.043337459950229e-05,
+      "loss": 0.0046,
+      "step": 582
+    },
+    {
+      "epoch": 1.9134358974358974,
+      "grad_norm": 0.0021120973397046328,
+      "learning_rate": 2.0386592988300747e-05,
+      "loss": 0.0046,
+      "step": 583
+    },
+    {
+      "epoch": 1.9167179487179489,
+      "grad_norm": 0.0021454044617712498,
+      "learning_rate": 2.03424771717429e-05,
+      "loss": 0.0047,
+      "step": 584
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 0.0023362315259873867,
+      "learning_rate": 2.0301028462173774e-05,
+      "loss": 0.0048,
+      "step": 585
+    },
+    {
+      "epoch": 1.9232820512820514,
+      "grad_norm": 0.002209689933806658,
+      "learning_rate": 2.0262248092598006e-05,
+      "loss": 0.0048,
+      "step": 586
+    },
+    {
+      "epoch": 1.9265641025641025,
+      "grad_norm": 0.0022381660528481007,
+      "learning_rate": 2.0226137216643222e-05,
+      "loss": 0.0048,
+      "step": 587
+    },
+    {
+      "epoch": 1.929846153846154,
+      "grad_norm": 0.002202109433710575,
+      "learning_rate": 2.019269690852569e-05,
+      "loss": 0.0047,
+      "step": 588
+    },
+    {
+      "epoch": 1.933128205128205,
+      "grad_norm": 0.0021981867030262947,
+      "learning_rate": 2.016192816301837e-05,
+      "loss": 0.0046,
+      "step": 589
+    },
+    {
+      "epoch": 1.9364102564102565,
+      "grad_norm": 0.002059696475043893,
+      "learning_rate": 2.0133831895421322e-05,
+      "loss": 0.0047,
+      "step": 590
+    },
+    {
+      "epoch": 1.9396923076923076,
+      "grad_norm": 0.0020739359315484762,
+      "learning_rate": 2.0108408941534486e-05,
+      "loss": 0.0046,
+      "step": 591
+    },
+    {
+      "epoch": 1.942974358974359,
+      "grad_norm": 0.0024034185335040092,
+      "learning_rate": 2.00856600576328e-05,
+      "loss": 0.0047,
+      "step": 592
+    },
+    {
+      "epoch": 1.9462564102564102,
+      "grad_norm": 0.0022281610872596502,
+      "learning_rate": 2.006558592044373e-05,
+      "loss": 0.0048,
+      "step": 593
+    },
+    {
+      "epoch": 1.9495384615384617,
+      "grad_norm": 0.0029593328945338726,
+      "learning_rate": 2.0048187127127092e-05,
+      "loss": 0.0049,
+      "step": 594
+    },
+    {
+      "epoch": 1.9528205128205127,
+      "grad_norm": 0.002573527628555894,
+      "learning_rate": 2.003346419525735e-05,
+      "loss": 0.0048,
+      "step": 595
+    },
+    {
+      "epoch": 1.9561025641025642,
+      "grad_norm": 0.002822197275236249,
+      "learning_rate": 2.002141756280818e-05,
+      "loss": 0.0047,
+      "step": 596
+    },
+    {
+      "epoch": 1.9593846153846153,
+      "grad_norm": 0.002600959734991193,
+      "learning_rate": 2.001204758813944e-05,
+      "loss": 0.0047,
+      "step": 597
+    },
+    {
+      "epoch": 1.9626666666666668,
+      "grad_norm": 0.003187810303643346,
+      "learning_rate": 2.0005354549986523e-05,
+      "loss": 0.0047,
+      "step": 598
+    },
+    {
+      "epoch": 1.9659487179487178,
+      "grad_norm": 0.0029263379983603954,
+      "learning_rate": 2.0001338647452058e-05,
+      "loss": 0.0048,
+      "step": 599
+    },
+    {
+      "epoch": 1.9692307692307693,
+      "grad_norm": 0.003195718163624406,
+      "learning_rate": 2e-05,
+      "loss": 0.0048,
+      "step": 600
+    },
+    {
+      "epoch": 1.9692307692307693,
+      "eval_loss": 0.004978457931429148,
+      "eval_runtime": 1.0723,
+      "eval_samples_per_second": 46.628,
+      "eval_steps_per_second": 46.628,
+      "step": 600
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.654552359691878e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null