Training in progress, step 592, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +307 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:784a3ff3fc84f3309bb405e9f91f112e3a61d4a0cb9f2c6e822bd6a878858326
 size 60010048

 version https://git-lfs.github.com/spec/v1
+oid sha256:727ef6499c3b2029862b640bd0783bcd2d1d77402e26d95bca288a369396ffdf
 size 60010048

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91ba8047dcee23b14a1ea4b0abce7249941fe17ab3f28a1b96219ca15f6dd511
 size 120213058

 version https://git-lfs.github.com/spec/v1
+oid sha256:36f40407089333f17b49c25519b1a1948c002a6cf29f0dd3e129ac1c5424f9b5
 size 120213058

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8d3c6e0bc4d6caf36d14b922d7bf99043ffb0fc6496f31b9e5e063d07fde6e06
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:da699b9d89f916da840476c4663b34c3317d4235c857b22c694b924a7e8d83f6
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ea1fdb86a115854271995d284a3ce89e28b576b20e5293e4b93fa4b47fd3d21
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e426754c87d665fc6a368f5f0109f66999a2f562e7ade96fcf16cbb316f70e2
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.493194580078125,
   "best_model_checkpoint": "miner_id_24/checkpoint-550",
-  "epoch": 1.1920081273281409,
   "eval_steps": 25,
-  "global_step": 550,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4041,6 +4041,308 @@
       "eval_samples_per_second": 26.671,
       "eval_steps_per_second": 26.671,
       "step": 550
     }
   ],
   "logging_steps": 1,
@@ -4055,7 +4357,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -4064,12 +4366,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.8051015272143258e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.493194580078125,
   "best_model_checkpoint": "miner_id_24/checkpoint-550",
+  "epoch": 1.2830342025059263,
   "eval_steps": 25,
+  "global_step": 592,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 26.671,
       "eval_steps_per_second": 26.671,
       "step": 550
+    },
+    {
+      "epoch": 1.1941754148323738,
+      "grad_norm": 0.745163083076477,
+      "learning_rate": 2.2296148993937625e-05,
+      "loss": 1.5242,
+      "step": 551
+    },
+    {
+      "epoch": 1.196342702336607,
+      "grad_norm": 0.7508656978607178,
+      "learning_rate": 2.2185957798587907e-05,
+      "loss": 1.4407,
+      "step": 552
+    },
+    {
+      "epoch": 1.1985099898408398,
+      "grad_norm": 0.7356210350990295,
+      "learning_rate": 2.20784435397923e-05,
+      "loss": 1.1741,
+      "step": 553
+    },
+    {
+      "epoch": 1.200677277345073,
+      "grad_norm": 0.817733883857727,
+      "learning_rate": 2.1973609495031733e-05,
+      "loss": 1.2159,
+      "step": 554
+    },
+    {
+      "epoch": 1.2028445648493058,
+      "grad_norm": 0.7189146876335144,
+      "learning_rate": 2.18714588600831e-05,
+      "loss": 0.9351,
+      "step": 555
+    },
+    {
+      "epoch": 1.2050118523535387,
+      "grad_norm": 0.7796960473060608,
+      "learning_rate": 2.177199474892181e-05,
+      "loss": 1.0885,
+      "step": 556
+    },
+    {
+      "epoch": 1.2071791398577718,
+      "grad_norm": 0.8709949851036072,
+      "learning_rate": 2.1675220193626897e-05,
+      "loss": 1.0783,
+      "step": 557
+    },
+    {
+      "epoch": 1.2093464273620047,
+      "grad_norm": 0.8464658856391907,
+      "learning_rate": 2.1581138144288525e-05,
+      "loss": 0.9859,
+      "step": 558
+    },
+    {
+      "epoch": 1.2115137148662378,
+      "grad_norm": 0.9255772829055786,
+      "learning_rate": 2.148975146891817e-05,
+      "loss": 0.94,
+      "step": 559
+    },
+    {
+      "epoch": 1.2136810023704707,
+      "grad_norm": 0.9168251156806946,
+      "learning_rate": 2.140106295336103e-05,
+      "loss": 0.6344,
+      "step": 560
+    },
+    {
+      "epoch": 1.2158482898747036,
+      "grad_norm": 0.9316290020942688,
+      "learning_rate": 2.1315075301211272e-05,
+      "loss": 0.6167,
+      "step": 561
+    },
+    {
+      "epoch": 1.2180155773789367,
+      "grad_norm": 0.5710924863815308,
+      "learning_rate": 2.123179113372946e-05,
+      "loss": 1.06,
+      "step": 562
+    },
+    {
+      "epoch": 1.2201828648831696,
+      "grad_norm": 0.4053063690662384,
+      "learning_rate": 2.1151212989762815e-05,
+      "loss": 2.0825,
+      "step": 563
+    },
+    {
+      "epoch": 1.2223501523874027,
+      "grad_norm": 0.3882395923137665,
+      "learning_rate": 2.1073343325667632e-05,
+      "loss": 1.9561,
+      "step": 564
+    },
+    {
+      "epoch": 1.2245174398916356,
+      "grad_norm": 0.40940290689468384,
+      "learning_rate": 2.0998184515234558e-05,
+      "loss": 1.8549,
+      "step": 565
+    },
+    {
+      "epoch": 1.2266847273958685,
+      "grad_norm": 0.4803662896156311,
+      "learning_rate": 2.0925738849616136e-05,
+      "loss": 1.9017,
+      "step": 566
+    },
+    {
+      "epoch": 1.2288520149001017,
+      "grad_norm": 0.4790099561214447,
+      "learning_rate": 2.0856008537257003e-05,
+      "loss": 1.8168,
+      "step": 567
+    },
+    {
+      "epoch": 1.2310193024043345,
+      "grad_norm": 0.4733673334121704,
+      "learning_rate": 2.078899570382657e-05,
+      "loss": 1.9583,
+      "step": 568
+    },
+    {
+      "epoch": 1.2331865899085677,
+      "grad_norm": 0.48890408873558044,
+      "learning_rate": 2.0724702392154168e-05,
+      "loss": 1.9115,
+      "step": 569
+    },
+    {
+      "epoch": 1.2353538774128006,
+      "grad_norm": 0.4990405738353729,
+      "learning_rate": 2.0663130562166867e-05,
+      "loss": 1.6618,
+      "step": 570
+    },
+    {
+      "epoch": 1.2375211649170335,
+      "grad_norm": 0.5173394083976746,
+      "learning_rate": 2.0604282090829626e-05,
+      "loss": 1.62,
+      "step": 571
+    },
+    {
+      "epoch": 1.2396884524212666,
+      "grad_norm": 0.47291800379753113,
+      "learning_rate": 2.0548158772088183e-05,
+      "loss": 1.6012,
+      "step": 572
+    },
+    {
+      "epoch": 1.2418557399254995,
+      "grad_norm": 0.5543546676635742,
+      "learning_rate": 2.0494762316814265e-05,
+      "loss": 2.2266,
+      "step": 573
+    },
+    {
+      "epoch": 1.2440230274297326,
+      "grad_norm": 0.5711101293563843,
+      "learning_rate": 2.044409435275349e-05,
+      "loss": 2.1061,
+      "step": 574
+    },
+    {
+      "epoch": 1.2461903149339655,
+      "grad_norm": 0.5323336720466614,
+      "learning_rate": 2.0396156424475758e-05,
+      "loss": 1.7618,
+      "step": 575
+    },
+    {
+      "epoch": 1.2461903149339655,
+      "eval_loss": 1.4941222667694092,
+      "eval_runtime": 1.9256,
+      "eval_samples_per_second": 25.965,
+      "eval_steps_per_second": 25.965,
+      "step": 575
+    },
+    {
+      "epoch": 1.2483576024381984,
+      "grad_norm": 0.5417742729187012,
+      "learning_rate": 2.035094999332813e-05,
+      "loss": 1.9826,
+      "step": 576
+    },
+    {
+      "epoch": 1.2505248899424315,
+      "grad_norm": 0.5711143016815186,
+      "learning_rate": 2.0308476437390292e-05,
+      "loss": 2.1701,
+      "step": 577
+    },
+    {
+      "epoch": 1.2526921774466644,
+      "grad_norm": 0.5723181962966919,
+      "learning_rate": 2.0268737051432534e-05,
+      "loss": 1.8376,
+      "step": 578
+    },
+    {
+      "epoch": 1.2548594649508975,
+      "grad_norm": 0.6304724216461182,
+      "learning_rate": 2.0231733046876336e-05,
+      "loss": 2.2109,
+      "step": 579
+    },
+    {
+      "epoch": 1.2570267524551304,
+      "grad_norm": 0.49579569697380066,
+      "learning_rate": 2.019746555175737e-05,
+      "loss": 1.8671,
+      "step": 580
+    },
+    {
+      "epoch": 1.2591940399593633,
+      "grad_norm": 0.5718152523040771,
+      "learning_rate": 2.0165935610691136e-05,
+      "loss": 2.0108,
+      "step": 581
+    },
+    {
+      "epoch": 1.2613613274635964,
+      "grad_norm": 0.5495701432228088,
+      "learning_rate": 2.0137144184841137e-05,
+      "loss": 1.7419,
+      "step": 582
+    },
+    {
+      "epoch": 1.2635286149678293,
+      "grad_norm": 0.5827396512031555,
+      "learning_rate": 2.0111092151889548e-05,
+      "loss": 2.0878,
+      "step": 583
+    },
+    {
+      "epoch": 1.2656959024720624,
+      "grad_norm": 0.5941032767295837,
+      "learning_rate": 2.00877803060105e-05,
+      "loss": 2.0261,
+      "step": 584
+    },
+    {
+      "epoch": 1.2678631899762953,
+      "grad_norm": 0.6075966954231262,
+      "learning_rate": 2.006720935784581e-05,
+      "loss": 1.9424,
+      "step": 585
+    },
+    {
+      "epoch": 1.2700304774805282,
+      "grad_norm": 0.6470325589179993,
+      "learning_rate": 2.0049379934483398e-05,
+      "loss": 2.0216,
+      "step": 586
+    },
+    {
+      "epoch": 1.2721977649847613,
+      "grad_norm": 0.5846126675605774,
+      "learning_rate": 2.0034292579438092e-05,
+      "loss": 1.9047,
+      "step": 587
+    },
+    {
+      "epoch": 1.2743650524889942,
+      "grad_norm": 0.6144323945045471,
+      "learning_rate": 2.00219477526351e-05,
+      "loss": 1.897,
+      "step": 588
+    },
+    {
+      "epoch": 1.2765323399932273,
+      "grad_norm": 0.5959208607673645,
+      "learning_rate": 2.0012345830396012e-05,
+      "loss": 1.7803,
+      "step": 589
+    },
+    {
+      "epoch": 1.2786996274974602,
+      "grad_norm": 0.6328864097595215,
+      "learning_rate": 2.0005487105427258e-05,
+      "loss": 1.8311,
+      "step": 590
+    },
+    {
+      "epoch": 1.2808669150016931,
+      "grad_norm": 0.6484797596931458,
+      "learning_rate": 2.0001371786811258e-05,
+      "loss": 1.9282,
+      "step": 591
+    },
+    {
+      "epoch": 1.2830342025059263,
+      "grad_norm": 0.6196519136428833,
+      "learning_rate": 2e-05,
+      "loss": 1.7637,
+      "step": 592
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.943692451244933e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null