Training in progress, step 190, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +292 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9b8c80c5b549785e595a6edded231302dc9fc7eb612dcee3581a10a67613d048
 size 188794608

 version https://git-lfs.github.com/spec/v1
+oid sha256:e4197823243285d91151e5193369a743a18bb5831f1e606de7cfdd4aa9505682
 size 188794608

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69c608cb2b8bbffebbb5337c04a77b7b4b0b84c85657d208be0718283f227245
 size 377811346

 version https://git-lfs.github.com/spec/v1
+oid sha256:27902d987d923d811183b59010cf9f99b8ceade2c91a8416f467e78344dfc5c8
 size 377811346

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6861fc28c485cb3ef6288fd169e89f691e202481a632d4f2a2a8c13f43c26070
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:56ff2e2b75ce612d72e9e8b2d041791a868be7d580cc9f1da80036ee48c96d1b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:445b7598993fe3a18f07b6ee10e450c957df6de162dc7729c132643a3ca6e545
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b0717fa852ff14c4fb6ef3d8fb6e7d6c5b1e6b17d5f3ada276b0ae8e8648b64
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.21235010027885437,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.2430379746835443,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,294 @@
       "eval_samples_per_second": 8.691,
       "eval_steps_per_second": 8.691,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1424,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.6427164989652992e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.21235010027885437,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 0.3078481012658228,
   "eval_steps": 25,
+  "global_step": 190,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 8.691,
       "eval_steps_per_second": 8.691,
       "step": 150
+    },
+    {
+      "epoch": 0.24465822784810126,
+      "grad_norm": 11.316670417785645,
+      "learning_rate": 1.9922179741796086e-05,
+      "loss": 0.2515,
+      "step": 151
+    },
+    {
+      "epoch": 0.24627848101265823,
+      "grad_norm": 11.774153709411621,
+      "learning_rate": 1.9438237813334586e-05,
+      "loss": 0.1704,
+      "step": 152
+    },
+    {
+      "epoch": 0.2478987341772152,
+      "grad_norm": 6.292091369628906,
+      "learning_rate": 1.8965008980117037e-05,
+      "loss": 0.1084,
+      "step": 153
+    },
+    {
+      "epoch": 0.24951898734177216,
+      "grad_norm": 14.302482604980469,
+      "learning_rate": 1.850263580385163e-05,
+      "loss": 0.4959,
+      "step": 154
+    },
+    {
+      "epoch": 0.25113924050632913,
+      "grad_norm": 12.771811485290527,
+      "learning_rate": 1.8051257575944925e-05,
+      "loss": 0.2987,
+      "step": 155
+    },
+    {
+      "epoch": 0.25275949367088607,
+      "grad_norm": 16.40115737915039,
+      "learning_rate": 1.7611010275539962e-05,
+      "loss": 0.5615,
+      "step": 156
+    },
+    {
+      "epoch": 0.254379746835443,
+      "grad_norm": 13.117633819580078,
+      "learning_rate": 1.718202652855205e-05,
+      "loss": 0.3622,
+      "step": 157
+    },
+    {
+      "epoch": 0.256,
+      "grad_norm": 10.55799674987793,
+      "learning_rate": 1.6764435567714794e-05,
+      "loss": 0.3473,
+      "step": 158
+    },
+    {
+      "epoch": 0.25762025316455694,
+      "grad_norm": 7.9049763679504395,
+      "learning_rate": 1.6358363193648352e-05,
+      "loss": 0.2403,
+      "step": 159
+    },
+    {
+      "epoch": 0.25924050632911394,
+      "grad_norm": 11.420134544372559,
+      "learning_rate": 1.5963931736961547e-05,
+      "loss": 0.4369,
+      "step": 160
+    },
+    {
+      "epoch": 0.2608607594936709,
+      "grad_norm": 8.98204231262207,
+      "learning_rate": 1.5581260021399396e-05,
+      "loss": 0.2004,
+      "step": 161
+    },
+    {
+      "epoch": 0.2624810126582279,
+      "grad_norm": 7.689177513122559,
+      "learning_rate": 1.5210463328047095e-05,
+      "loss": 0.1891,
+      "step": 162
+    },
+    {
+      "epoch": 0.2641012658227848,
+      "grad_norm": 10.6261568069458,
+      "learning_rate": 1.4851653360601179e-05,
+      "loss": 0.2807,
+      "step": 163
+    },
+    {
+      "epoch": 0.26572151898734175,
+      "grad_norm": 7.932508945465088,
+      "learning_rate": 1.4504938211718489e-05,
+      "loss": 0.2299,
+      "step": 164
+    },
+    {
+      "epoch": 0.26734177215189875,
+      "grad_norm": 23.33995819091797,
+      "learning_rate": 1.4170422330452816e-05,
+      "loss": 0.3367,
+      "step": 165
+    },
+    {
+      "epoch": 0.2689620253164557,
+      "grad_norm": 10.17355728149414,
+      "learning_rate": 1.384820649078939e-05,
+      "loss": 0.2386,
+      "step": 166
+    },
+    {
+      "epoch": 0.2705822784810127,
+      "grad_norm": 12.60328197479248,
+      "learning_rate": 1.3538387761286303e-05,
+      "loss": 0.2627,
+      "step": 167
+    },
+    {
+      "epoch": 0.2722025316455696,
+      "grad_norm": 9.619555473327637,
+      "learning_rate": 1.3241059475832373e-05,
+      "loss": 0.2167,
+      "step": 168
+    },
+    {
+      "epoch": 0.27382278481012656,
+      "grad_norm": 5.489387512207031,
+      "learning_rate": 1.2956311205529943e-05,
+      "loss": 0.1933,
+      "step": 169
+    },
+    {
+      "epoch": 0.27544303797468356,
+      "grad_norm": 15.453241348266602,
+      "learning_rate": 1.268422873171136e-05,
+      "loss": 0.1648,
+      "step": 170
+    },
+    {
+      "epoch": 0.2770632911392405,
+      "grad_norm": 37.87616729736328,
+      "learning_rate": 1.2424894020096997e-05,
+      "loss": 0.5199,
+      "step": 171
+    },
+    {
+      "epoch": 0.2786835443037975,
+      "grad_norm": 15.486174583435059,
+      "learning_rate": 1.217838519610291e-05,
+      "loss": 0.7258,
+      "step": 172
+    },
+    {
+      "epoch": 0.28030379746835443,
+      "grad_norm": 15.361915588378906,
+      "learning_rate": 1.1944776521305213e-05,
+      "loss": 0.266,
+      "step": 173
+    },
+    {
+      "epoch": 0.28192405063291137,
+      "grad_norm": 10.4679594039917,
+      "learning_rate": 1.1724138371068603e-05,
+      "loss": 0.3238,
+      "step": 174
+    },
+    {
+      "epoch": 0.28354430379746837,
+      "grad_norm": 15.89462947845459,
+      "learning_rate": 1.1516537213345519e-05,
+      "loss": 0.3613,
+      "step": 175
+    },
+    {
+      "epoch": 0.28354430379746837,
+      "eval_loss": 0.17382794618606567,
+      "eval_runtime": 5.7564,
+      "eval_samples_per_second": 8.686,
+      "eval_steps_per_second": 8.686,
+      "step": 175
+    },
+    {
+      "epoch": 0.2851645569620253,
+      "grad_norm": 11.57437515258789,
+      "learning_rate": 1.1322035588652484e-05,
+      "loss": 0.31,
+      "step": 176
+    },
+    {
+      "epoch": 0.2867848101265823,
+      "grad_norm": 11.174723625183105,
+      "learning_rate": 1.1140692091229556e-05,
+      "loss": 0.423,
+      "step": 177
+    },
+    {
+      "epoch": 0.28840506329113924,
+      "grad_norm": 7.6294026374816895,
+      "learning_rate": 1.0972561351388622e-05,
+      "loss": 0.3503,
+      "step": 178
+    },
+    {
+      "epoch": 0.2900253164556962,
+      "grad_norm": 8.027375221252441,
+      "learning_rate": 1.0817694019055866e-05,
+      "loss": 0.2998,
+      "step": 179
+    },
+    {
+      "epoch": 0.2916455696202532,
+      "grad_norm": 10.243330955505371,
+      "learning_rate": 1.0676136748513286e-05,
+      "loss": 0.2555,
+      "step": 180
+    },
+    {
+      "epoch": 0.2932658227848101,
+      "grad_norm": 21.219587326049805,
+      "learning_rate": 1.0547932184343948e-05,
+      "loss": 0.4385,
+      "step": 181
+    },
+    {
+      "epoch": 0.2948860759493671,
+      "grad_norm": 7.321977615356445,
+      "learning_rate": 1.043311894858519e-05,
+      "loss": 0.2346,
+      "step": 182
+    },
+    {
+      "epoch": 0.29650632911392405,
+      "grad_norm": 9.628911972045898,
+      "learning_rate": 1.033173162909358e-05,
+      "loss": 0.2849,
+      "step": 183
+    },
+    {
+      "epoch": 0.298126582278481,
+      "grad_norm": 7.442295551300049,
+      "learning_rate": 1.0243800769125222e-05,
+      "loss": 0.146,
+      "step": 184
+    },
+    {
+      "epoch": 0.299746835443038,
+      "grad_norm": 5.40734338760376,
+      "learning_rate": 1.0169352858134525e-05,
+      "loss": 0.1266,
+      "step": 185
+    },
+    {
+      "epoch": 0.3013670886075949,
+      "grad_norm": 6.042978763580322,
+      "learning_rate": 1.0108410323794131e-05,
+      "loss": 0.167,
+      "step": 186
+    },
+    {
+      "epoch": 0.3029873417721519,
+      "grad_norm": 7.91204309463501,
+      "learning_rate": 1.0060991525238538e-05,
+      "loss": 0.174,
+      "step": 187
+    },
+    {
+      "epoch": 0.30460759493670886,
+      "grad_norm": 10.424847602844238,
+      "learning_rate": 1.0027110747533332e-05,
+      "loss": 0.3899,
+      "step": 188
+    },
+    {
+      "epoch": 0.3062278481012658,
+      "grad_norm": 11.064706802368164,
+      "learning_rate": 1.0006778197371774e-05,
+      "loss": 0.2932,
+      "step": 189
+    },
+    {
+      "epoch": 0.3078481012658228,
+      "grad_norm": 18.252004623413086,
+      "learning_rate": 1e-05,
+      "loss": 0.8028,
+      "step": 190
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.080310468542464e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null