Training in progress, step 190, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +292 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b63bc9d574a1e345fc18c69107e6e6be6d868f3c4a2374ff6dca021fca856ecc
 size 100966336

 version https://git-lfs.github.com/spec/v1
+oid sha256:343c1413ea61110e9ebf894c3ac3f0bf6fc2c2d0350e548b608a4ed6b9025375
 size 100966336

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe0c28bfe4d853a31d04be135ca4e997ab5a91e658ae11b8ad2911ff1fa9e4bc
 size 202110330

 version https://git-lfs.github.com/spec/v1
+oid sha256:b6c531dcf5a32114e0a68282c25cf49d0edf1632e6ce8ee2da4834ef2a77d2d5
 size 202110330

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:30171f90f1f394c061f52ff83feba83fa242f21f32d0e16ee62eb252bde7bbbc
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d1748613d6fe579509e3840a49ac2fdd8a596903c8c8205b822e98ccf4ff543d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:445b7598993fe3a18f07b6ee10e450c957df6de162dc7729c132643a3ca6e545
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b0717fa852ff14c4fb6ef3d8fb6e7d6c5b1e6b17d5f3ada276b0ae8e8648b64
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.0219863653182983,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 1.1214953271028036,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,294 @@
       "eval_samples_per_second": 28.515,
       "eval_steps_per_second": 3.992,
       "step": 150
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1424,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.249320727497933e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.0219863653182983,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
+  "epoch": 1.4205607476635513,
   "eval_steps": 25,
+  "global_step": 190,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 28.515,
       "eval_steps_per_second": 3.992,
       "step": 150
+    },
+    {
+      "epoch": 1.1289719626168224,
+      "grad_norm": 1.3583935499191284,
+      "learning_rate": 1.9922179741796086e-05,
+      "loss": 0.8198,
+      "step": 151
+    },
+    {
+      "epoch": 1.1364485981308412,
+      "grad_norm": 1.284717321395874,
+      "learning_rate": 1.9438237813334586e-05,
+      "loss": 0.7878,
+      "step": 152
+    },
+    {
+      "epoch": 1.1439252336448598,
+      "grad_norm": 1.3621183633804321,
+      "learning_rate": 1.8965008980117037e-05,
+      "loss": 0.9232,
+      "step": 153
+    },
+    {
+      "epoch": 1.1514018691588785,
+      "grad_norm": 1.6327229738235474,
+      "learning_rate": 1.850263580385163e-05,
+      "loss": 1.0294,
+      "step": 154
+    },
+    {
+      "epoch": 1.158878504672897,
+      "grad_norm": 1.386734127998352,
+      "learning_rate": 1.8051257575944925e-05,
+      "loss": 0.8834,
+      "step": 155
+    },
+    {
+      "epoch": 1.1663551401869159,
+      "grad_norm": 1.375533938407898,
+      "learning_rate": 1.7611010275539962e-05,
+      "loss": 0.9483,
+      "step": 156
+    },
+    {
+      "epoch": 1.1738317757009347,
+      "grad_norm": 1.242241382598877,
+      "learning_rate": 1.718202652855205e-05,
+      "loss": 0.8194,
+      "step": 157
+    },
+    {
+      "epoch": 1.1813084112149532,
+      "grad_norm": 1.247066617012024,
+      "learning_rate": 1.6764435567714794e-05,
+      "loss": 0.8326,
+      "step": 158
+    },
+    {
+      "epoch": 1.188785046728972,
+      "grad_norm": 1.4270540475845337,
+      "learning_rate": 1.6358363193648352e-05,
+      "loss": 0.8584,
+      "step": 159
+    },
+    {
+      "epoch": 1.1962616822429906,
+      "grad_norm": 1.225496768951416,
+      "learning_rate": 1.5963931736961547e-05,
+      "loss": 0.8475,
+      "step": 160
+    },
+    {
+      "epoch": 1.2037383177570093,
+      "grad_norm": 1.2226568460464478,
+      "learning_rate": 1.5581260021399396e-05,
+      "loss": 0.828,
+      "step": 161
+    },
+    {
+      "epoch": 1.2112149532710281,
+      "grad_norm": 1.387080192565918,
+      "learning_rate": 1.5210463328047095e-05,
+      "loss": 0.8902,
+      "step": 162
+    },
+    {
+      "epoch": 1.2186915887850467,
+      "grad_norm": 1.2809566259384155,
+      "learning_rate": 1.4851653360601179e-05,
+      "loss": 0.9188,
+      "step": 163
+    },
+    {
+      "epoch": 1.2261682242990655,
+      "grad_norm": 1.4872632026672363,
+      "learning_rate": 1.4504938211718489e-05,
+      "loss": 1.1853,
+      "step": 164
+    },
+    {
+      "epoch": 1.233644859813084,
+      "grad_norm": 1.3789664506912231,
+      "learning_rate": 1.4170422330452816e-05,
+      "loss": 0.9331,
+      "step": 165
+    },
+    {
+      "epoch": 1.2411214953271028,
+      "grad_norm": 1.7620553970336914,
+      "learning_rate": 1.384820649078939e-05,
+      "loss": 1.3851,
+      "step": 166
+    },
+    {
+      "epoch": 1.2485981308411216,
+      "grad_norm": 2.51485013961792,
+      "learning_rate": 1.3538387761286303e-05,
+      "loss": 1.7585,
+      "step": 167
+    },
+    {
+      "epoch": 1.2560747663551401,
+      "grad_norm": 1.4379189014434814,
+      "learning_rate": 1.3241059475832373e-05,
+      "loss": 0.9246,
+      "step": 168
+    },
+    {
+      "epoch": 1.263551401869159,
+      "grad_norm": 1.3618923425674438,
+      "learning_rate": 1.2956311205529943e-05,
+      "loss": 0.8608,
+      "step": 169
+    },
+    {
+      "epoch": 1.2710280373831775,
+      "grad_norm": 1.3011233806610107,
+      "learning_rate": 1.268422873171136e-05,
+      "loss": 0.8322,
+      "step": 170
+    },
+    {
+      "epoch": 1.2785046728971963,
+      "grad_norm": 1.5399248600006104,
+      "learning_rate": 1.2424894020096997e-05,
+      "loss": 0.7588,
+      "step": 171
+    },
+    {
+      "epoch": 1.2859813084112148,
+      "grad_norm": 1.449872374534607,
+      "learning_rate": 1.217838519610291e-05,
+      "loss": 0.857,
+      "step": 172
+    },
+    {
+      "epoch": 1.2934579439252336,
+      "grad_norm": 1.3477046489715576,
+      "learning_rate": 1.1944776521305213e-05,
+      "loss": 0.8627,
+      "step": 173
+    },
+    {
+      "epoch": 1.3009345794392524,
+      "grad_norm": 1.3076852560043335,
+      "learning_rate": 1.1724138371068603e-05,
+      "loss": 0.9005,
+      "step": 174
+    },
+    {
+      "epoch": 1.308411214953271,
+      "grad_norm": 1.294968843460083,
+      "learning_rate": 1.1516537213345519e-05,
+      "loss": 0.7639,
+      "step": 175
+    },
+    {
+      "epoch": 1.308411214953271,
+      "eval_loss": 1.0100014209747314,
+      "eval_runtime": 1.729,
+      "eval_samples_per_second": 28.918,
+      "eval_steps_per_second": 4.049,
+      "step": 175
+    },
+    {
+      "epoch": 1.3158878504672897,
+      "grad_norm": 1.3219696283340454,
+      "learning_rate": 1.1322035588652484e-05,
+      "loss": 0.7752,
+      "step": 176
+    },
+    {
+      "epoch": 1.3233644859813083,
+      "grad_norm": 1.1848926544189453,
+      "learning_rate": 1.1140692091229556e-05,
+      "loss": 0.7759,
+      "step": 177
+    },
+    {
+      "epoch": 1.330841121495327,
+      "grad_norm": 1.1485064029693604,
+      "learning_rate": 1.0972561351388622e-05,
+      "loss": 0.7454,
+      "step": 178
+    },
+    {
+      "epoch": 1.3383177570093459,
+      "grad_norm": 1.1740100383758545,
+      "learning_rate": 1.0817694019055866e-05,
+      "loss": 0.761,
+      "step": 179
+    },
+    {
+      "epoch": 1.3457943925233644,
+      "grad_norm": 1.3378069400787354,
+      "learning_rate": 1.0676136748513286e-05,
+      "loss": 0.8535,
+      "step": 180
+    },
+    {
+      "epoch": 1.3532710280373832,
+      "grad_norm": 1.2721531391143799,
+      "learning_rate": 1.0547932184343948e-05,
+      "loss": 0.8117,
+      "step": 181
+    },
+    {
+      "epoch": 1.3607476635514018,
+      "grad_norm": 1.255110740661621,
+      "learning_rate": 1.043311894858519e-05,
+      "loss": 0.8114,
+      "step": 182
+    },
+    {
+      "epoch": 1.3682242990654205,
+      "grad_norm": 1.184085726737976,
+      "learning_rate": 1.033173162909358e-05,
+      "loss": 0.7484,
+      "step": 183
+    },
+    {
+      "epoch": 1.3757009345794393,
+      "grad_norm": 1.2864772081375122,
+      "learning_rate": 1.0243800769125222e-05,
+      "loss": 0.8197,
+      "step": 184
+    },
+    {
+      "epoch": 1.3831775700934579,
+      "grad_norm": 1.3960767984390259,
+      "learning_rate": 1.0169352858134525e-05,
+      "loss": 0.8416,
+      "step": 185
+    },
+    {
+      "epoch": 1.3906542056074767,
+      "grad_norm": 1.6105817556381226,
+      "learning_rate": 1.0108410323794131e-05,
+      "loss": 0.8156,
+      "step": 186
+    },
+    {
+      "epoch": 1.3981308411214952,
+      "grad_norm": 1.4161114692687988,
+      "learning_rate": 1.0060991525238538e-05,
+      "loss": 0.8663,
+      "step": 187
+    },
+    {
+      "epoch": 1.405607476635514,
+      "grad_norm": 1.3891263008117676,
+      "learning_rate": 1.0027110747533332e-05,
+      "loss": 0.9249,
+      "step": 188
+    },
+    {
+      "epoch": 1.4130841121495328,
+      "grad_norm": 1.4171258211135864,
+      "learning_rate": 1.0006778197371774e-05,
+      "loss": 0.837,
+      "step": 189
+    },
+    {
+      "epoch": 1.4205607476635513,
+      "grad_norm": 1.4086953401565552,
+      "learning_rate": 1e-05,
+      "loss": 0.9393,
+      "step": 190
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.916153511660749e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null