Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +152 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e1cf37faf9885b6cb74c0c4ef4e2c2d7e9e4aab0773875932068d778e74f6f5
 size 45118424

 version https://git-lfs.github.com/spec/v1
+oid sha256:818a070534240e91d68e2f203e19637a55fb6d281983e6b1f4db372769f90baf
 size 45118424

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00e3c513aa284f4a659f6163efed2ca9c3075b3ed338078bb8fc52b80c446eec
 size 23159290

 version https://git-lfs.github.com/spec/v1
+oid sha256:632626d38f137f1b532904c86748923f618de2d84d09066f3d0aab67269c0719
 size 23159290

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb63af91c38df442748945f1270836c41afb394cb114946f4943111c086f7fbe
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:3c212ac5b1e7f34fdb83be045ed0267f2d345126b22dfd1da63ed82ec4ee137d
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ed29baad6d6611c736eccf18bdd7afdc5d6f3612cde61e7bfa83472d3e2068d2
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7736f824b0a772b3806e37af9e860068207311e879196bc19f8a76d97eaf6bce
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.1519354581832886,
   "best_model_checkpoint": "miner_id_besimray/checkpoint-60",
-  "epoch": 1.6842105263157894,
   "eval_steps": 20,
-  "global_step": 80,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -607,6 +607,154 @@
       "eval_samples_per_second": 48.871,
       "eval_steps_per_second": 4.887,
       "step": 80
     }
   ],
   "logging_steps": 1,
@@ -621,7 +769,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
@@ -635,7 +783,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8119797185249280.0,
   "train_batch_size": 10,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.1519354581832886,
   "best_model_checkpoint": "miner_id_besimray/checkpoint-60",
+  "epoch": 2.1052631578947367,
   "eval_steps": 20,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 48.871,
       "eval_steps_per_second": 4.887,
       "step": 80
+    },
+    {
+      "epoch": 1.7052631578947368,
+      "grad_norm": 0.44070982933044434,
+      "learning_rate": 9.775619357041952e-05,
+      "loss": 1.1024,
+      "step": 81
+    },
+    {
+      "epoch": 1.7263157894736842,
+      "grad_norm": 0.5059276819229126,
+      "learning_rate": 9.551351696494854e-05,
+      "loss": 1.1214,
+      "step": 82
+    },
+    {
+      "epoch": 1.7473684210526317,
+      "grad_norm": 0.4155433773994446,
+      "learning_rate": 9.327309943879604e-05,
+      "loss": 1.1853,
+      "step": 83
+    },
+    {
+      "epoch": 1.768421052631579,
+      "grad_norm": 0.5396384596824646,
+      "learning_rate": 9.103606910965666e-05,
+      "loss": 1.1497,
+      "step": 84
+    },
+    {
+      "epoch": 1.7894736842105263,
+      "grad_norm": 0.43235623836517334,
+      "learning_rate": 8.880355238966923e-05,
+      "loss": 1.1753,
+      "step": 85
+    },
+    {
+      "epoch": 1.8105263157894735,
+      "grad_norm": 0.3918503224849701,
+      "learning_rate": 8.657667341823448e-05,
+      "loss": 1.1254,
+      "step": 86
+    },
+    {
+      "epoch": 1.831578947368421,
+      "grad_norm": 0.4692346155643463,
+      "learning_rate": 8.435655349597689e-05,
+      "loss": 1.3666,
+      "step": 87
+    },
+    {
+      "epoch": 1.8526315789473684,
+      "grad_norm": 0.4968159794807434,
+      "learning_rate": 8.214431052013634e-05,
+      "loss": 0.9668,
+      "step": 88
+    },
+    {
+      "epoch": 1.8736842105263158,
+      "grad_norm": 0.4856269061565399,
+      "learning_rate": 7.994105842167273e-05,
+      "loss": 1.1482,
+      "step": 89
+    },
+    {
+      "epoch": 1.8947368421052633,
+      "grad_norm": 0.5288775563240051,
+      "learning_rate": 7.774790660436858e-05,
+      "loss": 1.13,
+      "step": 90
+    },
+    {
+      "epoch": 1.9157894736842105,
+      "grad_norm": 0.5403844118118286,
+      "learning_rate": 7.556595938621058e-05,
+      "loss": 1.1483,
+      "step": 91
+    },
+    {
+      "epoch": 1.936842105263158,
+      "grad_norm": 0.45445382595062256,
+      "learning_rate": 7.339631544333249e-05,
+      "loss": 1.0528,
+      "step": 92
+    },
+    {
+      "epoch": 1.9578947368421051,
+      "grad_norm": 0.48713403940200806,
+      "learning_rate": 7.124006725679828e-05,
+      "loss": 1.2208,
+      "step": 93
+    },
+    {
+      "epoch": 1.9789473684210526,
+      "grad_norm": 0.4627130627632141,
+      "learning_rate": 6.909830056250527e-05,
+      "loss": 1.0794,
+      "step": 94
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.46807029843330383,
+      "learning_rate": 6.697209380448333e-05,
+      "loss": 1.12,
+      "step": 95
+    },
+    {
+      "epoch": 2.0210526315789474,
+      "grad_norm": 0.41066575050354004,
+      "learning_rate": 6.486251759186572e-05,
+      "loss": 1.0634,
+      "step": 96
+    },
+    {
+      "epoch": 2.042105263157895,
+      "grad_norm": 0.3904050886631012,
+      "learning_rate": 6.277063415980549e-05,
+      "loss": 0.9888,
+      "step": 97
+    },
+    {
+      "epoch": 2.0631578947368423,
+      "grad_norm": 0.49676060676574707,
+      "learning_rate": 6.069749683460765e-05,
+      "loss": 0.8783,
+      "step": 98
+    },
+    {
+      "epoch": 2.0842105263157893,
+      "grad_norm": 0.46549147367477417,
+      "learning_rate": 5.864414950334796e-05,
+      "loss": 0.9815,
+      "step": 99
+    },
+    {
+      "epoch": 2.1052631578947367,
+      "grad_norm": 0.5622740387916565,
+      "learning_rate": 5.6611626088244194e-05,
+      "loss": 1.0091,
+      "step": 100
+    },
+    {
+      "epoch": 2.1052631578947367,
+      "eval_loss": 1.1575236320495605,
+      "eval_runtime": 2.0589,
+      "eval_samples_per_second": 48.569,
+      "eval_steps_per_second": 4.857,
+      "step": 100
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 1.01497464815616e+16,
   "train_batch_size": 10,
   "trial_name": null,
   "trial_params": null