Training in progress, step 120, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +153 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:818a070534240e91d68e2f203e19637a55fb6d281983e6b1f4db372769f90baf
 size 45118424

 version https://git-lfs.github.com/spec/v1
+oid sha256:e36bb4966b3713f17079f0f0073225f3c17789e78598436f125bc5847c546220
 size 45118424

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:632626d38f137f1b532904c86748923f618de2d84d09066f3d0aab67269c0719
 size 23159290

 version https://git-lfs.github.com/spec/v1
+oid sha256:31631c7141c9eed8d3d67722b7f007bb55e7b4644efb82e4b7c07b72a46d6b5f
 size 23159290

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3c212ac5b1e7f34fdb83be045ed0267f2d345126b22dfd1da63ed82ec4ee137d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:330e765b24011cd6e18b8db74d77f7195e5780a184071a5df72e72c642350c23
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7736f824b0a772b3806e37af9e860068207311e879196bc19f8a76d97eaf6bce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:61c2b4927e3039b26d377375be782c03ce853d193f96b5868ccf559441e84af9
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.1519354581832886,
   "best_model_checkpoint": "miner_id_besimray/checkpoint-60",
-  "epoch": 2.1052631578947367,
   "eval_steps": 20,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -755,6 +755,154 @@
       "eval_samples_per_second": 48.569,
       "eval_steps_per_second": 4.857,
       "step": 100
     }
   ],
   "logging_steps": 1,
@@ -769,7 +917,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -778,12 +926,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.01497464815616e+16,
   "train_batch_size": 10,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.1519354581832886,
   "best_model_checkpoint": "miner_id_besimray/checkpoint-60",
+  "epoch": 2.526315789473684,
   "eval_steps": 20,
+  "global_step": 120,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 48.569,
       "eval_steps_per_second": 4.857,
       "step": 100
+    },
+    {
+      "epoch": 2.126315789473684,
+      "grad_norm": 0.47087791562080383,
+      "learning_rate": 5.4600950026045326e-05,
+      "loss": 0.994,
+      "step": 101
+    },
+    {
+      "epoch": 2.1473684210526316,
+      "grad_norm": 0.46321335434913635,
+      "learning_rate": 5.261313375270014e-05,
+      "loss": 0.8965,
+      "step": 102
+    },
+    {
+      "epoch": 2.168421052631579,
+      "grad_norm": 0.48722636699676514,
+      "learning_rate": 5.0649178193565314e-05,
+      "loss": 1.0028,
+      "step": 103
+    },
+    {
+      "epoch": 2.1894736842105265,
+      "grad_norm": 0.5477016568183899,
+      "learning_rate": 4.87100722594094e-05,
+      "loss": 0.9755,
+      "step": 104
+    },
+    {
+      "epoch": 2.2105263157894735,
+      "grad_norm": 0.43870726227760315,
+      "learning_rate": 4.6796792348466356e-05,
+      "loss": 0.9023,
+      "step": 105
+    },
+    {
+      "epoch": 2.231578947368421,
+      "grad_norm": 0.4974609911441803,
+      "learning_rate": 4.491030185478976e-05,
+      "loss": 1.0978,
+      "step": 106
+    },
+    {
+      "epoch": 2.2526315789473683,
+      "grad_norm": 0.48663774132728577,
+      "learning_rate": 4.305155068315481e-05,
+      "loss": 1.1326,
+      "step": 107
+    },
+    {
+      "epoch": 2.2736842105263158,
+      "grad_norm": 0.47879499197006226,
+      "learning_rate": 4.12214747707527e-05,
+      "loss": 0.8403,
+      "step": 108
+    },
+    {
+      "epoch": 2.294736842105263,
+      "grad_norm": 0.4391883909702301,
+      "learning_rate": 3.942099561591802e-05,
+      "loss": 1.0096,
+      "step": 109
+    },
+    {
+      "epoch": 2.3157894736842106,
+      "grad_norm": 0.5225970149040222,
+      "learning_rate": 3.7651019814126654e-05,
+      "loss": 0.9684,
+      "step": 110
+    },
+    {
+      "epoch": 2.336842105263158,
+      "grad_norm": 0.529344379901886,
+      "learning_rate": 3.591243860149759e-05,
+      "loss": 0.9164,
+      "step": 111
+    },
+    {
+      "epoch": 2.3578947368421055,
+      "grad_norm": 0.4865782856941223,
+      "learning_rate": 3.4206127406028745e-05,
+      "loss": 1.0993,
+      "step": 112
+    },
+    {
+      "epoch": 2.3789473684210525,
+      "grad_norm": 0.4908663332462311,
+      "learning_rate": 3.253294540679257e-05,
+      "loss": 1.1203,
+      "step": 113
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 0.4688137471675873,
+      "learning_rate": 3.089373510131354e-05,
+      "loss": 0.8358,
+      "step": 114
+    },
+    {
+      "epoch": 2.4210526315789473,
+      "grad_norm": 0.5007145404815674,
+      "learning_rate": 2.9289321881345254e-05,
+      "loss": 1.0975,
+      "step": 115
+    },
+    {
+      "epoch": 2.442105263157895,
+      "grad_norm": 0.4280741214752197,
+      "learning_rate": 2.7720513617260856e-05,
+      "loss": 1.0134,
+      "step": 116
+    },
+    {
+      "epoch": 2.463157894736842,
+      "grad_norm": 0.5474169850349426,
+      "learning_rate": 2.6188100251265945e-05,
+      "loss": 0.9781,
+      "step": 117
+    },
+    {
+      "epoch": 2.4842105263157896,
+      "grad_norm": 0.4554167091846466,
+      "learning_rate": 2.4692853399638917e-05,
+      "loss": 1.082,
+      "step": 118
+    },
+    {
+      "epoch": 2.5052631578947366,
+      "grad_norm": 0.5812304615974426,
+      "learning_rate": 2.323552596419889e-05,
+      "loss": 0.9826,
+      "step": 119
+    },
+    {
+      "epoch": 2.526315789473684,
+      "grad_norm": 0.4756172001361847,
+      "learning_rate": 2.181685175319702e-05,
+      "loss": 1.1045,
+      "step": 120
+    },
+    {
+      "epoch": 2.526315789473684,
+      "eval_loss": 1.1679396629333496,
+      "eval_runtime": 2.0595,
+      "eval_samples_per_second": 48.555,
+      "eval_steps_per_second": 4.856,
+      "step": 120
     }
   ],
   "logging_steps": 1,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.214189411500032e+16,
   "train_batch_size": 10,
   "trial_name": null,
   "trial_params": null