Training in progress, step 60, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +153 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e41d69019d421204220964865c9c89fc7bacb60a8411908a8b29c0a12114b94a
 size 45118424

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b7abe2f22382f3999a654747949c5f04cd7b50f2076f7d08dc4cde28c3ff8c3
 size 45118424

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1928572a9ac56c6d98ea8953c7a7a8fdca570b8a20a3959f47990cc5be36ddc3
 size 23159290

 version https://git-lfs.github.com/spec/v1
+oid sha256:86a525396334785dec46ba6a050a54b56d171785887e5e54d90bce2652d22331
 size 23159290

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e565442590f72e02c81245f841f0d570f0816c276d51ef5abfd2bed9cc00d28
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:31543a07fbd48036322e55fc875b941e5f56b48a74d8818ee4618f3aa994756b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:062016b917f4cd81cef6ab15bfe81df4e94586c0afba8905b655deaa6fff468f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:50b80d146b8b37f82a1962ba385b8329fd6c2c35ba5116c53e131bb661ab681b
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.7905269861221313,
-  "best_model_checkpoint": "miner_id_besimray/checkpoint-40",
-  "epoch": 0.5144694533762058,
   "eval_steps": 20,
-  "global_step": 40,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -311,6 +311,154 @@
       "eval_samples_per_second": 21.975,
       "eval_steps_per_second": 2.278,
       "step": 40
     }
   ],
   "logging_steps": 1,
@@ -339,7 +487,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.012328531755008e+16,
   "train_batch_size": 10,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.7785296440124512,
+  "best_model_checkpoint": "miner_id_besimray/checkpoint-60",
+  "epoch": 0.7717041800643086,
   "eval_steps": 20,
+  "global_step": 60,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 21.975,
       "eval_steps_per_second": 2.278,
       "step": 40
+    },
+    {
+      "epoch": 0.5273311897106109,
+      "grad_norm": 0.20955221354961395,
+      "learning_rate": 0.0001767644740358011,
+      "loss": 0.7579,
+      "step": 41
+    },
+    {
+      "epoch": 0.5401929260450161,
+      "grad_norm": 0.22845645248889923,
+      "learning_rate": 0.00017530714660036112,
+      "loss": 0.8299,
+      "step": 42
+    },
+    {
+      "epoch": 0.5530546623794212,
+      "grad_norm": 0.21866574883460999,
+      "learning_rate": 0.00017381189974873407,
+      "loss": 0.7611,
+      "step": 43
+    },
+    {
+      "epoch": 0.5659163987138264,
+      "grad_norm": 0.23347878456115723,
+      "learning_rate": 0.00017227948638273916,
+      "loss": 0.7499,
+      "step": 44
+    },
+    {
+      "epoch": 0.5787781350482315,
+      "grad_norm": 0.22938531637191772,
+      "learning_rate": 0.00017071067811865476,
+      "loss": 0.7519,
+      "step": 45
+    },
+    {
+      "epoch": 0.5916398713826366,
+      "grad_norm": 0.23043856024742126,
+      "learning_rate": 0.00016910626489868649,
+      "loss": 0.7447,
+      "step": 46
+    },
+    {
+      "epoch": 0.6045016077170418,
+      "grad_norm": 0.2377193123102188,
+      "learning_rate": 0.00016746705459320745,
+      "loss": 0.8113,
+      "step": 47
+    },
+    {
+      "epoch": 0.617363344051447,
+      "grad_norm": 0.22112426161766052,
+      "learning_rate": 0.00016579387259397127,
+      "loss": 0.7647,
+      "step": 48
+    },
+    {
+      "epoch": 0.6302250803858521,
+      "grad_norm": 0.22107215225696564,
+      "learning_rate": 0.0001640875613985024,
+      "loss": 0.7654,
+      "step": 49
+    },
+    {
+      "epoch": 0.6430868167202572,
+      "grad_norm": 0.23675453662872314,
+      "learning_rate": 0.00016234898018587337,
+      "loss": 0.7388,
+      "step": 50
+    },
+    {
+      "epoch": 0.6559485530546624,
+      "grad_norm": 0.22421492636203766,
+      "learning_rate": 0.000160579004384082,
+      "loss": 0.7062,
+      "step": 51
+    },
+    {
+      "epoch": 0.6688102893890675,
+      "grad_norm": 0.20600281655788422,
+      "learning_rate": 0.00015877852522924732,
+      "loss": 0.7074,
+      "step": 52
+    },
+    {
+      "epoch": 0.6816720257234726,
+      "grad_norm": 0.24087969958782196,
+      "learning_rate": 0.0001569484493168452,
+      "loss": 0.8616,
+      "step": 53
+    },
+    {
+      "epoch": 0.6945337620578779,
+      "grad_norm": 0.20934121310710907,
+      "learning_rate": 0.00015508969814521025,
+      "loss": 0.7366,
+      "step": 54
+    },
+    {
+      "epoch": 0.707395498392283,
+      "grad_norm": 0.19671772420406342,
+      "learning_rate": 0.00015320320765153367,
+      "loss": 0.6683,
+      "step": 55
+    },
+    {
+      "epoch": 0.7202572347266881,
+      "grad_norm": 0.2396930456161499,
+      "learning_rate": 0.00015128992774059063,
+      "loss": 0.721,
+      "step": 56
+    },
+    {
+      "epoch": 0.7331189710610932,
+      "grad_norm": 0.23783567547798157,
+      "learning_rate": 0.0001493508218064347,
+      "loss": 0.6809,
+      "step": 57
+    },
+    {
+      "epoch": 0.7459807073954984,
+      "grad_norm": 0.2352839708328247,
+      "learning_rate": 0.00014738686624729986,
+      "loss": 0.8257,
+      "step": 58
+    },
+    {
+      "epoch": 0.7588424437299035,
+      "grad_norm": 0.24446754157543182,
+      "learning_rate": 0.00014539904997395468,
+      "loss": 0.8681,
+      "step": 59
+    },
+    {
+      "epoch": 0.7717041800643086,
+      "grad_norm": 0.21945470571517944,
+      "learning_rate": 0.00014338837391175582,
+      "loss": 0.7524,
+      "step": 60
+    },
+    {
+      "epoch": 0.7717041800643086,
+      "eval_loss": 0.7785296440124512,
+      "eval_runtime": 7.0737,
+      "eval_samples_per_second": 23.185,
+      "eval_steps_per_second": 2.403,
+      "step": 60
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 1.446291621543936e+16,
   "train_batch_size": 10,
   "trial_name": null,
   "trial_params": null