Training in progress, step 150, checkpoint

Files changed (9) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c503048216b577a639c3e7199d6c8e0dc5320af80cf66353a78052840963a44
 size 83946192

 version https://git-lfs.github.com/spec/v1
+oid sha256:9ef31bd5a132dc63059267ab5a04a10fe9f1676b33345214450050e8f34f3ec8
 size 83946192

last-checkpoint/global_step150/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1893317a4c5a150ef714767ec2bb7a323f7b7ce3e4e52a45098e4fbd40d3ff7
+size 251686096

last-checkpoint/global_step150/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:4e84ad5ab5d56709c933eb3e76fda7609114ea7257d8c79662462699ccc15306
+size 251686224

last-checkpoint/global_step150/mp_rank_00_model_states.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:c43ceb50d2abb1069848965147a144b507ea964f7f1eecee6eb98362e89af5ed
+size 84231276

last-checkpoint/latest CHANGED Viewed

	@@ -1 +1 @@
1	- ~~global_step140~~


1	+ global_step150

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26e86d514dddce0b14f45dd634a478c13db09aafb21ec02229544713a63bc1e7
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:f34572b4ea0c45cec898917df37b81bb05851f4de5cc1bcbafb774bab1bd5668
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2e5617e3007c4a85897d5bc30efdcdd93ae16461f578795dfc04877a4238ff6
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:9bad1eb070839a232fe7250717e387d5d3120d7edacea87879c84fecc74267f8
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d9ec9defbf9d184fe9bf33fb5021004937dc6216d5af83e76aa562e2a036d91
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0edab479cf5df2fd0e0eb08833b9040a0342b7b3b1ce5f746c88e4c78156c68
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.2121437788009644,
   "best_model_checkpoint": "miner_id_24/checkpoint-140",
-  "epoch": 0.021387923461788184,
   "eval_steps": 10,
-  "global_step": 140,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -323,6 +323,28 @@
       "eval_samples_per_second": 7.595,
       "eval_steps_per_second": 1.9,
       "step": 140
     }
   ],
   "logging_steps": 5,
@@ -337,7 +359,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -346,12 +368,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 8.309137876524728e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.2121437788009644,
   "best_model_checkpoint": "miner_id_24/checkpoint-140",
+  "epoch": 0.02291563228048734,
   "eval_steps": 10,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 7.595,
       "eval_steps_per_second": 1.9,
       "step": 140
+    },
+    {
+      "epoch": 0.02215177787113776,
+      "grad_norm": 0.24214355647563934,
+      "learning_rate": 3.6455629509730136e-07,
+      "loss": 1.3213,
+      "step": 145
+    },
+    {
+      "epoch": 0.02291563228048734,
+      "grad_norm": 0.40569353103637695,
+      "learning_rate": 0.0,
+      "loss": 1.0569,
+      "step": 150
+    },
+    {
+      "epoch": 0.02291563228048734,
+      "eval_loss": 1.2136216163635254,
+      "eval_runtime": 722.8743,
+      "eval_samples_per_second": 7.626,
+      "eval_steps_per_second": 1.908,
+      "step": 150
     }
   ],
   "logging_steps": 5,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 8.902647724847923e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null