Training in progress, step 6400, checkpoint

Files changed (8) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02a1e0d87add09ba651f2a729abafd2a613814e02b8125bafe8d677d51716111
 size 136000488

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba67af77e590add1234ed9e6707c3290aff04c26a766ec648f4300588e0afe16
 size 136000488

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:970bd2be1ae81ed39a3bab26aac9fc4a4b90f3a6e038c7195d610bc8193f621e
 size 268176506

 version https://git-lfs.github.com/spec/v1
+oid sha256:bbe63bd17935f5a39a7f184f7d8114197718b8cbb65f4193ecb4011c7d3b4022
 size 268176506

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be98fd7b6c59868051c2a7a496322fe42f0b5f1c7f02ac3df74001bd720639e9
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:c3da06ea7fd1000116d22b485aedad0cad93b28290b3d5dbb3f366fd88b44390
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c09c62cd10a62d153706d50b58ca84f185d81931e84abc4ef9fdb458816f39a
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:49ea0a73b6835eaf0ddaf0b8b30c4d80570d58f4c008d678e3873e2b2b06348e
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0095be6e8cdbda40c1ef0de4eab1d05ca8dcf9005466b61accb040b1d57e5855
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:895fc1f187ec064c8fbc6029dd7b1972f6a6c4742fc4d164dd5488f493b85c4d
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44214bf51755430bb0ad6e043f4416534d7554ae6b99cffa99fb0632139f6298
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:9d5acff08e0047885af8ddd8c4e0c85f4e5ecff94472442c3435d42f29f06dc6
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f46aec629b817656a56924b587a49fe29c98571de391a7ee56ba1b4b34f3fc5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0738fae84b946e365269b38dfada90de3b34d88297d57fd448fba6217d237f7f
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.7565015554428101,
   "best_model_checkpoint": "mgh6/TCS_MLM/checkpoint-6300",
-  "epoch": 8.433734939759036,
   "eval_steps": 100,
-  "global_step": 6300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -952,6 +952,21 @@
       "eval_samples_per_second": 896.341,
       "eval_steps_per_second": 3.622,
       "step": 6300
     }
   ],
   "logging_steps": 100,
@@ -966,7 +981,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 0
       }
     },
     "TrainerControl": {
@@ -980,7 +995,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.28767305826304e+17,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.7565015554428101,
   "best_model_checkpoint": "mgh6/TCS_MLM/checkpoint-6300",
+  "epoch": 8.56760374832664,
   "eval_steps": 100,
+  "global_step": 6400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 896.341,
       "eval_steps_per_second": 3.622,
       "step": 6300
+    },
+    {
+      "epoch": 8.56760374832664,
+      "grad_norm": 0.21538911759853363,
+      "learning_rate": 0.000143239625167336,
+      "loss": 0.6759,
+      "step": 6400
+    },
+    {
+      "epoch": 8.56760374832664,
+      "eval_loss": 0.7605956792831421,
+      "eval_runtime": 6.4074,
+      "eval_samples_per_second": 888.342,
+      "eval_steps_per_second": 3.59,
+      "step": 6400
     }
   ],
   "logging_steps": 100,
         "early_stopping_threshold": 0.0
       },
       "attributes": {
+        "early_stopping_patience_counter": 1
       }
     },
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 2.32398532902912e+17,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null