Training in progress, step 10180, checkpoint

Files changed (5) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d87304ba9339f77e6175e6965b703a52428af51cedafc31a0bbfddf79d7965a6
 size 990185320

 version https://git-lfs.github.com/spec/v1
+oid sha256:f54d77e7daba182457f5b934b683bb0c366abc8b22b61e6bcd269621abc92e9f
 size 990185320

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:12036c4d1c532ea86bb8a9c360dfbe0b8fe32c9677625d889c487420ab810ff9
 size 1980545291

 version https://git-lfs.github.com/spec/v1
+oid sha256:135ed32c18796919d025ee0d031964f792393b973ea4bbaf40df292142688daf
 size 1980545291

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97f497ef69b8dbe5c6bba152f4cd98b501ac4e5a09248cff1e802c21fa6c2d4f
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:8552f27df7abe09a972ce07245d6ac928db4275e959c227e9dc77f79689c125d
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a94f04e86ab58289e37d74269e24f1195144c7ee097f576e9404678cea6926f7
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:94d07f0b1a695f119065fa87f9ec7a10c06b521dc7b9d758333f96e37c1da2d9
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.911591355599214,
   "eval_steps": 400,
-  "global_step": 10000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -958,6 +958,13 @@
       "eval_samples_per_second": 9.254,
       "eval_steps_per_second": 1.157,
       "step": 10000
     }
   ],
   "logging_steps": 100,
@@ -972,12 +979,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.738510164603699e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 5.0,
   "eval_steps": 400,
+  "global_step": 10180,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.254,
       "eval_steps_per_second": 1.157,
       "step": 10000
+    },
+    {
+      "epoch": 4.960707269155206,
+      "grad_norm": 4.8344407081604,
+      "learning_rate": 2.784872298624754e-07,
+      "loss": 1.0911,
+      "step": 10100
     }
   ],
   "logging_steps": 100,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 2.787739807776768e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null