Training in progress, step 29815, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +67 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2808ef6d7c1ec76f205bfc3bf60e8896b8d012108d053710f2b9d7309d7f42b2
 size 959732416

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c07327c7b7da4c43a47ffb81f6c4f6a4fec25e1261fa34365dcc766509128f4
 size 959732416

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93f4aafd5e55920f58c0febce193ec74760cdf950639df7ad73eefccd9da8ec1
 size 1915006400

 version https://git-lfs.github.com/spec/v1
+oid sha256:a1433b898173b1a5c7863cdff93e63e7206b206d609822044b1e2437bfafc192
 size 1915006400

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe1ca75804c729ecd6274811b801cee592417281624e7bdb93722530ee68ca62
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:dbbe6a48b674827d84b4f767001ab0185f81594882203e7c8ba1b5ab2f80c9e8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:348961f7e743fe4e2fc3f96e9872ae30fee7c5dae2b7050fcbf673c342e559b9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:ccc5794f619fdf6bef21052c83e93ac1b40acd82a7f910575e12c05843e440f8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.192520543350662,
   "eval_steps": 500,
-  "global_step": 25000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -409,6 +409,69 @@
       "learning_rate": 8.074794566493377e-06,
       "loss": 0.0309,
       "step": 25000
     }
   ],
   "logging_steps": 500,
@@ -423,12 +486,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 9.02247270044192e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 5.0,
   "eval_steps": 500,
+  "global_step": 29815,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 8.074794566493377e-06,
       "loss": 0.0309,
       "step": 25000
+    },
+    {
+      "epoch": 4.2763709542176755,
+      "grad_norm": 0.37754014134407043,
+      "learning_rate": 7.236290457823244e-06,
+      "loss": 0.0312,
+      "step": 25500
+    },
+    {
+      "epoch": 4.360221365084689,
+      "grad_norm": 0.23429933190345764,
+      "learning_rate": 6.397786349153111e-06,
+      "loss": 0.0335,
+      "step": 26000
+    },
+    {
+      "epoch": 4.444071775951702,
+      "grad_norm": 0.25711116194725037,
+      "learning_rate": 5.5592822404829784e-06,
+      "loss": 0.03,
+      "step": 26500
+    },
+    {
+      "epoch": 4.527922186818715,
+      "grad_norm": 0.5407963395118713,
+      "learning_rate": 4.720778131812846e-06,
+      "loss": 0.0325,
+      "step": 27000
+    },
+    {
+      "epoch": 4.611772597685729,
+      "grad_norm": 0.3212348520755768,
+      "learning_rate": 3.882274023142714e-06,
+      "loss": 0.0321,
+      "step": 27500
+    },
+    {
+      "epoch": 4.695623008552742,
+      "grad_norm": 0.34211465716362,
+      "learning_rate": 3.043769914472581e-06,
+      "loss": 0.0309,
+      "step": 28000
+    },
+    {
+      "epoch": 4.779473419419755,
+      "grad_norm": 0.19717147946357727,
+      "learning_rate": 2.2052658058024483e-06,
+      "loss": 0.0306,
+      "step": 28500
+    },
+    {
+      "epoch": 4.8633238302867685,
+      "grad_norm": 0.4883914589881897,
+      "learning_rate": 1.366761697132316e-06,
+      "loss": 0.0322,
+      "step": 29000
+    },
+    {
+      "epoch": 4.947174241153782,
+      "grad_norm": 0.45241132378578186,
+      "learning_rate": 5.282575884621835e-07,
+      "loss": 0.0319,
+      "step": 29500
     }
   ],
   "logging_steps": 500,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.0760159508037632e+19,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null