Training in progress, step 800, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e9c89c5afa1545ed2abce6fb61b801899049ea15f9cad45a8a50d3ec0f9d3c1
 size 3826461296

 version https://git-lfs.github.com/spec/v1
+oid sha256:f9a757842f36fd94aead624d11ae735bab0021a2cf1d22b12f1d19d8eb3745df
 size 3826461296

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b0b1b8a92e051537f0d9657a16e5d51d9f5dddc753f3ecef5357cb38fad4fca
 size 2479955235

 version https://git-lfs.github.com/spec/v1
+oid sha256:864f28f2db139c235def5468478555614e1208e6c7e1636a6be1a9a8a84d2903
 size 2479955235

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d166d430557d2569c721c8dec1c8ddf3bfe3fec272b03dceb3e3268be418ae2c
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee1ff4b6d230d52fbd75a1fdfc717e2baaa7034a01541c1dee54a5bf5dd662d6
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55fdec1914c1bee37a4826392246117bd3062dd019b2d4c1b1f435e39b62b9ce
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:87f9876af7981b4f995b217441438c53a026fb406c344a1e30a18ad2545bd292
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.28,
   "eval_steps": 500,
-  "global_step": 700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -498,6 +498,76 @@
       "learning_rate": 3.6254019292604503e-05,
       "loss": 0.7662,
       "step": 700
     }
   ],
   "logging_steps": 10,
@@ -517,7 +587,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2633123965792256e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.32,
   "eval_steps": 500,
+  "global_step": 800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.6254019292604503e-05,
       "loss": 0.7662,
       "step": 700
+    },
+    {
+      "epoch": 0.284,
+      "grad_norm": 25.36922264099121,
+      "learning_rate": 3.605305466237942e-05,
+      "loss": 0.7945,
+      "step": 710
+    },
+    {
+      "epoch": 0.288,
+      "grad_norm": 24.251853942871094,
+      "learning_rate": 3.585209003215435e-05,
+      "loss": 0.9693,
+      "step": 720
+    },
+    {
+      "epoch": 0.292,
+      "grad_norm": 15.235057830810547,
+      "learning_rate": 3.5651125401929266e-05,
+      "loss": 0.8969,
+      "step": 730
+    },
+    {
+      "epoch": 0.296,
+      "grad_norm": 14.464040756225586,
+      "learning_rate": 3.5450160771704185e-05,
+      "loss": 0.7205,
+      "step": 740
+    },
+    {
+      "epoch": 0.3,
+      "grad_norm": 23.044424057006836,
+      "learning_rate": 3.5249196141479104e-05,
+      "loss": 0.839,
+      "step": 750
+    },
+    {
+      "epoch": 0.304,
+      "grad_norm": 25.620925903320312,
+      "learning_rate": 3.504823151125402e-05,
+      "loss": 0.8887,
+      "step": 760
+    },
+    {
+      "epoch": 0.308,
+      "grad_norm": 10.347396850585938,
+      "learning_rate": 3.484726688102894e-05,
+      "loss": 0.645,
+      "step": 770
+    },
+    {
+      "epoch": 0.312,
+      "grad_norm": 19.114471435546875,
+      "learning_rate": 3.464630225080386e-05,
+      "loss": 0.8341,
+      "step": 780
+    },
+    {
+      "epoch": 0.316,
+      "grad_norm": 17.528043746948242,
+      "learning_rate": 3.4445337620578785e-05,
+      "loss": 0.7108,
+      "step": 790
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 13.186959266662598,
+      "learning_rate": 3.4244372990353704e-05,
+      "loss": 0.7991,
+      "step": 800
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.442001474164736e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null