Training in progress, step 140, checkpoint

Files changed (6) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84bda531b872d67221b98bd6c658143e5d6cdef1cc3945a95e53e5b9f7dcd800
 size 250490408

 version https://git-lfs.github.com/spec/v1
+oid sha256:5eccc29f20136e494c181b5aa29e8b7587894903fe73530f9417233b28db3cd6
 size 250490408

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65e9c5ada665a48d98e97c1e7fe956283af0da1e8c0bcc8349010ef07ff0fcc8
 size 255265850

 version https://git-lfs.github.com/spec/v1
+oid sha256:bb2b6093a475d42265f69c469897bf59a1052f8a2162fe277c84c75e1c2d7121
 size 255265850

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ab5f379178a16fe50394e3a25fed2ec2104fb031e0d65d050929ea3900b9b8a
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:4d3162f96d93c4de8f0b55d1e1a81f49c8184b1e035e8b2823a0289ac758bbc4
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f18d6d5030bddd3e14856faaed8b883ccb49a599678d6e4aaaec0ea4cbbea9e4
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:3ec93be897e5dfa0ec937b22566356bc36cb89c5fcbcabc5d6f7aa190d5e8539
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b34b977f0b4e5c0c89d8dd446f627fbac9a5e7e945e487b3b1a5a2235e2ee07
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:648d534e3bfa3788d45747301f1f48cbd9086fc244a9af1c996a30c275d122dc
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 10.0,
   "eval_steps": 40,
-  "global_step": 120,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -123,6 +123,20 @@
       "eval_samples_per_second": 143.191,
       "eval_steps_per_second": 2.29,
       "step": 120
     }
   ],
   "logging_steps": 10,
@@ -142,7 +156,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7772915311312896.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 11.666666666666666,
   "eval_steps": 40,
+  "global_step": 140,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 143.191,
       "eval_steps_per_second": 2.29,
       "step": 120
+    },
+    {
+      "epoch": 10.833333333333334,
+      "grad_norm": 3.4375,
+      "learning_rate": 0.00018519194088383273,
+      "loss": 3.913,
+      "step": 130
+    },
+    {
+      "epoch": 11.666666666666666,
+      "grad_norm": 3.859375,
+      "learning_rate": 0.0001821777815225245,
+      "loss": 3.8009,
+      "step": 140
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 9085646496858112.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null