Training in progress, step 1400, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +59 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d838a5ea1d7a4dc8078b8d4b929ed97c3470e3371a8bb2486e51d5dfd7c4fd84
 size 540001920

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb2357db4d5dbc8b50b73aee33258be5078a83ac5942fbb18c3611064b0ba01e
 size 540001920

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7fff9e74897ca1492335c9d8fe5b040902cbb283142b17c7520e36732329301
 size 1080097722

 version https://git-lfs.github.com/spec/v1
+oid sha256:799e6d327ebe8e5729bbf02861a8f0503d38df901843c264b5fa151bb9f8f1f0
 size 1080097722

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bed33057040b93d153a6ff39538a6442d5d06dbf384014b88b5824efe94b6e35
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:56c984d039682da209f1f0b16ab958ba4afe2a245e0b9d729f68d8024510ead0
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e9157fdd19d09ebce63a8190c2be1a0db31dfdda72875e01c571181cfafcf28
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:a18a679691bf356f5e92141e3d0862158121dd0dcde0cf358005325db8d44228
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 7.0140105078809105,
   "eval_steps": 500,
-  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -147,6 +147,62 @@
       "learning_rate": 6.747099073871009e-06,
       "loss": 0.3459,
       "step": 1000
     }
   ],
   "logging_steps": 50,
@@ -175,7 +231,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6260818378752000.0,
   "train_batch_size": 6,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 9.812609457092819,
   "eval_steps": 500,
+  "global_step": 1400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.747099073871009e-06,
       "loss": 0.3459,
       "step": 1000
+    },
+    {
+      "epoch": 7.364273204903678,
+      "grad_norm": 3.011735200881958,
+      "learning_rate": 1.8115901680389373e-06,
+      "loss": 0.2136,
+      "step": 1050
+    },
+    {
+      "epoch": 7.714535901926444,
+      "grad_norm": 1.369733452796936,
+      "learning_rate": 1.8128355854463153e-10,
+      "loss": 0.2193,
+      "step": 1100
+    },
+    {
+      "epoch": 8.063047285464098,
+      "grad_norm": 2.204275369644165,
+      "learning_rate": 1.7419630088165716e-06,
+      "loss": 0.2068,
+      "step": 1150
+    },
+    {
+      "epoch": 8.413309982486865,
+      "grad_norm": 3.1148300170898438,
+      "learning_rate": 6.624338192647677e-06,
+      "loss": 0.2043,
+      "step": 1200
+    },
+    {
+      "epoch": 8.763572679509632,
+      "grad_norm": 2.349841356277466,
+      "learning_rate": 1.3490758880415994e-05,
+      "loss": 0.1968,
+      "step": 1250
+    },
+    {
+      "epoch": 9.112084063047286,
+      "grad_norm": 2.6443629264831543,
+      "learning_rate": 2.07146919805216e-05,
+      "loss": 0.1814,
+      "step": 1300
+    },
+    {
+      "epoch": 9.462346760070053,
+      "grad_norm": 2.4111177921295166,
+      "learning_rate": 2.65849160614322e-05,
+      "loss": 0.1568,
+      "step": 1350
+    },
+    {
+      "epoch": 9.812609457092819,
+      "grad_norm": 2.6896817684173584,
+      "learning_rate": 2.9710879312112288e-05,
+      "loss": 0.184,
+      "step": 1400
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 8764518629376000.0,
   "train_batch_size": 6,
   "trial_name": null,
   "trial_params": null