Training in progress, step 60000, checkpoint

Files changed (4) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c416859fe44e8b6d5cb48a9782e6f50fd51541dcab765af23144123ba2fe7971
 size 409608164

 version https://git-lfs.github.com/spec/v1
+oid sha256:d853c6f05a319f25e01c962a1fb3ee48c2db25e7027aae9ea5ba74c82184f6e4
 size 409608164

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61c98bad3ea164698c925b2c088cccf6c87624823b19a3514843aafecdd26735
 size 814647162

 version https://git-lfs.github.com/spec/v1
+oid sha256:83553605e7ddd6468c2b9dc770ee7a1e541ce642115183b055361a355442fe7d
 size 814647162

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2128f689a6302c47cffe9b18c691d685bfb7c7c4df1c1367d77ca12e87395223
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea587fc8ce98d035036ade6e290d9e6323d34aeaeb53c96eb93f1b8fb08d759f
 size 14244

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 5.260389268805891,
   "eval_steps": 20000,
-  "global_step": 40000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -63,6 +63,34 @@
       "learning_rate": 5e-05,
       "loss": 0.0011,
       "step": 40000
     }
   ],
   "logging_steps": 5000,
@@ -82,7 +110,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.477887493977829e+17,
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 7.890583903208837,
   "eval_steps": 20000,
+  "global_step": 60000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 5e-05,
       "loss": 0.0011,
       "step": 40000
+    },
+    {
+      "epoch": 5.917937927406628,
+      "grad_norm": 0.03355633094906807,
+      "learning_rate": 5e-05,
+      "loss": 0.001,
+      "step": 45000
+    },
+    {
+      "epoch": 6.575486586007365,
+      "grad_norm": 0.029502825811505318,
+      "learning_rate": 5e-05,
+      "loss": 0.0009,
+      "step": 50000
+    },
+    {
+      "epoch": 7.233035244608101,
+      "grad_norm": 0.03313542529940605,
+      "learning_rate": 5e-05,
+      "loss": 0.0008,
+      "step": 55000
+    },
+    {
+      "epoch": 7.890583903208837,
+      "grad_norm": 0.020021334290504456,
+      "learning_rate": 5e-05,
+      "loss": 0.0008,
+      "step": 60000
     }
   ],
   "logging_steps": 5000,
       "attributes": {}
     }
   },
+  "total_flos": 5.2168028971708224e+17,
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": null