Training in progress, step 60000, checkpoint

Files changed (4) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93944d3da94e747d1eb6729b5cf252c97afb6c5c71c3bb6fb94728dbb93580ad
 size 409608164

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee12323d3de40cc6fec2e53ef895e1b7ced882c5f1e301da444d2d3a86df81e2
 size 409608164

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae37a541fce5610847044607f6719e0d304a7d22c1474f06c0d42c3e7e858884
 size 814647162

 version https://git-lfs.github.com/spec/v1
+oid sha256:14679abceec2775a9a53dc56e0f88e04155e7d5239080dd3f62cd447309b6fcf
 size 814647162

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e0482616294e14da6847ffaa783531ec334633dfd41edcf1d3d71810b0c98d07
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d820f2235ccb34f6ad754b4400efd96293f074f257b91237679dc0697b87a58
 size 14244

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 7.89110278161373,
   "eval_steps": 20000,
-  "global_step": 40000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -63,6 +63,34 @@
       "learning_rate": 5e-05,
       "loss": 0.0011,
       "step": 40000
     }
   ],
   "logging_steps": 5000,
@@ -82,7 +110,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.4672067032889626e+17,
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 11.836654172420596,
   "eval_steps": 20000,
+  "global_step": 60000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 5e-05,
       "loss": 0.0011,
       "step": 40000
+    },
+    {
+      "epoch": 8.877490629315448,
+      "grad_norm": 0.022646205499768257,
+      "learning_rate": 5e-05,
+      "loss": 0.0009,
+      "step": 45000
+    },
+    {
+      "epoch": 9.863878477017163,
+      "grad_norm": 0.027662230655550957,
+      "learning_rate": 5e-05,
+      "loss": 0.0008,
+      "step": 50000
+    },
+    {
+      "epoch": 10.850266324718879,
+      "grad_norm": 0.006470137741416693,
+      "learning_rate": 5e-05,
+      "loss": 0.0008,
+      "step": 55000
+    },
+    {
+      "epoch": 11.836654172420596,
+      "grad_norm": 0.043929170817136765,
+      "learning_rate": 5e-05,
+      "loss": 0.0008,
+      "step": 60000
     }
   ],
   "logging_steps": 5000,
       "attributes": {}
     }
   },
+  "total_flos": 5.2004587461159706e+17,
   "train_batch_size": 128,
   "trial_name": null,
   "trial_params": null