Training in progress, step 6000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:843ffea3e47027a7327b46056614528e573a8eb208925c13ef01de733d872085
 size 2682482800

 version https://git-lfs.github.com/spec/v1
+oid sha256:9eae45eb43651c4ce612c5b264270a3ccdfbc48e1be2784320e0059c614c3cab
 size 2682482800

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:020e3cbb17c3204164f195677f3b07302a30bdd875a5e4274d98f682a414c00e
 size 5365108834

 version https://git-lfs.github.com/spec/v1
+oid sha256:36970303513d3e205403c36051106bf22e33ef86f3a1e71a2f1e2cba961b8110
 size 5365108834

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9972da412683217d3e7b5c8b7b27bb7cb54e37fcb06d0959653aa9cad5d36fc8
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f4312d4eb4a3834512b8e6a5f558f7335f936ed9768ab54b18216e62eb5a7d3
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e64edb59ac4e53d4505685902ba836e67456c610161bcc738cae4fc6ba12a85d
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:13ae4134e19f55d5a540bad8977ebfa7de23a5f70c51215224d0742bb2666b1a
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e05485df9c0772c57db6278171bd1d12be10e5f20dbf942e364c40f5fbd3287d
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:7e287e6f80aed910a1d4cb01fb428361df3b7e62045921fccfd519aab7f20c2e
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bdab421c47fae8409d29d61cb7a02864fe4a42719ec643482d144bf7b2ce3282
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:006e670f373067b7e226643b8cade6148c320aff0b769e7d1532179c7f45b76a
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c4950c64cff23a8cf10836c8406c5d9f7e6c7ef15fb647d3bd7f359bce3314c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:214df0e2d0c96471516754f237b8e237791d4cac9a44207b49ae1586ecbb810a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.4221377680185069,
   "eval_steps": 500,
-  "global_step": 5500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -777,6 +777,76 @@
       "learning_rate": 5.7783236106846794e-05,
       "loss": 0.9085,
       "step": 5500
     }
   ],
   "logging_steps": 50,
@@ -796,7 +866,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.426997587409371e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.46051392874746205,
   "eval_steps": 500,
+  "global_step": 6000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 5.7783236106846794e-05,
       "loss": 0.9085,
       "step": 5500
+    },
+    {
+      "epoch": 0.4259753840914024,
+      "grad_norm": 0.4861834645271301,
+      "learning_rate": 5.739944734418177e-05,
+      "loss": 0.9067,
+      "step": 5550
+    },
+    {
+      "epoch": 0.42981300016429796,
+      "grad_norm": 0.24226143956184387,
+      "learning_rate": 5.7015658581516737e-05,
+      "loss": 0.9066,
+      "step": 5600
+    },
+    {
+      "epoch": 0.4336506162371935,
+      "grad_norm": 0.2108086198568344,
+      "learning_rate": 5.6631869818851705e-05,
+      "loss": 0.9061,
+      "step": 5650
+    },
+    {
+      "epoch": 0.437488232310089,
+      "grad_norm": 0.7616965770721436,
+      "learning_rate": 5.624808105618667e-05,
+      "loss": 0.9041,
+      "step": 5700
+    },
+    {
+      "epoch": 0.4413258483829845,
+      "grad_norm": 0.3760414719581604,
+      "learning_rate": 5.586429229352165e-05,
+      "loss": 0.9035,
+      "step": 5750
+    },
+    {
+      "epoch": 0.44516346445588,
+      "grad_norm": 0.4564415216445923,
+      "learning_rate": 5.548050353085662e-05,
+      "loss": 0.902,
+      "step": 5800
+    },
+    {
+      "epoch": 0.4490010805287755,
+      "grad_norm": 0.803648054599762,
+      "learning_rate": 5.509671476819159e-05,
+      "loss": 0.9011,
+      "step": 5850
+    },
+    {
+      "epoch": 0.45283869660167103,
+      "grad_norm": 0.7869254350662231,
+      "learning_rate": 5.4712926005526565e-05,
+      "loss": 0.9007,
+      "step": 5900
+    },
+    {
+      "epoch": 0.45667631267456654,
+      "grad_norm": 0.8484482765197754,
+      "learning_rate": 5.4329137242861526e-05,
+      "loss": 0.902,
+      "step": 5950
+    },
+    {
+      "epoch": 0.46051392874746205,
+      "grad_norm": 0.4946975111961365,
+      "learning_rate": 5.39453484801965e-05,
+      "loss": 0.8968,
+      "step": 6000
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 1.0284708509245243e+19,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null