Training in progress, step 240, checkpoint

Files changed (12) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c79b3d2dfa1e2ff08e34bee0a50e5d26df97d53fd00ae51087689ebaf5027fe9
 size 136062744

 version https://git-lfs.github.com/spec/v1
+oid sha256:57aa943ba31b7fd0f2d2258b3638435908884af1e83b7e3da9763ba67b95fa40
 size 136062744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f5a1aea2e7e54386cf3ce389cec6fd8823514c6ea8045ebccf97c746d743cca
 size 272133748

 version https://git-lfs.github.com/spec/v1
+oid sha256:78f609766fd9499e6af357bcd74eef24836222dee0149f07897fd8895e50aade
 size 272133748

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2976a2f475d8edc9e9a00ed903ec6fa861e056646565524847948c22626d681
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:93e80a2275824ab49f6bc0b217bb315cd0a85d3c25b43a245828495794a78d4d
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4a98cf07637306947ea7d3f67892a3b98b5c22007d4395c1de2047ef45cd95c
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:381eb7d8287a93e17a40cc15be93d534da9dbf37378fcc74868d5615daf19b34
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e06aedc7584f87414cddb2adf9cb46d6573a485741ffcbccef2e7d45ace8f8f8
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d4d987ee650d278db90b1b49f5d5e57d81bba91b4e110659d4027a225f63078
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af7c8c884b8c371ae21c399e557a74788ef7204c82d4d61283bc7a25749fb4a5
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:1bfd04834fa55090f0aa6f19062eb69d5e7e7d567f3b51b2a09c93679da782f7
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf3bb25966486ea21e5c0eda07f93bcccad75e0a9e396cccc2d2b31e52284d21
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e15a8fd81fd90d6fe35aa6feb35c5e13dd4fe18af2950ff7fcf4c6b68016d32
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec6fb749e5390815130c1196c45f456b136c4b36339acd1006b814b606a29cec
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:a4d603165c2d1acc537a09a3e1f8f3831fbd36a555d1b4282034bf9a666af8e7
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6724cc3b6fa30b54ecf6969ff17f0fa6b8805e0ec29471ae97fdc0007eca256
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:c3b4cdeeb7d7c2d37111aeb034296baee0b0b647a48bc49f1ac03a01bf25b677
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5db64dfbdb44b1f0e091abfb9b970cb0dc413d3122f9ce2b84c891c3041b677
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:887f0c6920abc07c1199dc922f55301b3e567adfbbf72707fdf5afb2c202b331
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e961aafc4bb3a24acc13a66c3eb856682eb2bc992742878e58df3da341f94ce
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:941d9e9f4cfb6894bf574771af69c852f299b452dcf03e677dae3dadf692a003
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.7772511848341233,
   "eval_steps": 200,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -163,6 +163,34 @@
       "eval_samples_per_second": 1454.261,
       "eval_steps_per_second": 182.146,
       "step": 200
     }
   ],
   "logging_steps": 10,
@@ -182,7 +210,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.7236502626893824e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 4.530805687203792,
   "eval_steps": 200,
+  "global_step": 240,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 1454.261,
       "eval_steps_per_second": 182.146,
       "step": 200
+    },
+    {
+      "epoch": 3.966824644549763,
+      "grad_norm": 1.78125,
+      "learning_rate": 0.00019847451999183694,
+      "loss": 2.1714,
+      "step": 210
+    },
+    {
+      "epoch": 4.151658767772512,
+      "grad_norm": 3.453125,
+      "learning_rate": 0.00019824084210910925,
+      "loss": 2.0489,
+      "step": 220
+    },
+    {
+      "epoch": 4.341232227488152,
+      "grad_norm": 2.296875,
+      "learning_rate": 0.00019799067644341844,
+      "loss": 1.7888,
+      "step": 230
+    },
+    {
+      "epoch": 4.530805687203792,
+      "grad_norm": 1.9375,
+      "learning_rate": 0.0001977240649801253,
+      "loss": 2.055,
+      "step": 240
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.267013366723379e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null