Training in progress, step 200, checkpoint

Files changed (12) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8417a0b73d653667ba53d65fa3aeb3fa79e457611efdd000b881b15f98418ab3
 size 136062744

 version https://git-lfs.github.com/spec/v1
+oid sha256:c79b3d2dfa1e2ff08e34bee0a50e5d26df97d53fd00ae51087689ebaf5027fe9
 size 136062744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f508bc8ba1c9ec4a811bb445adae8ac92d7ef48b6b5ff9f8faae4b6d0c26b855
 size 272133748

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f5a1aea2e7e54386cf3ce389cec6fd8823514c6ea8045ebccf97c746d743cca
 size 272133748

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e99e6fcba4d06b6db192e436ce6ba6ad3be6c41f7c1a17df75645e63e4a1ba26
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2976a2f475d8edc9e9a00ed903ec6fa861e056646565524847948c22626d681
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:376dd9eb28d9b4207f6c2661925fdbdac8371f1ffb311a713e83c6cfb1f41b62
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4a98cf07637306947ea7d3f67892a3b98b5c22007d4395c1de2047ef45cd95c
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad6cdb93e11706f0da92f1b4c1b5338e31199ca3d605048fd4a572a58a63d62c
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:e06aedc7584f87414cddb2adf9cb46d6573a485741ffcbccef2e7d45ace8f8f8
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b72936ee699998723e9d7cd494ac9340b1dcad53e7524e76c89067d7a2b346a
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:af7c8c884b8c371ae21c399e557a74788ef7204c82d4d61283bc7a25749fb4a5
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04fc0d5434f3f92369635a715c45371e6d42eea63346cccbfb195349b83867de
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:bf3bb25966486ea21e5c0eda07f93bcccad75e0a9e396cccc2d2b31e52284d21
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:214b0d7b01f4d4cd88728cb6763768dfad4c00b7c1aaafbceaae054977b8354f
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:ec6fb749e5390815130c1196c45f456b136c4b36339acd1006b814b606a29cec
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e801bb4e9c6d2b0c80bda30e2a0b720124fbd23c8855f40e134326ac1bc12d0f
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:a6724cc3b6fa30b54ecf6969ff17f0fa6b8805e0ec29471ae97fdc0007eca256
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c9810ee90fe6a0cfadc81241baad7797b3acf7d2a9c97e0dd1ae291c481a3bb
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:e5db64dfbdb44b1f0e091abfb9b970cb0dc413d3122f9ce2b84c891c3041b677
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a25f9d7c5f42f735c079628b97ba8fb26659e157055967a2c398402818d74e5
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e961aafc4bb3a24acc13a66c3eb856682eb2bc992742878e58df3da341f94ce
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.018957345971564,
   "eval_steps": 200,
-  "global_step": 160,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -127,6 +127,42 @@
       "learning_rate": 0.0001993943172293368,
       "loss": 2.5101,
       "step": 160
     }
   ],
   "logging_steps": 10,
@@ -146,7 +182,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.1768697873956864e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.7772511848341233,
   "eval_steps": 200,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0001993943172293368,
       "loss": 2.5101,
       "step": 160
+    },
+    {
+      "epoch": 3.2085308056872037,
+      "grad_norm": 2.453125,
+      "learning_rate": 0.00019924361097105623,
+      "loss": 2.1293,
+      "step": 170
+    },
+    {
+      "epoch": 3.3981042654028437,
+      "grad_norm": 1.765625,
+      "learning_rate": 0.0001990762486348855,
+      "loss": 2.2484,
+      "step": 180
+    },
+    {
+      "epoch": 3.5876777251184833,
+      "grad_norm": 4.5,
+      "learning_rate": 0.00019889225830928365,
+      "loss": 2.0503,
+      "step": 190
+    },
+    {
+      "epoch": 3.7772511848341233,
+      "grad_norm": 1.7734375,
+      "learning_rate": 0.00019869167087338907,
+      "loss": 2.2932,
+      "step": 200
+    },
+    {
+      "epoch": 3.7772511848341233,
+      "eval_loss": 2.8660690784454346,
+      "eval_runtime": 1.0321,
+      "eval_samples_per_second": 1454.261,
+      "eval_steps_per_second": 182.146,
+      "step": 200
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 2.7236502626893824e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null