Training in progress, step 160, checkpoint

Files changed (12) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da375f86481b03a9d2fbe4c4764b5a6541f3519ef4ed2d8e159ad8f64d26671f
 size 136062744

 version https://git-lfs.github.com/spec/v1
+oid sha256:8417a0b73d653667ba53d65fa3aeb3fa79e457611efdd000b881b15f98418ab3
 size 136062744

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7e86ccac5cb882c2d06d8bad4ad5ff69d8f6a59845e1f47ae216a61b5c83670
 size 272133748

 version https://git-lfs.github.com/spec/v1
+oid sha256:f508bc8ba1c9ec4a811bb445adae8ac92d7ef48b6b5ff9f8faae4b6d0c26b855
 size 272133748

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d9d284fcf8929ff68940eadb1f206d8eba49f8c693987854749b46be83e55db
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:e99e6fcba4d06b6db192e436ce6ba6ad3be6c41f7c1a17df75645e63e4a1ba26
 size 15984

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:801bc45967e225400ea6d6218cefea8ca7fb06e07fc6307ecd6a44809f1bd798
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:376dd9eb28d9b4207f6c2661925fdbdac8371f1ffb311a713e83c6cfb1f41b62
 size 15984

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5cf84b6a4de40fc270b354ba61cbdcfe219381ff9b92d4bf07039673a43eb930
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:ad6cdb93e11706f0da92f1b4c1b5338e31199ca3d605048fd4a572a58a63d62c
 size 15984

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6c8ec6d9dca3ddeedb012f1d2267ce340264c6bc66a00c098755d036d5dcea2
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b72936ee699998723e9d7cd494ac9340b1dcad53e7524e76c89067d7a2b346a
 size 15984

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4dfac3378cdcc5c67a5e5136020cc0ff3471b8e102c11164b6757ee8ddd6bcdb
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:04fc0d5434f3f92369635a715c45371e6d42eea63346cccbfb195349b83867de
 size 15984

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0e8f97846130fafbc6fbe7c6af5edfb3d752af8f4d68cdc20aa3841316baa2ca
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:214b0d7b01f4d4cd88728cb6763768dfad4c00b7c1aaafbceaae054977b8354f
 size 15984

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6c1d3a3fce8cc9bdae89ac70c19090a517698796481d453ce3dff4933fb62c3
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:e801bb4e9c6d2b0c80bda30e2a0b720124fbd23c8855f40e134326ac1bc12d0f
 size 15984

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61e46970ea72272e9b4db66aca39fd744398034c1875c92ca2f8109b6c5acb6c
 size 15984

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c9810ee90fe6a0cfadc81241baad7797b3acf7d2a9c97e0dd1ae291c481a3bb
 size 15984

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a028d0589cc0ff9afb1e426f9cdea54d0a7cf11c78f68bb990d9bfe61fedfe95
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a25f9d7c5f42f735c079628b97ba8fb26659e157055967a2c398402818d74e5
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.265402843601896,
   "eval_steps": 200,
-  "global_step": 120,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -99,6 +99,34 @@
       "learning_rate": 0.00019983011763899673,
       "loss": 2.463,
       "step": 120
     }
   ],
   "logging_steps": 10,
@@ -118,7 +146,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.6335066833616896e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 3.018957345971564,
   "eval_steps": 200,
+  "global_step": 160,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00019983011763899673,
       "loss": 2.463,
       "step": 120
+    },
+    {
+      "epoch": 2.4549763033175354,
+      "grad_norm": 3.953125,
+      "learning_rate": 0.00019974626060814647,
+      "loss": 2.4124,
+      "step": 130
+    },
+    {
+      "epoch": 2.6445497630331753,
+      "grad_norm": 1.484375,
+      "learning_rate": 0.00019964566313960264,
+      "loss": 2.4985,
+      "step": 140
+    },
+    {
+      "epoch": 2.834123222748815,
+      "grad_norm": 1.75,
+      "learning_rate": 0.0001995283421166614,
+      "loss": 2.4199,
+      "step": 150
+    },
+    {
+      "epoch": 3.018957345971564,
+      "grad_norm": 1.21875,
+      "learning_rate": 0.0001993943172293368,
+      "loss": 2.5101,
+      "step": 160
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 2.1768697873956864e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null