Training in progress, step 5500, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b86c85d21e02b4fb353faeeb3d660aea6704896451d9709184bff7c07066fc11
 size 2682482800

 version https://git-lfs.github.com/spec/v1
+oid sha256:843ffea3e47027a7327b46056614528e573a8eb208925c13ef01de733d872085
 size 2682482800

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f858c659727537dee70d208e4f7530bb9aa2fd75abd75ce281f7921f65b47398
 size 5365108834

 version https://git-lfs.github.com/spec/v1
+oid sha256:020e3cbb17c3204164f195677f3b07302a30bdd875a5e4274d98f682a414c00e
 size 5365108834

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3840a0d55df82e551ba2df2f51bc3f82c51096b78b70326ed0ced27bbb9ee89
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:9972da412683217d3e7b5c8b7b27bb7cb54e37fcb06d0959653aa9cad5d36fc8
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0bb057eeb9b06f53e574fbe4a832963942b78a235d10b3a25b446042672f8691
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:e64edb59ac4e53d4505685902ba836e67456c610161bcc738cae4fc6ba12a85d
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:281bc409228e74a86854821c6d735fbc6b19559ddf2adbb5902115645730087f
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:e05485df9c0772c57db6278171bd1d12be10e5f20dbf942e364c40f5fbd3287d
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d53e5a514b935e5958ac84cfa036d94b5b66d5dc4bb60a3521252e851ea7316c
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:bdab421c47fae8409d29d61cb7a02864fe4a42719ec643482d144bf7b2ce3282
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9682ac1ed23374d8c69f5c65f96051c280ca3527cf976ad16813a2bf816501e7
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:7c4950c64cff23a8cf10836c8406c5d9f7e6c7ef15fb647d3bd7f359bce3314c
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.3837616072895517,
   "eval_steps": 500,
-  "global_step": 5000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -707,6 +707,76 @@
       "learning_rate": 6.162112373349709e-05,
       "loss": 0.92,
       "step": 5000
     }
   ],
   "logging_steps": 50,
@@ -726,7 +796,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.569672748773671e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.4221377680185069,
   "eval_steps": 500,
+  "global_step": 5500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 6.162112373349709e-05,
       "loss": 0.92,
       "step": 5000
+    },
+    {
+      "epoch": 0.38759922336244723,
+      "grad_norm": 0.6962669491767883,
+      "learning_rate": 6.123733497083206e-05,
+      "loss": 0.9186,
+      "step": 5050
+    },
+    {
+      "epoch": 0.39143683943534274,
+      "grad_norm": 0.6156628131866455,
+      "learning_rate": 6.085354620816702e-05,
+      "loss": 0.9139,
+      "step": 5100
+    },
+    {
+      "epoch": 0.3952744555082383,
+      "grad_norm": 0.4484277069568634,
+      "learning_rate": 6.0469757445502e-05,
+      "loss": 0.914,
+      "step": 5150
+    },
+    {
+      "epoch": 0.3991120715811338,
+      "grad_norm": 0.6082286834716797,
+      "learning_rate": 6.0085968682836965e-05,
+      "loss": 0.9148,
+      "step": 5200
+    },
+    {
+      "epoch": 0.40294968765402933,
+      "grad_norm": 0.6756613850593567,
+      "learning_rate": 5.970217992017194e-05,
+      "loss": 0.9137,
+      "step": 5250
+    },
+    {
+      "epoch": 0.40678730372692484,
+      "grad_norm": 0.6353741884231567,
+      "learning_rate": 5.9318391157506915e-05,
+      "loss": 0.9094,
+      "step": 5300
+    },
+    {
+      "epoch": 0.41062491979982035,
+      "grad_norm": 0.6543828845024109,
+      "learning_rate": 5.893460239484189e-05,
+      "loss": 0.9089,
+      "step": 5350
+    },
+    {
+      "epoch": 0.41446253587271586,
+      "grad_norm": 0.6633620262145996,
+      "learning_rate": 5.855081363217685e-05,
+      "loss": 0.9105,
+      "step": 5400
+    },
+    {
+      "epoch": 0.4183001519456114,
+      "grad_norm": 0.6769128441810608,
+      "learning_rate": 5.816702486951182e-05,
+      "loss": 0.9095,
+      "step": 5450
+    },
+    {
+      "epoch": 0.4221377680185069,
+      "grad_norm": 0.6803929209709167,
+      "learning_rate": 5.7783236106846794e-05,
+      "loss": 0.9085,
+      "step": 5500
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 9.426997587409371e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null