Training in progress, step 880, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +60 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ebe70a745247409cae6b79f3c1184ce97abbd4bac127f999909b5b73251d4c8
 size 73911112

 version https://git-lfs.github.com/spec/v1
+oid sha256:7ca0f4e76acdd877e061b54b094915f061d5aef2a48a215afcdffebf99684346
 size 73911112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17ac518a3753be37e8a64fd1d9865d36bd71db712f97713b82aeaa261792226a
 size 148053627

 version https://git-lfs.github.com/spec/v1
+oid sha256:e456b05a1308ce2179ec568426c87fba32d61b44110a58f117291568af87b5e3
 size 148053627

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7895d483b77b3cb6aee62b3bc42d0d3e4277b9cef4ded3fbeb54271ff22e18ca
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:b51625db2aad97eb7aa57bda210faaf43e85a6afe5e9f14d65cd48ce0ab9a8f6
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37fe9d97d6d21af00d1c8335a3abe36a9edcee7b7c01e459eb702311857d4d02
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:df5db66705b412cd18d5ad4eb6f72a79adbb3f849df7f1405564a81fc27e59d0
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04a7e2e967d181c1ed113e98417e3884ff02f87ef8b2066c0ee1e0fd9a352607
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe155ae67690223b1383fab8be573d608c35abd7fd572e583add75faf527c0df
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 9.548571428571428,
   "eval_steps": 100,
-  "global_step": 840,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1248,6 +1248,62 @@
       "learning_rate": 3.298903859953517e-06,
       "loss": 0.0239,
       "step": 840
     }
   ],
   "logging_steps": 5,
@@ -1262,12 +1318,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.45693806582825e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 10.0,
   "eval_steps": 100,
+  "global_step": 880,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 3.298903859953517e-06,
       "loss": 0.0239,
       "step": 840
+    },
+    {
+      "epoch": 9.605714285714285,
+      "grad_norm": 0.11044926196336746,
+      "learning_rate": 2.544639529766829e-06,
+      "loss": 0.0234,
+      "step": 845
+    },
+    {
+      "epoch": 9.662857142857142,
+      "grad_norm": 0.12619182467460632,
+      "learning_rate": 1.887710869163284e-06,
+      "loss": 0.0233,
+      "step": 850
+    },
+    {
+      "epoch": 9.72,
+      "grad_norm": 0.12324284762144089,
+      "learning_rate": 1.328376278651705e-06,
+      "loss": 0.023,
+      "step": 855
+    },
+    {
+      "epoch": 9.777142857142858,
+      "grad_norm": 0.11614653468132019,
+      "learning_rate": 8.668557704669122e-07,
+      "loss": 0.0238,
+      "step": 860
+    },
+    {
+      "epoch": 9.834285714285715,
+      "grad_norm": 0.13400106132030487,
+      "learning_rate": 5.033308820289185e-07,
+      "loss": 0.0237,
+      "step": 865
+    },
+    {
+      "epoch": 9.891428571428571,
+      "grad_norm": 0.12201932817697525,
+      "learning_rate": 2.3794460453555045e-07,
+      "loss": 0.0216,
+      "step": 870
+    },
+    {
+      "epoch": 9.948571428571428,
+      "grad_norm": 0.12058448791503906,
+      "learning_rate": 7.080132671774542e-08,
+      "loss": 0.0238,
+      "step": 875
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.15471021831035614,
+      "learning_rate": 1.966793778229725e-09,
+      "loss": 0.0221,
+      "step": 880
     }
   ],
   "logging_steps": 5,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.7149261611008e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null