Training in progress, step 270, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +102 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6ecb69f42c4102287be171762bb98640bcc97b4631086383d36dcfa0f5d1a677
 size 295488936

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae3b1fa8c31aabbe9ab45a20bd05cb09885864f566e475e30f433820052b4171
 size 295488936

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58b4a023d7f908cb321232429392e5505fde05980c4e0083630c2ac62ba19284
 size 591203178

 version https://git-lfs.github.com/spec/v1
+oid sha256:2fdf8c6ddfc5f151321a3c63a95bb87e96c0c51829e1a421ff772c199002a38a
 size 591203178

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d245e05e72192c132e0f2edb6fdcae0c578c890f0fe912f17ec7b0bba2d38cc3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e3e5d946241df2516b06d7074d8779088eae7607173ad780df56583910a9589b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f994073fa65bd3eeec886197c2259b5a6406cdb6b5ea5f198b369e2b78371547
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:aca50632b9dcfeaf56f29cc41af869dfc765fe5c731289691cb32c1dd52ebe96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 7.411214953271028,
   "eval_steps": 100,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -304,6 +304,104 @@
       "eval_samples_per_second": 11.956,
       "eval_steps_per_second": 11.956,
       "step": 200
     }
   ],
   "logging_steps": 5,
@@ -318,12 +416,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.607353817710592e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 10.0,
   "eval_steps": 100,
+  "global_step": 270,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 11.956,
       "eval_steps_per_second": 11.956,
       "step": 200
+    },
+    {
+      "epoch": 7.598130841121495,
+      "grad_norm": 0.2857516407966614,
+      "learning_rate": 1.7123931571546827e-05,
+      "loss": 0.0151,
+      "step": 205
+    },
+    {
+      "epoch": 7.785046728971962,
+      "grad_norm": 0.22625453770160675,
+      "learning_rate": 1.4759117090312197e-05,
+      "loss": 0.0156,
+      "step": 210
+    },
+    {
+      "epoch": 7.97196261682243,
+      "grad_norm": 0.21999120712280273,
+      "learning_rate": 1.25415076745532e-05,
+      "loss": 0.0151,
+      "step": 215
+    },
+    {
+      "epoch": 8.149532710280374,
+      "grad_norm": 0.16067098081111908,
+      "learning_rate": 1.0480366524062042e-05,
+      "loss": 0.0133,
+      "step": 220
+    },
+    {
+      "epoch": 8.336448598130842,
+      "grad_norm": 0.18020516633987427,
+      "learning_rate": 8.584303253381847e-06,
+      "loss": 0.0122,
+      "step": 225
+    },
+    {
+      "epoch": 8.523364485981308,
+      "grad_norm": 0.16721676290035248,
+      "learning_rate": 6.861237928494579e-06,
+      "loss": 0.012,
+      "step": 230
+    },
+    {
+      "epoch": 8.710280373831775,
+      "grad_norm": 0.18559329211711884,
+      "learning_rate": 5.318367983829392e-06,
+      "loss": 0.0121,
+      "step": 235
+    },
+    {
+      "epoch": 8.897196261682243,
+      "grad_norm": 0.1803818792104721,
+      "learning_rate": 3.962138157783085e-06,
+      "loss": 0.0126,
+      "step": 240
+    },
+    {
+      "epoch": 9.074766355140186,
+      "grad_norm": 0.15274249017238617,
+      "learning_rate": 2.798213572335001e-06,
+      "loss": 0.0113,
+      "step": 245
+    },
+    {
+      "epoch": 9.261682242990654,
+      "grad_norm": 0.15818247199058533,
+      "learning_rate": 1.8314560692059835e-06,
+      "loss": 0.0112,
+      "step": 250
+    },
+    {
+      "epoch": 9.448598130841122,
+      "grad_norm": 0.14977800846099854,
+      "learning_rate": 1.0659039014077944e-06,
+      "loss": 0.0105,
+      "step": 255
+    },
+    {
+      "epoch": 9.63551401869159,
+      "grad_norm": 0.16695314645767212,
+      "learning_rate": 5.047548650136513e-07,
+      "loss": 0.0122,
+      "step": 260
+    },
+    {
+      "epoch": 9.822429906542055,
+      "grad_norm": 0.14471110701560974,
+      "learning_rate": 1.503529416103988e-07,
+      "loss": 0.0111,
+      "step": 265
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.23003625869750977,
+      "learning_rate": 4.178507228136397e-09,
+      "loss": 0.0103,
+      "step": 270
     }
   ],
   "logging_steps": 5,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 3.511896195158016e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null