Training in progress, step 270, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +102 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2b68f939a1ac16287167cfe01619979f639ace315e759fe243def56bc7e709a
 size 479005064

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ebe99d1ad4c258f1237f57aef6685beeb79122cdc7802ac0bacc3a47816f656
 size 479005064

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c25e2c917453589390f69ad73c1e25677e6004a421b6e103961137e3994126f
 size 958299770

 version https://git-lfs.github.com/spec/v1
+oid sha256:0caf5f99eabea4f6ea151c88df39573a4bdb60aba4df360468a0e44f174ae47b
 size 958299770

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d245e05e72192c132e0f2edb6fdcae0c578c890f0fe912f17ec7b0bba2d38cc3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:e3e5d946241df2516b06d7074d8779088eae7607173ad780df56583910a9589b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f994073fa65bd3eeec886197c2259b5a6406cdb6b5ea5f198b369e2b78371547
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:aca50632b9dcfeaf56f29cc41af869dfc765fe5c731289691cb32c1dd52ebe96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 7.411214953271028,
   "eval_steps": 100,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -304,6 +304,104 @@
       "eval_samples_per_second": 9.296,
       "eval_steps_per_second": 9.296,
       "step": 200
     }
   ],
   "logging_steps": 5,
@@ -318,12 +416,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 5.452252790312141e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 10.0,
   "eval_steps": 100,
+  "global_step": 270,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.296,
       "eval_steps_per_second": 9.296,
       "step": 200
+    },
+    {
+      "epoch": 7.598130841121495,
+      "grad_norm": 0.2390548288822174,
+      "learning_rate": 1.7123931571546827e-05,
+      "loss": 0.0116,
+      "step": 205
+    },
+    {
+      "epoch": 7.785046728971962,
+      "grad_norm": 0.1910872757434845,
+      "learning_rate": 1.4759117090312197e-05,
+      "loss": 0.0122,
+      "step": 210
+    },
+    {
+      "epoch": 7.97196261682243,
+      "grad_norm": 0.18526358902454376,
+      "learning_rate": 1.25415076745532e-05,
+      "loss": 0.0128,
+      "step": 215
+    },
+    {
+      "epoch": 8.149532710280374,
+      "grad_norm": 0.14037036895751953,
+      "learning_rate": 1.0480366524062042e-05,
+      "loss": 0.0104,
+      "step": 220
+    },
+    {
+      "epoch": 8.336448598130842,
+      "grad_norm": 0.1647230088710785,
+      "learning_rate": 8.584303253381847e-06,
+      "loss": 0.0096,
+      "step": 225
+    },
+    {
+      "epoch": 8.523364485981308,
+      "grad_norm": 0.12098614126443863,
+      "learning_rate": 6.861237928494579e-06,
+      "loss": 0.0087,
+      "step": 230
+    },
+    {
+      "epoch": 8.710280373831775,
+      "grad_norm": 0.1445273905992508,
+      "learning_rate": 5.318367983829392e-06,
+      "loss": 0.0091,
+      "step": 235
+    },
+    {
+      "epoch": 8.897196261682243,
+      "grad_norm": 0.14538030326366425,
+      "learning_rate": 3.962138157783085e-06,
+      "loss": 0.0093,
+      "step": 240
+    },
+    {
+      "epoch": 9.074766355140186,
+      "grad_norm": 0.11064188182353973,
+      "learning_rate": 2.798213572335001e-06,
+      "loss": 0.0082,
+      "step": 245
+    },
+    {
+      "epoch": 9.261682242990654,
+      "grad_norm": 0.12358280271291733,
+      "learning_rate": 1.8314560692059835e-06,
+      "loss": 0.0088,
+      "step": 250
+    },
+    {
+      "epoch": 9.448598130841122,
+      "grad_norm": 0.11256805807352066,
+      "learning_rate": 1.0659039014077944e-06,
+      "loss": 0.0078,
+      "step": 255
+    },
+    {
+      "epoch": 9.63551401869159,
+      "grad_norm": 0.13209514319896698,
+      "learning_rate": 5.047548650136513e-07,
+      "loss": 0.0093,
+      "step": 260
+    },
+    {
+      "epoch": 9.822429906542055,
+      "grad_norm": 0.11688179522752762,
+      "learning_rate": 1.503529416103988e-07,
+      "loss": 0.0082,
+      "step": 265
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.1742771714925766,
+      "learning_rate": 4.178507228136397e-09,
+      "loss": 0.0076,
+      "step": 270
     }
   ],
   "logging_steps": 5,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 7.343746636637798e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null