Training in progress, step 20, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:961f35029677070ede9234dcd4cb050b4d0e492a911978fedb6dffe8847b8d97
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:388de8b1fd2d9b827937b3a78d2f0c4f9e28ffa087987c50cf8c48941c605e27
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb70bad3451bf3a8d2b698f9922fefa5f55f13dcc960b77e5e77adce9eefe424
 size 85723284

 version https://git-lfs.github.com/spec/v1
+oid sha256:37278b058da42c070ece918b5474e74f3a0c469f409efa5127d07d2cc5355219
 size 85723284

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:276ca034010af89433fdc86003e552246afaf008d6bdfe28c94401b0f98e39e6
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:bcc550cfbb47ad4cbc37125ea640e4b6df0c324dad2c713e9b18c9c4eb2ecb33
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0008,
   "eval_steps": 500,
-  "global_step": 10,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -77,6 +77,76 @@
       "learning_rate": 0.00019487179487179487,
       "loss": 0.3175,
       "step": 10
     }
   ],
   "logging_steps": 1,
@@ -96,7 +166,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2489379596009472.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0016,
   "eval_steps": 500,
+  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00019487179487179487,
       "loss": 0.3175,
       "step": 10
+    },
+    {
+      "epoch": 0.00088,
+      "grad_norm": 0.5333597660064697,
+      "learning_rate": 0.00019384615384615385,
+      "loss": 0.5604,
+      "step": 11
+    },
+    {
+      "epoch": 0.00096,
+      "grad_norm": 0.49285152554512024,
+      "learning_rate": 0.00019282051282051282,
+      "loss": 0.6799,
+      "step": 12
+    },
+    {
+      "epoch": 0.00104,
+      "grad_norm": 0.5650416016578674,
+      "learning_rate": 0.00019179487179487182,
+      "loss": 0.3743,
+      "step": 13
+    },
+    {
+      "epoch": 0.00112,
+      "grad_norm": 0.3586512506008148,
+      "learning_rate": 0.0001907692307692308,
+      "loss": 0.3659,
+      "step": 14
+    },
+    {
+      "epoch": 0.0012,
+      "grad_norm": 0.4746282398700714,
+      "learning_rate": 0.00018974358974358974,
+      "loss": 0.511,
+      "step": 15
+    },
+    {
+      "epoch": 0.00128,
+      "grad_norm": 0.5259126424789429,
+      "learning_rate": 0.0001887179487179487,
+      "loss": 0.2649,
+      "step": 16
+    },
+    {
+      "epoch": 0.00136,
+      "grad_norm": 0.5694918036460876,
+      "learning_rate": 0.0001876923076923077,
+      "loss": 0.5794,
+      "step": 17
+    },
+    {
+      "epoch": 0.00144,
+      "grad_norm": 0.6411933898925781,
+      "learning_rate": 0.0001866666666666667,
+      "loss": 0.356,
+      "step": 18
+    },
+    {
+      "epoch": 0.00152,
+      "grad_norm": 0.4190411865711212,
+      "learning_rate": 0.00018564102564102566,
+      "loss": 0.4555,
+      "step": 19
+    },
+    {
+      "epoch": 0.0016,
+      "grad_norm": 0.48357734084129333,
+      "learning_rate": 0.00018461538461538463,
+      "loss": 0.4766,
+      "step": 20
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 5377867744100352.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null