Training in progress, step 660, checkpoint

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e59a37c02b250fde83ba038c839a2e952bf520ce910f43a7857db234f396d3f0
 size 100697728

 version https://git-lfs.github.com/spec/v1
+oid sha256:66333af72e1725806c8e221908e16e38b77dc29f7dce82420a71ceea2996f731
 size 100697728

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79a9df9fefae970af178ffc5c6daff5bac43c4ffccc8bb655e4c8b0717bde90a
 size 201541754

 version https://git-lfs.github.com/spec/v1
+oid sha256:4cf47a1463ac1ceb4c0c1e0ba9c8532a53101f3ec7c5d55cdea529cb461d992b
 size 201541754

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e72fb99776019a0f4753f71a958f1e0ab8cd89837117e79e8970f4ea20b12a6d
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc6353f9c4c4d14c6900d066e87c1879de52f1aa09da8179a11a66235a737911
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dbd83824b10c800d2a1e10e6af2da6cf8778074505a180484fec6f86647c2253
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5d7053aaab2445f905f900c08b42128e5713d6d142ebe37c511ff095c7697e08
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.22898032200357782,
   "eval_steps": 500,
-  "global_step": 640,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -519,14 +519,30 @@
       "loss": 0.4163,
       "num_input_tokens_seen": 428523,
       "step": 640
     }
   ],
   "logging_steps": 10,
   "max_steps": 2795,
-  "num_input_tokens_seen": 428523,
   "num_train_epochs": 1,
   "save_steps": 20,
-  "total_flos": 9635968987305984.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.23613595706618962,
   "eval_steps": 500,
+  "global_step": 660,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "loss": 0.4163,
       "num_input_tokens_seen": 428523,
       "step": 640
+    },
+    {
+      "epoch": 0.23255813953488372,
+      "grad_norm": 0.35523882508277893,
+      "learning_rate": 0.00015348837209302327,
+      "loss": 0.4237,
+      "num_input_tokens_seen": 436283,
+      "step": 650
+    },
+    {
+      "epoch": 0.23613595706618962,
+      "grad_norm": 0.32238948345184326,
+      "learning_rate": 0.00015277280858676207,
+      "loss": 0.4527,
+      "num_input_tokens_seen": 444368,
+      "step": 660
     }
   ],
   "logging_steps": 10,
   "max_steps": 2795,
+  "num_input_tokens_seen": 444368,
   "num_train_epochs": 1,
   "save_steps": 20,
+  "total_flos": 9992267082399744.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null