Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/model-00001-of-00004.safetensors +1 -1
last-checkpoint/model-00002-of-00004.safetensors +1 -1
last-checkpoint/model-00003-of-00004.safetensors +1 -1
last-checkpoint/model-00004-of-00004.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +26 -19
last-checkpoint/training_args.bin +1 -1

last-checkpoint/model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b5c20dc59c769d88fe81b2b0843f23d2dd86b47b4591ef40bad9115cd2b4385
 size 4874664552

 version https://git-lfs.github.com/spec/v1
+oid sha256:72fe86bc02adec23b28b68c15b10a96fd293a20ee6a1cdaafd0cf18e1685dc20
 size 4874664552

last-checkpoint/model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d8ef29282721049bf0104c9d2701bfa26a60707403dbad153e661e9df28751f
 size 4932751008

 version https://git-lfs.github.com/spec/v1
+oid sha256:f0db6c3053a7deda030e77cbf9dc2889c468ba509b50f75ffb580a36cf57e0ef
 size 4932751008

last-checkpoint/model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a175e2a7592da2781889f8d417906512e54b2c4cf1831f1316e119d4722001f4
 size 4330865200

 version https://git-lfs.github.com/spec/v1
+oid sha256:0d92a224e994fa1a22383ebe54bf00bb1077af44379b3bf42e7679b3dbab9987
 size 4330865200

last-checkpoint/model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d149d0190b4a1e8c630935731679b382222057f720f060d7786d7b42e66238bd
 size 1086998656

 version https://git-lfs.github.com/spec/v1
+oid sha256:cdd5806b5d6ec7a64af996081c0f79be909a2e91f7faf708ef85bc77a39e2f04
 size 1086998656

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fbd3cf7e9f20636c638ad5a407323f188cfb4d34ff7182f544b0b533f9bad084
 size 15465450874

 version https://git-lfs.github.com/spec/v1
+oid sha256:264d9f5983d4e9539188419005a2b5a0ae23bbe058e7cb34ad670a166a35860a
 size 15465450874

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:73d7e7515bf9366d4128b5be03fea719bb7ed473249f57b8c5a1f2a51581ebf8
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:c620e9c1e58182d1e495bef166b392358f6580ca0b286508365d4c14b06ac944
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.2094240837696335,
-  "eval_steps": 40,
-  "global_step": 40,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11,53 +11,60 @@
     {
       "epoch": 0.005235602094240838,
       "eval_loss": 2.384796619415283,
-      "eval_runtime": 61.2033,
-      "eval_samples_per_second": 24.525,
-      "eval_steps_per_second": 6.143,
       "step": 1
     },
     {
       "epoch": 0.05235602094240838,
       "grad_norm": 5.15625,
       "learning_rate": 3.6363636363636364e-05,
-      "loss": 1.9243,
       "step": 10
     },
     {
       "epoch": 0.10471204188481675,
-      "grad_norm": 4.5625,
       "learning_rate": 7.272727272727273e-05,
-      "loss": 1.8451,
       "step": 20
     },
     {
       "epoch": 0.15706806282722513,
       "grad_norm": 3.484375,
       "learning_rate": 0.00010909090909090909,
-      "loss": 1.8315,
       "step": 30
     },
     {
       "epoch": 0.2094240837696335,
       "grad_norm": 3.390625,
       "learning_rate": 0.00014545454545454546,
-      "loss": 2.0039,
       "step": 40
     },
     {
-      "epoch": 0.2094240837696335,
-      "eval_loss": 1.9543291330337524,
-      "eval_runtime": 73.5405,
-      "eval_samples_per_second": 20.411,
-      "eval_steps_per_second": 5.113,
-      "step": 40
     }
   ],
   "logging_steps": 10,
   "max_steps": 1100,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 6,
-  "save_steps": 20,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
@@ -70,7 +77,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.3898457261539328e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.2617801047120419,
+  "eval_steps": 50,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
     {
       "epoch": 0.005235602094240838,
       "eval_loss": 2.384796619415283,
+      "eval_runtime": 62.0394,
+      "eval_samples_per_second": 24.194,
+      "eval_steps_per_second": 6.061,
       "step": 1
     },
     {
       "epoch": 0.05235602094240838,
       "grad_norm": 5.15625,
       "learning_rate": 3.6363636363636364e-05,
+      "loss": 1.9242,
       "step": 10
     },
     {
       "epoch": 0.10471204188481675,
+      "grad_norm": 4.59375,
       "learning_rate": 7.272727272727273e-05,
+      "loss": 1.845,
       "step": 20
     },
     {
       "epoch": 0.15706806282722513,
       "grad_norm": 3.484375,
       "learning_rate": 0.00010909090909090909,
+      "loss": 1.8311,
       "step": 30
     },
     {
       "epoch": 0.2094240837696335,
       "grad_norm": 3.390625,
       "learning_rate": 0.00014545454545454546,
+      "loss": 2.0034,
       "step": 40
     },
     {
+      "epoch": 0.2617801047120419,
+      "grad_norm": 3.59375,
+      "learning_rate": 0.00018181818181818183,
+      "loss": 2.2024,
+      "step": 50
+    },
+    {
+      "epoch": 0.2617801047120419,
+      "eval_loss": 2.1606948375701904,
+      "eval_runtime": 60.3553,
+      "eval_samples_per_second": 24.869,
+      "eval_steps_per_second": 6.23,
+      "step": 50
     }
   ],
   "logging_steps": 10,
   "max_steps": 1100,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 6,
+  "save_steps": 50,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
       "attributes": {}
     }
   },
+  "total_flos": 1.737307157692416e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2dccb17e5942fb2737bb1fdd5b88f91e0f16655a5f1ad194d5593b47e06f8592
 size 6968

 version https://git-lfs.github.com/spec/v1
+oid sha256:32a7a8613e5fa3317cdc198f56f6d0577b15eb3e0cf0efd4aa72ac710a8260e0
 size 6968