Training in progress, step 250, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +46 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c27d9594ab02f9adc827e9cc100409e429b6fa6e5da458f22196243beabd0e12
 size 250490408

 version https://git-lfs.github.com/spec/v1
+oid sha256:1aeb37187936d017f6bd51736738766a1f2cd5d041a79e47044d4da52589bf50
 size 250490408

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3184cf8f88bad2cfb5a68fc4094b566ab6b6cce219f98681e60f0f6402fd93e
 size 255265850

 version https://git-lfs.github.com/spec/v1
+oid sha256:8d8581669082f016d0e4e6b12b964126b4e2bcf55585a4a4b4bf94b576f2041e
 size 255265850

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e44ef27abe50e5bba1d9636856695c0706b4a69481203dbe05866fc8428b12b
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:30ca12c54f4164ace515795e08e0960f0c28e1845dd3bb744b613ac48e9edba6
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c36c84de099f12bcc525eb47423becd04c47e16e865404a5529083e8a6215c3a
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:df12b162a1b11037b9375aff4eed1b0f26be6ed2687bf1154d65e88dde5f9250
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9cd11d413bc67bf01de9a1a006e9e7655be307353028b25f5b3c299e5b6b7a44
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:748ed266e9432323e41e747f49e84d108918da883711ff6e01c8135af1c286fd
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 16.666666666666668,
   "eval_steps": 50,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -187,6 +187,49 @@
       "eval_samples_per_second": 141.537,
       "eval_steps_per_second": 2.263,
       "step": 200
     }
   ],
   "logging_steps": 10,
@@ -206,7 +249,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2971581575790592e+16,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 20.833333333333332,
   "eval_steps": 50,
+  "global_step": 250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 141.537,
       "eval_steps_per_second": 2.263,
       "step": 200
+    },
+    {
+      "epoch": 17.5,
+      "grad_norm": 3.984375,
+      "learning_rate": 0.00015469481581224272,
+      "loss": 3.2145,
+      "step": 210
+    },
+    {
+      "epoch": 18.333333333333332,
+      "grad_norm": 4.0,
+      "learning_rate": 0.00015000000000000001,
+      "loss": 3.1833,
+      "step": 220
+    },
+    {
+      "epoch": 19.166666666666668,
+      "grad_norm": 4.28125,
+      "learning_rate": 0.00014515333583108896,
+      "loss": 3.1072,
+      "step": 230
+    },
+    {
+      "epoch": 20.0,
+      "grad_norm": 3.328125,
+      "learning_rate": 0.00014016954246529696,
+      "loss": 3.0609,
+      "step": 240
+    },
+    {
+      "epoch": 20.833333333333332,
+      "grad_norm": 4.0,
+      "learning_rate": 0.00013506375551927547,
+      "loss": 3.0201,
+      "step": 250
+    },
+    {
+      "epoch": 20.833333333333332,
+      "eval_loss": 3.0752980709075928,
+      "eval_runtime": 10.4283,
+      "eval_samples_per_second": 143.936,
+      "eval_steps_per_second": 2.301,
+      "step": 250
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.6216828598550528e+16,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null