Training in progress, step 500, checkpoint

Files changed (7) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82c59ec2d8158df03dc91fa7bf16e8507d566511b5411f9c67de05e90ccd6e91
 size 136000488

 version https://git-lfs.github.com/spec/v1
+oid sha256:adb2b72f8723f4968194cbd9677da9f0d3b95a3bb2b456223aff39c48f66ce55
 size 136000488

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a884ac11bbb6fd1dc22614df6924f8dd43c3773bcea3ac35a711f03ab60f071f
-size 268170437

 version https://git-lfs.github.com/spec/v1
+oid sha256:a6317f9659375722e0ffcd3bddd1b4887cc0d0a47a8d9f521c95f0e1777589cb
+size 268176506

last-checkpoint/rng_state_0.pth ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:4949af6620fa4439fb03a0b241a4168f13ef96a69dbb8f746f58c0945aaef872
+size 14512

last-checkpoint/rng_state_1.pth ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:206aee04b08be982c9ae1d22485ea9da39a726e50a79ff31b2bb78bdde57bf85
+size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e71f795f060986bf4341bea9d8720ae7bfda0098d9ee25a8bcebb898741611d0
-size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:49d75955e6c714b42e871e5da259951f06c1c7e918ec09a61baa0ed321fb603c
+size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.06210408644888834,
   "eval_steps": 500,
   "global_step": 500,
   "is_hyper_param_search": false,
@@ -9,50 +9,50 @@
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.01,
-      "learning_rate": 9.998757918271022e-05,
-      "loss": 1.4897,
       "step": 100
     },
     {
-      "epoch": 0.02,
-      "learning_rate": 9.997515836542045e-05,
-      "loss": 1.4177,
       "step": 200
     },
     {
-      "epoch": 0.04,
-      "learning_rate": 9.996273754813067e-05,
-      "loss": 1.3882,
       "step": 300
     },
     {
-      "epoch": 0.05,
-      "learning_rate": 9.99503167308409e-05,
-      "loss": 1.4079,
       "step": 400
     },
     {
-      "epoch": 0.06,
-      "learning_rate": 9.993789591355111e-05,
-      "loss": 1.3846,
       "step": 500
     },
     {
-      "epoch": 0.06,
-      "eval_loss": 1.3667649030685425,
-      "eval_runtime": 632.9435,
-      "eval_samples_per_second": 25.53,
-      "eval_steps_per_second": 0.4,
       "step": 500
     }
   ],
   "logging_steps": 100,
-  "max_steps": 805100,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 100,
   "save_steps": 500,
-  "total_flos": 2500969057152000.0,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.12419274714356682,
   "eval_steps": 500,
   "global_step": 500,
   "is_hyper_param_search": false,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.02,
+      "learning_rate": 9.997516145057129e-05,
+      "loss": 1.4447,
       "step": 100
     },
     {
+      "epoch": 0.05,
+      "learning_rate": 9.995032290114258e-05,
+      "loss": 1.3886,
       "step": 200
     },
     {
+      "epoch": 0.07,
+      "learning_rate": 9.992548435171386e-05,
+      "loss": 1.3792,
       "step": 300
     },
     {
+      "epoch": 0.1,
+      "learning_rate": 9.990064580228516e-05,
+      "loss": 1.3473,
       "step": 400
     },
     {
+      "epoch": 0.12,
+      "learning_rate": 9.987580725285644e-05,
+      "loss": 1.3557,
       "step": 500
     },
     {
+      "epoch": 0.12,
+      "eval_loss": 1.3079262971878052,
+      "eval_runtime": 75.0933,
+      "eval_samples_per_second": 215.186,
+      "eval_steps_per_second": 1.691,
       "step": 500
     }
   ],
   "logging_steps": 100,
+  "max_steps": 402600,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 100,
   "save_steps": 500,
+  "total_flos": 5001938038947840.0,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6d59a9adb744b2a139b8a100e72cc86e9fa7d2ee2e2b625692bb081e86590b4
-size 4283

 version https://git-lfs.github.com/spec/v1
+oid sha256:17377382dfbf5abb92d5eacb4e8dfb49159713b7f4459e7175a1e6ac74f6b199
+size 4728