Training in progress, step 150, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +46 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:11a9b69a02962a84d97f38904eabe5b4f53cc044fc070efd4a92ce22c97e4e05
 size 250490408

 version https://git-lfs.github.com/spec/v1
+oid sha256:dfb6c282582daa46607f42fb293fdbf3819e8d75d52dd0d1b623400802d28b35
 size 250490408

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd5b3908315305669c6ecf2973c1e26d14718d1532bcd29022c36b98c149b484
 size 255265850

 version https://git-lfs.github.com/spec/v1
+oid sha256:cfedfce98860d74e92fcd3e31dca7238b9b93ac3bee488a9a090a1706464f3a0
 size 255265850

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e4027fefb69e105f5311c9cb16f281c69cccb2a472492c81e3a929896e7e87d
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:9b63e3edd0c0cc48f086057ce6e75022d83d87a3b00734c15ba24422849770cd
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4791734ef084625a7f903446f63b41709ec4aa45432e4cf043c49101a53ddaee
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:186e88827d2475291b71f4a0a4127f7fbf5706df899ca40fe1878eb7c8301c05
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d429d3d3635edcf38935f000b0d1f4e5db465042c289fb4623c33dce588231ab
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:bb0e11d33e42a9adcc5c976e37e059307e91eb6ae74c969ff1b3eb2f755782d6
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 8.333333333333334,
   "eval_steps": 50,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -101,6 +101,49 @@
       "eval_samples_per_second": 140.864,
       "eval_steps_per_second": 2.252,
       "step": 100
     }
   ],
   "logging_steps": 10,
@@ -120,7 +163,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6486313364619264.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 12.5,
   "eval_steps": 50,
+  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 140.864,
       "eval_steps_per_second": 2.252,
       "step": 100
+    },
+    {
+      "epoch": 9.166666666666666,
+      "grad_norm": 5.0625,
+      "learning_rate": 0.00019043571606975777,
+      "loss": 4.1085,
+      "step": 110
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 4.59375,
+      "learning_rate": 0.0001879473751206489,
+      "loss": 4.0046,
+      "step": 120
+    },
+    {
+      "epoch": 10.833333333333334,
+      "grad_norm": 4.875,
+      "learning_rate": 0.00018519194088383273,
+      "loss": 3.8989,
+      "step": 130
+    },
+    {
+      "epoch": 11.666666666666666,
+      "grad_norm": 3.90625,
+      "learning_rate": 0.0001821777815225245,
+      "loss": 3.7925,
+      "step": 140
+    },
+    {
+      "epoch": 12.5,
+      "grad_norm": 3.9375,
+      "learning_rate": 0.00017891405093963938,
+      "loss": 3.7077,
+      "step": 150
+    },
+    {
+      "epoch": 12.5,
+      "eval_loss": 3.263496160507202,
+      "eval_runtime": 10.1832,
+      "eval_samples_per_second": 147.399,
+      "eval_steps_per_second": 2.357,
+      "step": 150
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 9728424859926528.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null