Training in progress, step 140000

Browse files

Files changed (13) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +203 -3
pytorch_model.bin +1 -1

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62c0a2c7e077b8baa73fd828f2e70985bcefeea8be38a9936a8140714c1e4c47
 size 50044689

 version https://git-lfs.github.com/spec/v1
+oid sha256:878ac2af256d90283abe99c8603dab07e40eb73da1c3655fc21a49086d6f8483
 size 50044689

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc20de398adc6319c715613716f2c9d4ce124e70ee41a98ab642ac175fb204e5
 size 25761253

 version https://git-lfs.github.com/spec/v1
+oid sha256:34c22557eb08501f7a24373ea155511c565e71575e567e7ff811308f57ab5e12
 size 25761253

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e66d48adb544b295049d3e3a54b3728811cc41b1dee1e6eae561cecd176107ac
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
 size 14503

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e66d48adb544b295049d3e3a54b3728811cc41b1dee1e6eae561cecd176107ac
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
 size 14503

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e66d48adb544b295049d3e3a54b3728811cc41b1dee1e6eae561cecd176107ac
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
 size 14503

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e66d48adb544b295049d3e3a54b3728811cc41b1dee1e6eae561cecd176107ac
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
 size 14503

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e66d48adb544b295049d3e3a54b3728811cc41b1dee1e6eae561cecd176107ac
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
 size 14503

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e66d48adb544b295049d3e3a54b3728811cc41b1dee1e6eae561cecd176107ac
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
 size 14503

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e66d48adb544b295049d3e3a54b3728811cc41b1dee1e6eae561cecd176107ac
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
 size 14503

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e66d48adb544b295049d3e3a54b3728811cc41b1dee1e6eae561cecd176107ac
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e2c0aada9b191e5f2d821af5ad22cc6daf3753d3e556038bf559d65cf4419b6
 size 14503

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa61e63d6ec853afa02e48d5167bab30a383bd9f05f192b20c686fb9a3478097
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:ed68d92642b5c57649c135331b8243d8047b1dee7f4eb5f6f68f9dc4d2f32821
 size 623

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 7.965686274509804,
-  "global_step": 130000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2606,11 +2606,211 @@
       "eval_samples_per_second": 729.142,
       "eval_steps_per_second": 11.666,
       "step": 130000
     }
   ],
   "max_steps": 250000,
   "num_train_epochs": 16,
-  "total_flos": 2.0821139637301475e+21,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 8.57843137254902,
+  "global_step": 140000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 729.142,
       "eval_steps_per_second": 11.666,
       "step": 130000
+    },
+    {
+      "epoch": 8.0,
+      "learning_rate": 0.00030792659356955893,
+      "loss": 0.4657,
+      "step": 130500
+    },
+    {
+      "epoch": 8.03,
+      "learning_rate": 0.0003059755454143586,
+      "loss": 0.4653,
+      "step": 131000
+    },
+    {
+      "epoch": 8.03,
+      "eval_loss": 0.8070600628852844,
+      "eval_runtime": 1.2854,
+      "eval_samples_per_second": 777.995,
+      "eval_steps_per_second": 12.448,
+      "step": 131000
+    },
+    {
+      "epoch": 8.06,
+      "learning_rate": 0.00030402445458564144,
+      "loss": 0.4649,
+      "step": 131500
+    },
+    {
+      "epoch": 8.09,
+      "learning_rate": 0.0003020734064304411,
+      "loss": 0.4647,
+      "step": 132000
+    },
+    {
+      "epoch": 8.09,
+      "eval_loss": 0.799366295337677,
+      "eval_runtime": 1.2985,
+      "eval_samples_per_second": 770.128,
+      "eval_steps_per_second": 12.322,
+      "step": 132000
+    },
+    {
+      "epoch": 8.12,
+      "learning_rate": 0.00030012248629392423,
+      "loss": 0.4647,
+      "step": 132500
+    },
+    {
+      "epoch": 8.15,
+      "learning_rate": 0.00029817177951565793,
+      "loss": 0.4639,
+      "step": 133000
+    },
+    {
+      "epoch": 8.15,
+      "eval_loss": 0.8033633232116699,
+      "eval_runtime": 1.2955,
+      "eval_samples_per_second": 771.897,
+      "eval_steps_per_second": 12.35,
+      "step": 133000
+    },
+    {
+      "epoch": 8.18,
+      "learning_rate": 0.00029622137142587594,
+      "loss": 0.4637,
+      "step": 133500
+    },
+    {
+      "epoch": 8.21,
+      "learning_rate": 0.0002942713473417466,
+      "loss": 0.4634,
+      "step": 134000
+    },
+    {
+      "epoch": 8.21,
+      "eval_loss": 0.8022355437278748,
+      "eval_runtime": 1.3019,
+      "eval_samples_per_second": 768.12,
+      "eval_steps_per_second": 12.29,
+      "step": 134000
+    },
+    {
+      "epoch": 8.24,
+      "learning_rate": 0.00029232179256364054,
+      "loss": 0.4631,
+      "step": 134500
+    },
+    {
+      "epoch": 8.27,
+      "learning_rate": 0.0002903727923713994,
+      "loss": 0.4656,
+      "step": 135000
+    },
+    {
+      "epoch": 8.27,
+      "eval_loss": 0.8051571249961853,
+      "eval_runtime": 1.3053,
+      "eval_samples_per_second": 766.083,
+      "eval_steps_per_second": 12.257,
+      "step": 135000
+    },
+    {
+      "epoch": 8.3,
+      "learning_rate": 0.00028842443202060556,
+      "loss": 0.4625,
+      "step": 135500
+    },
+    {
+      "epoch": 8.33,
+      "learning_rate": 0.00028647679673885255,
+      "loss": 0.4623,
+      "step": 136000
+    },
+    {
+      "epoch": 8.33,
+      "eval_loss": 0.7988797426223755,
+      "eval_runtime": 1.3231,
+      "eval_samples_per_second": 755.795,
+      "eval_steps_per_second": 12.093,
+      "step": 136000
+    },
+    {
+      "epoch": 8.36,
+      "learning_rate": 0.000284529971722017,
+      "loss": 0.462,
+      "step": 136500
+    },
+    {
+      "epoch": 8.39,
+      "learning_rate": 0.0002825840421305321,
+      "loss": 0.4617,
+      "step": 137000
+    },
+    {
+      "epoch": 8.39,
+      "eval_loss": 0.7993477582931519,
+      "eval_runtime": 1.2892,
+      "eval_samples_per_second": 775.645,
+      "eval_steps_per_second": 12.41,
+      "step": 137000
+    },
+    {
+      "epoch": 8.43,
+      "learning_rate": 0.00028063909308566196,
+      "loss": 0.4616,
+      "step": 137500
+    },
+    {
+      "epoch": 8.46,
+      "learning_rate": 0.00027869520966577874,
+      "loss": 0.4612,
+      "step": 138000
+    },
+    {
+      "epoch": 8.46,
+      "eval_loss": 0.8003228902816772,
+      "eval_runtime": 1.2968,
+      "eval_samples_per_second": 771.107,
+      "eval_steps_per_second": 12.338,
+      "step": 138000
+    },
+    {
+      "epoch": 8.49,
+      "learning_rate": 0.00027675247690264027,
+      "loss": 0.461,
+      "step": 138500
+    },
+    {
+      "epoch": 8.52,
+      "learning_rate": 0.0002748109797776715,
+      "loss": 0.4608,
+      "step": 139000
+    },
+    {
+      "epoch": 8.52,
+      "eval_loss": 0.7989851236343384,
+      "eval_runtime": 1.2801,
+      "eval_samples_per_second": 781.164,
+      "eval_steps_per_second": 12.499,
+      "step": 139000
+    },
+    {
+      "epoch": 8.55,
+      "learning_rate": 0.0002728708032182461,
+      "loss": 0.4603,
+      "step": 139500
+    },
+    {
+      "epoch": 8.58,
+      "learning_rate": 0.0002709320320939721,
+      "loss": 0.4603,
+      "step": 140000
+    },
+    {
+      "epoch": 8.58,
+      "eval_loss": 0.8073873519897461,
+      "eval_runtime": 1.2866,
+      "eval_samples_per_second": 777.265,
+      "eval_steps_per_second": 12.436,
+      "step": 140000
     }
   ],
   "max_steps": 250000,
   "num_train_epochs": 16,
+  "total_flos": 2.2422719560923365e+21,
   "trial_name": null,
   "trial_params": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc20de398adc6319c715613716f2c9d4ce124e70ee41a98ab642ac175fb204e5
 size 25761253

 version https://git-lfs.github.com/spec/v1
+oid sha256:34c22557eb08501f7a24373ea155511c565e71575e567e7ff811308f57ab5e12
 size 25761253