Training in progress, step 570000

Browse files

Files changed (13) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/rng_state_4.pth +1 -1
last-checkpoint/rng_state_5.pth +1 -1
last-checkpoint/rng_state_6.pth +1 -1
last-checkpoint/rng_state_7.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +203 -3
pytorch_model.bin +1 -1

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b1cc1f37f7674d19f50d0ac065ef69529e5d29f5bb20b814471f7b204857988
 size 893439185

 version https://git-lfs.github.com/spec/v1
+oid sha256:3bb93720ada86a6004ba26eb4fabd56849226e35d2f46baf4052697153c666bb
 size 893439185

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e49d2c84e31d61487864f2465a53de7f412017d3a4351764e556c2063f04b645
 size 449471589

 version https://git-lfs.github.com/spec/v1
+oid sha256:e478b13a76d10a83f73453a6a99a172c3f9841bd66c63610def2c769bf0b203a
 size 449471589

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
 size 14503

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
 size 14503

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
 size 14503

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
 size 14503

last-checkpoint/rng_state_4.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
 size 14503

last-checkpoint/rng_state_5.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
 size 14503

last-checkpoint/rng_state_6.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
 size 14503

last-checkpoint/rng_state_7.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48b59e1e03e49cc431ae4100dc25b1494b2774eb8a3efc82fdd3d6eb3cf7405c
 size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:d389965857734bb2eeaa6cafe0b8f8e001a5d59ae127d36647124f18dacab70e
 size 14503

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af9a25b33d29c3fd157c79676adec6abbe35f3978d907c7efc857fe0437c64ac
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:e62e41706c3cdebd0963ceae6fb24ae079cf26e6452a67e31e4c02f3a80456e6
 size 623

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 6.245608556483722,
-  "global_step": 560000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11206,11 +11206,211 @@
       "eval_samples_per_second": 882.169,
       "eval_steps_per_second": 13.826,
       "step": 560000
     }
   ],
   "max_steps": 1000000,
   "num_train_epochs": 12,
-  "total_flos": 3.925581759014346e+22,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 6.357137280706646,
+  "global_step": 570000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 882.169,
       "eval_steps_per_second": 13.826,
       "step": 560000
+    },
+    {
+      "epoch": 6.25,
+      "learning_rate": 7.180111803267856e-05,
+      "loss": 0.2054,
+      "step": 560500
+    },
+    {
+      "epoch": 6.26,
+      "learning_rate": 7.168618306865838e-05,
+      "loss": 0.2051,
+      "step": 561000
+    },
+    {
+      "epoch": 6.26,
+      "eval_loss": 0.195304736495018,
+      "eval_runtime": 2.4332,
+      "eval_samples_per_second": 944.014,
+      "eval_steps_per_second": 14.795,
+      "step": 561000
+    },
+    {
+      "epoch": 6.26,
+      "learning_rate": 7.157127083429626e-05,
+      "loss": 0.2053,
+      "step": 561500
+    },
+    {
+      "epoch": 6.27,
+      "learning_rate": 7.145638164375779e-05,
+      "loss": 0.205,
+      "step": 562000
+    },
+    {
+      "epoch": 6.27,
+      "eval_loss": 0.1958540827035904,
+      "eval_runtime": 2.6408,
+      "eval_samples_per_second": 869.814,
+      "eval_steps_per_second": 13.632,
+      "step": 562000
+    },
+    {
+      "epoch": 6.27,
+      "learning_rate": 7.134151581114565e-05,
+      "loss": 0.2053,
+      "step": 562500
+    },
+    {
+      "epoch": 6.28,
+      "learning_rate": 7.122667365049869e-05,
+      "loss": 0.2052,
+      "step": 563000
+    },
+    {
+      "epoch": 6.28,
+      "eval_loss": 0.19526307284832,
+      "eval_runtime": 2.6193,
+      "eval_samples_per_second": 876.966,
+      "eval_steps_per_second": 13.744,
+      "step": 563000
+    },
+    {
+      "epoch": 6.28,
+      "learning_rate": 7.111185547579099e-05,
+      "loss": 0.205,
+      "step": 563500
+    },
+    {
+      "epoch": 6.29,
+      "learning_rate": 7.099706160093098e-05,
+      "loss": 0.2051,
+      "step": 564000
+    },
+    {
+      "epoch": 6.29,
+      "eval_loss": 0.1962643265724182,
+      "eval_runtime": 2.4959,
+      "eval_samples_per_second": 920.299,
+      "eval_steps_per_second": 14.423,
+      "step": 564000
+    },
+    {
+      "epoch": 6.3,
+      "learning_rate": 7.08822923397608e-05,
+      "loss": 0.2054,
+      "step": 564500
+    },
+    {
+      "epoch": 6.3,
+      "learning_rate": 7.076754800605516e-05,
+      "loss": 0.2053,
+      "step": 565000
+    },
+    {
+      "epoch": 6.3,
+      "eval_loss": 0.19500210881233215,
+      "eval_runtime": 2.6355,
+      "eval_samples_per_second": 871.546,
+      "eval_steps_per_second": 13.659,
+      "step": 565000
+    },
+    {
+      "epoch": 6.31,
+      "learning_rate": 7.065282891352078e-05,
+      "loss": 0.2049,
+      "step": 565500
+    },
+    {
+      "epoch": 6.31,
+      "learning_rate": 7.053813537579523e-05,
+      "loss": 0.2052,
+      "step": 566000
+    },
+    {
+      "epoch": 6.31,
+      "eval_loss": 0.1964665800333023,
+      "eval_runtime": 2.6178,
+      "eval_samples_per_second": 877.444,
+      "eval_steps_per_second": 13.752,
+      "step": 566000
+    },
+    {
+      "epoch": 6.32,
+      "learning_rate": 7.042346770644624e-05,
+      "loss": 0.2046,
+      "step": 566500
+    },
+    {
+      "epoch": 6.32,
+      "learning_rate": 7.030882621897088e-05,
+      "loss": 0.2046,
+      "step": 567000
+    },
+    {
+      "epoch": 6.32,
+      "eval_loss": 0.19378143548965454,
+      "eval_runtime": 2.6471,
+      "eval_samples_per_second": 867.729,
+      "eval_steps_per_second": 13.6,
+      "step": 567000
+    },
+    {
+      "epoch": 6.33,
+      "learning_rate": 7.019421122679455e-05,
+      "loss": 0.2052,
+      "step": 567500
+    },
+    {
+      "epoch": 6.33,
+      "learning_rate": 7.00796230432703e-05,
+      "loss": 0.2045,
+      "step": 568000
+    },
+    {
+      "epoch": 6.33,
+      "eval_loss": 0.1938391774892807,
+      "eval_runtime": 2.5793,
+      "eval_samples_per_second": 890.552,
+      "eval_steps_per_second": 13.957,
+      "step": 568000
+    },
+    {
+      "epoch": 6.34,
+      "learning_rate": 6.996506198167789e-05,
+      "loss": 0.2046,
+      "step": 568500
+    },
+    {
+      "epoch": 6.35,
+      "learning_rate": 6.985052835522279e-05,
+      "loss": 0.2045,
+      "step": 569000
+    },
+    {
+      "epoch": 6.35,
+      "eval_loss": 0.19408397376537323,
+      "eval_runtime": 2.5021,
+      "eval_samples_per_second": 918.029,
+      "eval_steps_per_second": 14.388,
+      "step": 569000
+    },
+    {
+      "epoch": 6.35,
+      "learning_rate": 6.973602247703561e-05,
+      "loss": 0.2047,
+      "step": 569500
+    },
+    {
+      "epoch": 6.36,
+      "learning_rate": 6.962154466017105e-05,
+      "loss": 0.2047,
+      "step": 570000
+    },
+    {
+      "epoch": 6.36,
+      "eval_loss": 0.19305509328842163,
+      "eval_runtime": 2.5881,
+      "eval_samples_per_second": 887.536,
+      "eval_steps_per_second": 13.91,
+      "step": 570000
     }
   ],
   "max_steps": 1000000,
   "num_train_epochs": 12,
+  "total_flos": 3.9956820200446935e+22,
   "trial_name": null,
   "trial_params": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e49d2c84e31d61487864f2465a53de7f412017d3a4351764e556c2063f04b645
 size 449471589

 version https://git-lfs.github.com/spec/v1
+oid sha256:e478b13a76d10a83f73453a6a99a172c3f9841bd66c63610def2c769bf0b203a
 size 449471589