Training in progress, step 100000

Browse files

Files changed (7) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +71 -3
pytorch_model.bin +1 -1

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb0a61ab643c18ccf5b4cde2db1687dfb3353ac9730e884873954f6a4914ee49
 size 893439185

 version https://git-lfs.github.com/spec/v1
+oid sha256:e0083228159a9684fe6abf7fc7f3682abd4e091c336c63a24ba76d9736a69586
 size 893439185

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e8beb25b1f0bcf41b4de421e9957ad88915f0b34dec0216d6304d37fb6ed0fc2
 size 449471589

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e59ec613175c3d9dd17cb09bb4ec1c875973ee63862b916c52950e17b26a470
 size 449471589

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc2ae884d96e2e98bf3a22796a951063fc339dc13602ff01679fb67e0849a907
 size 15459

 version https://git-lfs.github.com/spec/v1
+oid sha256:cccf7b9fd3b0a19dfeea5197c3f0bc5fb356c93fa129ae332264dec9fa909108
 size 15459

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb8cb9021f3a60f9e715cf2cfd338061faee5a6bd0df4d32e00e1489dc2742d6
 size 559

 version https://git-lfs.github.com/spec/v1
+oid sha256:4db330b47705f38a44d9ca503524acd867a7f5b9c7d09cea471cc2d7ef717581
 size 559

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1fa03e39186130ec116897cef176842ce103a90b1b243dfa00171a77a06fae55
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:53825b8848113cf8bc0172552a5393f9bdd93287620ad2f38f87ba251b8fb391
 size 623

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 7.529119874640898,
-  "global_step": 95000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1298,11 +1298,79 @@
       "eval_samples_per_second": 76.398,
       "eval_steps_per_second": 1.207,
       "step": 95000
     }
   ],
   "max_steps": 100000,
   "num_train_epochs": 9,
-  "total_flos": 4.474341992332253e+21,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 7.964394532950291,
+  "global_step": 100000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 76.398,
       "eval_steps_per_second": 1.207,
       "step": 95000
+    },
+    {
+      "epoch": 7.57,
+      "learning_rate": 1.0459487036725839e-05,
+      "loss": 0.4031,
+      "step": 95500
+    },
+    {
+      "epoch": 7.62,
+      "learning_rate": 1.0363840719071717e-05,
+      "loss": 0.4023,
+      "step": 96000
+    },
+    {
+      "epoch": 7.66,
+      "learning_rate": 1.0279462058166865e-05,
+      "loss": 0.4025,
+      "step": 96500
+    },
+    {
+      "epoch": 7.7,
+      "learning_rate": 1.0206033689125313e-05,
+      "loss": 0.4028,
+      "step": 97000
+    },
+    {
+      "epoch": 7.75,
+      "learning_rate": 1.0143753941878168e-05,
+      "loss": 0.4023,
+      "step": 97500
+    },
+    {
+      "epoch": 7.79,
+      "learning_rate": 1.0092638183021144e-05,
+      "loss": 0.4019,
+      "step": 98000
+    },
+    {
+      "epoch": 7.83,
+      "learning_rate": 1.0052699024602892e-05,
+      "loss": 0.4014,
+      "step": 98500
+    },
+    {
+      "epoch": 7.88,
+      "learning_rate": 1.0023946321013112e-05,
+      "loss": 0.4018,
+      "step": 99000
+    },
+    {
+      "epoch": 7.92,
+      "learning_rate": 1.0006411110358469e-05,
+      "loss": 0.4024,
+      "step": 99500
+    },
+    {
+      "epoch": 7.96,
+      "learning_rate": 1.0000027439232365e-05,
+      "loss": 0.4024,
+      "step": 100000
+    },
+    {
+      "epoch": 7.96,
+      "eval_loss": 0.38628044724464417,
+      "eval_runtime": 91.7648,
+      "eval_samples_per_second": 54.487,
+      "eval_steps_per_second": 0.861,
+      "step": 100000
     }
   ],
   "max_steps": 100000,
   "num_train_epochs": 9,
+  "total_flos": 4.7098350596970145e+21,
   "trial_name": null,
   "trial_params": null
 }

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e8beb25b1f0bcf41b4de421e9957ad88915f0b34dec0216d6304d37fb6ed0fc2
 size 449471589

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e59ec613175c3d9dd17cb09bb4ec1c875973ee63862b916c52950e17b26a470
 size 449471589