Upload 8 files

Browse files

Files changed (6) hide show

optimizer.pt +1 -1
pytorch_model.bin +1 -1
rng_state.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +28 -88
training_args.bin +1 -1

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c169775c366010e4c5eaffadb34df1ccf4d9628fd6397bae1c7164e585adaf43
 size 686681861

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2b2b4d3e8b3ca0710d7da08763dd08c9fc1b6c9025b455de35158b306e28224
 size 686681861

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93ab5ed813a4061484bf42a72dcb86ac4bd68e8e621bd367d2d73de57acf6916
 size 347081849

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ebed1c9c75137b486f7f2940de8644ac13858c1d6a8d3695a6a51065b153c29
 size 347081849

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c800d296d274356dc7e1009bd7b9538d79376fb6a4db1bb2d6965f7da8688b3b
 size 14639

 version https://git-lfs.github.com/spec/v1
+oid sha256:066b0a5d1c6a4b9d7d21cbb83f91a0857780e54f0c0d79887ffbc027746303b7
 size 14639

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b91afddbb850d225c9270f7a06401619e44b4668e24d6085fb2561753e178a88
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f0539d160c7887cdd2d15e84b70d756c48293cc6e18cf2a2dcdbf254b4cb7de
 size 627

trainer_state.json CHANGED Viewed

@@ -1,118 +1,58 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 5.763363091874788,
-  "global_step": 51000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
-    {
-      "epoch": 0.34,
-      "learning_rate": 9.915244660413606e-06,
-      "loss": 3.3726,
-      "step": 3000
-    },
     {
       "epoch": 0.68,
-      "learning_rate": 9.830489320827214e-06,
-      "loss": 2.1283,
-      "step": 6000
-    },
-    {
-      "epoch": 1.02,
-      "learning_rate": 9.745733981240819e-06,
-      "loss": 1.8301,
-      "step": 9000
     },
     {
       "epoch": 1.36,
-      "learning_rate": 9.660978641654425e-06,
-      "loss": 1.6611,
-      "step": 12000
-    },
-    {
-      "epoch": 1.7,
-      "learning_rate": 9.57622330206803e-06,
-      "loss": 1.6012,
-      "step": 15000
     },
     {
       "epoch": 2.03,
-      "learning_rate": 9.491467962481637e-06,
-      "loss": 1.5255,
-      "step": 18000
-    },
-    {
-      "epoch": 2.37,
-      "learning_rate": 9.406712622895243e-06,
-      "loss": 1.4598,
-      "step": 21000
     },
     {
-      "epoch": 2.71,
-      "learning_rate": 9.321957283308849e-06,
-      "loss": 1.4156,
-      "step": 24000
     },
     {
-      "epoch": 3.05,
-      "learning_rate": 9.237201943722456e-06,
-      "loss": 1.3908,
-      "step": 27000
     },
     {
-      "epoch": 3.39,
-      "learning_rate": 9.152446604136061e-06,
-      "loss": 1.3406,
       "step": 30000
     },
     {
-      "epoch": 3.73,
-      "learning_rate": 9.067691264549667e-06,
-      "loss": 1.3277,
-      "step": 33000
-    },
-    {
-      "epoch": 4.07,
-      "learning_rate": 8.982935924963272e-06,
-      "loss": 1.3095,
-      "step": 36000
-    },
-    {
-      "epoch": 4.41,
-      "learning_rate": 8.89818058537688e-06,
-      "loss": 1.253,
-      "step": 39000
-    },
-    {
-      "epoch": 4.75,
-      "learning_rate": 8.813425245790485e-06,
-      "loss": 1.2553,
-      "step": 42000
-    },
-    {
-      "epoch": 5.09,
-      "learning_rate": 8.72866990620409e-06,
-      "loss": 1.2177,
-      "step": 45000
-    },
-    {
-      "epoch": 5.42,
-      "learning_rate": 8.643914566617696e-06,
-      "loss": 1.198,
-      "step": 48000
-    },
-    {
-      "epoch": 5.76,
-      "learning_rate": 8.559159227031304e-06,
-      "loss": 1.2163,
-      "step": 51000
     }
   ],
-  "max_steps": 353960,
   "num_train_epochs": 40,
-  "total_flos": 3.162362109935616e+19,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 9.03954802259887,
+  "global_step": 40000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.68,
+      "learning_rate": 9.830508474576272e-06,
+      "loss": 3.108,
+      "step": 3000
     },
     {
       "epoch": 1.36,
+      "learning_rate": 9.661016949152544e-06,
+      "loss": 2.0197,
+      "step": 6000
     },
     {
       "epoch": 2.03,
+      "learning_rate": 9.491525423728815e-06,
+      "loss": 1.6483,
+      "step": 9000
     },
     {
+      "epoch": 2.26,
+      "learning_rate": 9.435028248587572e-06,
+      "loss": 1.4748,
+      "step": 10000
     },
     {
+      "epoch": 4.52,
+      "learning_rate": 8.870056497175143e-06,
+      "loss": 1.2986,
+      "step": 20000
     },
     {
+      "epoch": 6.78,
+      "learning_rate": 8.305084745762712e-06,
+      "loss": 1.074,
       "step": 30000
     },
     {
+      "epoch": 9.04,
+      "learning_rate": 7.740112994350283e-06,
+      "loss": 0.9305,
+      "step": 40000
     }
   ],
+  "max_steps": 177000,
   "num_train_epochs": 40,
+  "total_flos": 4.960009951683527e+19,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02ae9bc04ec11a5436f1c2b5d27a074129d73b2e6339507f9d2f4304a8062bf8
 size 3643

 version https://git-lfs.github.com/spec/v1
+oid sha256:d951a2f5f0efbcf1dae67bc7db374d5cf8dc574b8d82c1b01a1d9990a79c623e
 size 3643