Training in progress, step 14910, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +80 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13c49d13392c3fae06cd958148dc1d8c21c82e46c55b624c32c6a4d49c7eed06
 size 959732416

 version https://git-lfs.github.com/spec/v1
+oid sha256:64427f25569a563b8c2defbd84ca69ba4d90afd505c9e406a4c8f353022714a8
 size 959732416

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f694fa68799379ed20f77406780e6a592acec751b9b1e5634daa36c050d17eac
 size 1919732806

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9fe8d213c768c62a0886a9765a45bf43aa8b4e68e16590e5b09647c80ca6fc6
 size 1919732806

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95bd719bf1326b1281aa016ff1c8f48d41524f5feaf22af9267d2acf3daac222
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e900a969e8846016f93435d082cd2003d0d6d8a749d2feaf16f2a2d17a65f38
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddcca23b21d6bdd8ad51b065d4aa1989126b3480f39bbd5a003d90f3b5366d5d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e84bdec20434ed7e461167492f3ba162916331ef4833923d73a78f7731f8135
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.35345405767941,
   "eval_steps": 500,
-  "global_step": 10000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -186,6 +186,82 @@
       "learning_rate": 1.6465459423205902e-05,
       "loss": 0.0164,
       "step": 10000
     }
   ],
   "logging_steps": 500,
@@ -200,12 +276,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.216931469785039e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 5.0,
   "eval_steps": 500,
+  "global_step": 14910,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.6465459423205902e-05,
       "loss": 0.0164,
       "step": 10000
+    },
+    {
+      "epoch": 3.52112676056338,
+      "grad_norm": 0.18832048773765564,
+      "learning_rate": 1.4788732394366198e-05,
+      "loss": 0.0145,
+      "step": 10500
+    },
+    {
+      "epoch": 3.6887994634473507,
+      "grad_norm": 0.2905516028404236,
+      "learning_rate": 1.3112005365526492e-05,
+      "loss": 0.0136,
+      "step": 11000
+    },
+    {
+      "epoch": 3.8564721663313213,
+      "grad_norm": 0.187967449426651,
+      "learning_rate": 1.1435278336686788e-05,
+      "loss": 0.0126,
+      "step": 11500
+    },
+    {
+      "epoch": 4.0,
+      "eval_gen_len": 100.0,
+      "eval_loss": 0.010295581072568893,
+      "eval_rouge1": 11.5414,
+      "eval_rouge2": 6.9713,
+      "eval_rougeL": 11.5097,
+      "eval_rougeLsum": 11.6223,
+      "eval_runtime": 1527.8852,
+      "eval_samples_per_second": 0.868,
+      "eval_steps_per_second": 0.217,
+      "step": 11928
+    },
+    {
+      "epoch": 4.0241448692152915,
+      "grad_norm": 0.2498534917831421,
+      "learning_rate": 9.758551307847083e-06,
+      "loss": 0.0112,
+      "step": 12000
+    },
+    {
+      "epoch": 4.1918175720992625,
+      "grad_norm": 0.2614792585372925,
+      "learning_rate": 8.08182427900738e-06,
+      "loss": 0.0083,
+      "step": 12500
+    },
+    {
+      "epoch": 4.359490274983233,
+      "grad_norm": 0.19220410287380219,
+      "learning_rate": 6.4050972501676726e-06,
+      "loss": 0.0081,
+      "step": 13000
+    },
+    {
+      "epoch": 4.527162977867203,
+      "grad_norm": 0.14213241636753082,
+      "learning_rate": 4.7283702213279675e-06,
+      "loss": 0.0074,
+      "step": 13500
+    },
+    {
+      "epoch": 4.694835680751174,
+      "grad_norm": 0.15386663377285004,
+      "learning_rate": 3.051643192488263e-06,
+      "loss": 0.0083,
+      "step": 14000
+    },
+    {
+      "epoch": 4.862508383635144,
+      "grad_norm": 0.3057624101638794,
+      "learning_rate": 1.3749161636485581e-06,
+      "loss": 0.0071,
+      "step": 14500
     }
   ],
   "logging_steps": 500,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.0760159508037632e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null