Training in progress, step 14910, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +80 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a83f1be33217b1b069056dacb0ff8ae37760d3cda76a1ebd1a3907d70c03df0
 size 959732416

 version https://git-lfs.github.com/spec/v1
+oid sha256:1a5bc33aedf743d24362840e92ed9fd65f57a07b6f3028844387a1394db71d39
 size 959732416

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e60223a37bbcf405c01528488fe445b676105c6f23500b842ac3f6ab1b5b6eb
 size 1919732806

 version https://git-lfs.github.com/spec/v1
+oid sha256:d18d09ee54385bab2a21626dc12365516fd785fee97f305ca844268c8eb24254
 size 1919732806

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95bd719bf1326b1281aa016ff1c8f48d41524f5feaf22af9267d2acf3daac222
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1e900a969e8846016f93435d082cd2003d0d6d8a749d2feaf16f2a2d17a65f38
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ddcca23b21d6bdd8ad51b065d4aa1989126b3480f39bbd5a003d90f3b5366d5d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9e84bdec20434ed7e461167492f3ba162916331ef4833923d73a78f7731f8135
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 3.35345405767941,
   "eval_steps": 500,
-  "global_step": 10000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -186,6 +186,82 @@
       "learning_rate": 1.6465459423205902e-05,
       "loss": 0.0166,
       "step": 10000
     }
   ],
   "logging_steps": 500,
@@ -200,12 +276,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 7.216931469785039e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 5.0,
   "eval_steps": 500,
+  "global_step": 14910,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.6465459423205902e-05,
       "loss": 0.0166,
       "step": 10000
+    },
+    {
+      "epoch": 3.52112676056338,
+      "grad_norm": 0.2037188857793808,
+      "learning_rate": 1.4788732394366198e-05,
+      "loss": 0.0131,
+      "step": 10500
+    },
+    {
+      "epoch": 3.6887994634473507,
+      "grad_norm": 0.46498414874076843,
+      "learning_rate": 1.3112005365526492e-05,
+      "loss": 0.0142,
+      "step": 11000
+    },
+    {
+      "epoch": 3.8564721663313213,
+      "grad_norm": 0.1339973509311676,
+      "learning_rate": 1.1435278336686788e-05,
+      "loss": 0.0131,
+      "step": 11500
+    },
+    {
+      "epoch": 4.0,
+      "eval_gen_len": 100.0,
+      "eval_loss": 0.008116796612739563,
+      "eval_rouge1": 10.8352,
+      "eval_rouge2": 5.9986,
+      "eval_rougeL": 10.7776,
+      "eval_rougeLsum": 10.8786,
+      "eval_runtime": 1543.4295,
+      "eval_samples_per_second": 0.859,
+      "eval_steps_per_second": 0.215,
+      "step": 11928
+    },
+    {
+      "epoch": 4.0241448692152915,
+      "grad_norm": 0.11288804560899734,
+      "learning_rate": 9.758551307847083e-06,
+      "loss": 0.0113,
+      "step": 12000
+    },
+    {
+      "epoch": 4.1918175720992625,
+      "grad_norm": 0.18565210700035095,
+      "learning_rate": 8.08182427900738e-06,
+      "loss": 0.0088,
+      "step": 12500
+    },
+    {
+      "epoch": 4.359490274983233,
+      "grad_norm": 0.2599884867668152,
+      "learning_rate": 6.4050972501676726e-06,
+      "loss": 0.0083,
+      "step": 13000
+    },
+    {
+      "epoch": 4.527162977867203,
+      "grad_norm": 0.11985205858945847,
+      "learning_rate": 4.7283702213279675e-06,
+      "loss": 0.008,
+      "step": 13500
+    },
+    {
+      "epoch": 4.694835680751174,
+      "grad_norm": 0.10148580372333527,
+      "learning_rate": 3.051643192488263e-06,
+      "loss": 0.0079,
+      "step": 14000
+    },
+    {
+      "epoch": 4.862508383635144,
+      "grad_norm": 0.19774800539016724,
+      "learning_rate": 1.3749161636485581e-06,
+      "loss": 0.007,
+      "step": 14500
     }
   ],
   "logging_steps": 500,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.0760159508037632e+19,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null