Training in progress, step 80, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +59 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09db3585ef14eb3513d30889662c80d7633ff3cd63316b82ef586dfb3b8d7b3e
 size 73911112

 version https://git-lfs.github.com/spec/v1
+oid sha256:c4627a384ccfb90bc59a01e8a25eb50a60e5c9d9a7cbc4781a18e1ed2574854e
 size 73911112

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e71be3572f6a22a56350f714efc69d9efa4b44853add45b77f9c1f30917185b6
 size 148053627

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca2150c51d63f89d1bc1d324172bc8d004bf72d5ec3681b8fa7f23855405bf6f
 size 148053627

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7a8c328066465d046257df5f5a7cb6a0fb0a5264c4955cc6474ec70ef06d414
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:97ac64e05ebf46af43b314fca9573cea163c2749e9f4c0fcee8ac426f60f5872
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66992a317492aac8acf09f50982fcc03e33ec7b4ccd6cedceda25e040f1d8941
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:254aa974d264413ca1b60d5980136d8531f6cb9e9b28340b9a2daa2496d48c4c
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9953d7880628d69ca8ebcf3260c665a4b6406ffee3f2366f52cd98bb07b2bfff
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:f784d5566457719ec24fd9e3c13871706202c23a784efd0979d8609621573572
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0533333333333332,
   "eval_steps": 100,
-  "global_step": 40,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -64,6 +64,62 @@
       "learning_rate": 0.0004184239109116393,
       "loss": 0.7014,
       "step": 40
     }
   ],
   "logging_steps": 5,
@@ -83,7 +139,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2579880952725504.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 2.1066666666666665,
   "eval_steps": 100,
+  "global_step": 80,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004184239109116393,
       "loss": 0.7014,
       "step": 40
+    },
+    {
+      "epoch": 1.1866666666666668,
+      "grad_norm": 0.13756456971168518,
+      "learning_rate": 0.00038809124324012647,
+      "loss": 0.6655,
+      "step": 45
+    },
+    {
+      "epoch": 1.32,
+      "grad_norm": 0.14693589508533478,
+      "learning_rate": 0.00035449008622169586,
+      "loss": 0.6315,
+      "step": 50
+    },
+    {
+      "epoch": 1.4533333333333334,
+      "grad_norm": 0.16421127319335938,
+      "learning_rate": 0.0003184157475180208,
+      "loss": 0.6525,
+      "step": 55
+    },
+    {
+      "epoch": 1.5866666666666667,
+      "grad_norm": 0.1636400669813156,
+      "learning_rate": 0.00028072207266617854,
+      "loss": 0.6398,
+      "step": 60
+    },
+    {
+      "epoch": 1.72,
+      "grad_norm": 0.16314196586608887,
+      "learning_rate": 0.00024230123536095747,
+      "loss": 0.6836,
+      "step": 65
+    },
+    {
+      "epoch": 1.8533333333333335,
+      "grad_norm": 0.1881810426712036,
+      "learning_rate": 0.0002040626205458574,
+      "loss": 0.6227,
+      "step": 70
+    },
+    {
+      "epoch": 1.9866666666666668,
+      "grad_norm": 0.1938481628894806,
+      "learning_rate": 0.00016691130013008512,
+      "loss": 0.6343,
+      "step": 75
+    },
+    {
+      "epoch": 2.1066666666666665,
+      "grad_norm": 0.18422311544418335,
+      "learning_rate": 0.0001317266107909975,
+      "loss": 0.6193,
+      "step": 80
     }
   ],
   "logging_steps": 5,
       "attributes": {}
     }
   },
+  "total_flos": 5159761905451008.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null