Training in progress, step 380, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +95 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38380338c84497aec8a18e496c8007491081e1964e3da7d16bd8dffe23468bbd
 size 368988278

 version https://git-lfs.github.com/spec/v1
+oid sha256:28dcd3b5382c79a069addedebdf143f227f44ee75cffadd0736ff309183f39ea
 size 368988278

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:30d58cbd13a4b29b5de718babe79e402bafd791f6e65d14ebd2deeb08aeeb6b2
 size 1107079290

 version https://git-lfs.github.com/spec/v1
+oid sha256:8e7e6c876de6760c3f5951c7d6523996217801dcfa744d5a5e3060b6011af62b
 size 1107079290

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:249f286c7b5552b91a0e91534a1db9e637809ff969c652db0450f84ab559237f
 size 1000

 version https://git-lfs.github.com/spec/v1
+oid sha256:d37d146a2261fdeb9c832f15d6c639749fec500eedd688c406191cf349cd4b31
 size 1000

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.309914642153644,
   "eval_steps": 250,
-  "global_step": 250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -190,6 +190,97 @@
       "eval_samples_per_second": 1400.28,
       "eval_steps_per_second": 21.984,
       "step": 250
     }
   ],
   "logging_steps": 10,
@@ -204,12 +295,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.967181164570214e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.9927774130006566,
   "eval_steps": 250,
+  "global_step": 380,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 1400.28,
       "eval_steps_per_second": 21.984,
       "step": 250
+    },
+    {
+      "epoch": 1.3624425476034143,
+      "grad_norm": 5.1875,
+      "learning_rate": 4.973332239271355e-05,
+      "loss": 4.213,
+      "step": 260
+    },
+    {
+      "epoch": 1.4149704530531846,
+      "grad_norm": 4.5625,
+      "learning_rate": 4.9723065561664074e-05,
+      "loss": 4.2137,
+      "step": 270
+    },
+    {
+      "epoch": 1.4674983585029546,
+      "grad_norm": 6.1875,
+      "learning_rate": 4.971280873061459e-05,
+      "loss": 4.2314,
+      "step": 280
+    },
+    {
+      "epoch": 1.5200262639527249,
+      "grad_norm": 4.60546875,
+      "learning_rate": 4.970255189956511e-05,
+      "loss": 4.3171,
+      "step": 290
+    },
+    {
+      "epoch": 1.572554169402495,
+      "grad_norm": 5.1640625,
+      "learning_rate": 4.969229506851563e-05,
+      "loss": 4.2062,
+      "step": 300
+    },
+    {
+      "epoch": 1.6250820748522652,
+      "grad_norm": 4.95703125,
+      "learning_rate": 4.9682038237466155e-05,
+      "loss": 4.2513,
+      "step": 310
+    },
+    {
+      "epoch": 1.6776099803020355,
+      "grad_norm": 4.98046875,
+      "learning_rate": 4.9671781406416677e-05,
+      "loss": 4.2306,
+      "step": 320
+    },
+    {
+      "epoch": 1.7301378857518057,
+      "grad_norm": 4.96875,
+      "learning_rate": 4.96615245753672e-05,
+      "loss": 4.1554,
+      "step": 330
+    },
+    {
+      "epoch": 1.782665791201576,
+      "grad_norm": 4.78515625,
+      "learning_rate": 4.965126774431772e-05,
+      "loss": 4.1829,
+      "step": 340
+    },
+    {
+      "epoch": 1.835193696651346,
+      "grad_norm": 5.61328125,
+      "learning_rate": 4.9641010913268236e-05,
+      "loss": 4.21,
+      "step": 350
+    },
+    {
+      "epoch": 1.887721602101116,
+      "grad_norm": 5.38671875,
+      "learning_rate": 4.9630754082218764e-05,
+      "loss": 4.2034,
+      "step": 360
+    },
+    {
+      "epoch": 1.9402495075508863,
+      "grad_norm": 5.2578125,
+      "learning_rate": 4.962049725116928e-05,
+      "loss": 4.2066,
+      "step": 370
+    },
+    {
+      "epoch": 1.9927774130006566,
+      "grad_norm": 5.6484375,
+      "learning_rate": 4.96102404201198e-05,
+      "loss": 4.1512,
+      "step": 380
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.0599195412046807e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null