Training in progress, step 3750, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +116 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e0ee8d2c58b69427f284488a89998c72e8bc4d55f0e05511449056f38778ee4
 size 264070024

 version https://git-lfs.github.com/spec/v1
+oid sha256:39266e7c64916445e7e1ffff7265128efcb14a748bf4d12222a6942b9efa8b29
 size 264070024

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df98e4ef5bda3ea5cd69e656addd52fa660b9f056128429fd1d464f169749690
 size 510816186

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f0e7d5ff1fdaf9d83fb8c2078034a345f95639cc95b7cc4e41a9b5157c9280f
 size 510816186

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65aab9aa5ddc29e8a011ec08503d5b4039cc9f073875c54a893f84d1988646ad
 size 14180

 version https://git-lfs.github.com/spec/v1
+oid sha256:c28553b4def846f33c1be5403cbde93da158c64099cf7041a0cc043a46e7afc1
 size 14180

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77a323baafebfbff16ec081ee1c71fdd1ae4559d76e02b5c97760983e07befc3
 size 1256

 version https://git-lfs.github.com/spec/v1
+oid sha256:6fee260ff98ca59616802fa3adc6624edb5c906343c2fcd860cb04b54c36a948
 size 1256

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.084647297859192,
   "best_model_checkpoint": "./output/checkpoint-2850",
-  "epoch": 2.5192442267319803,
   "eval_steps": 150,
-  "global_step": 3600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2719,6 +2719,119 @@
       "eval_samples_per_second": 14.759,
       "eval_steps_per_second": 14.759,
       "step": 3600
     }
   ],
   "logging_steps": 10,
@@ -2738,7 +2851,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.1982155608264704e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 1.084647297859192,
   "best_model_checkpoint": "./output/checkpoint-2850",
+  "epoch": 2.624212736179146,
   "eval_steps": 150,
+  "global_step": 3750,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 14.759,
       "eval_steps_per_second": 14.759,
       "step": 3600
+    },
+    {
+      "epoch": 2.526242127361791,
+      "grad_norm": 5.060881614685059,
+      "learning_rate": 1.8575520105593817e-05,
+      "loss": 0.3923,
+      "step": 3610
+    },
+    {
+      "epoch": 2.5332400279916025,
+      "grad_norm": 6.2594709396362305,
+      "learning_rate": 1.8326822041411524e-05,
+      "loss": 0.4288,
+      "step": 3620
+    },
+    {
+      "epoch": 2.540237928621414,
+      "grad_norm": 3.7940304279327393,
+      "learning_rate": 1.807942593751973e-05,
+      "loss": 0.3647,
+      "step": 3630
+    },
+    {
+      "epoch": 2.5472358292512247,
+      "grad_norm": 5.75860595703125,
+      "learning_rate": 1.783334196340331e-05,
+      "loss": 0.3543,
+      "step": 3640
+    },
+    {
+      "epoch": 2.5542337298810356,
+      "grad_norm": 4.745769500732422,
+      "learning_rate": 1.758858023461059e-05,
+      "loss": 0.3768,
+      "step": 3650
+    },
+    {
+      "epoch": 2.561231630510847,
+      "grad_norm": 5.631198406219482,
+      "learning_rate": 1.7345150812337564e-05,
+      "loss": 0.3826,
+      "step": 3660
+    },
+    {
+      "epoch": 2.5682295311406578,
+      "grad_norm": 5.964677333831787,
+      "learning_rate": 1.7103063703014372e-05,
+      "loss": 0.3529,
+      "step": 3670
+    },
+    {
+      "epoch": 2.575227431770469,
+      "grad_norm": 5.345946311950684,
+      "learning_rate": 1.6862328857893854e-05,
+      "loss": 0.3153,
+      "step": 3680
+    },
+    {
+      "epoch": 2.58222533240028,
+      "grad_norm": 4.739876747131348,
+      "learning_rate": 1.66229561726426e-05,
+      "loss": 0.3521,
+      "step": 3690
+    },
+    {
+      "epoch": 2.589223233030091,
+      "grad_norm": 5.451272964477539,
+      "learning_rate": 1.6384955486934156e-05,
+      "loss": 0.3648,
+      "step": 3700
+    },
+    {
+      "epoch": 2.596221133659902,
+      "grad_norm": 5.133406162261963,
+      "learning_rate": 1.614833658404454e-05,
+      "loss": 0.4097,
+      "step": 3710
+    },
+    {
+      "epoch": 2.603219034289713,
+      "grad_norm": 5.587733745574951,
+      "learning_rate": 1.5913109190450032e-05,
+      "loss": 0.3669,
+      "step": 3720
+    },
+    {
+      "epoch": 2.6102169349195243,
+      "grad_norm": 4.743875026702881,
+      "learning_rate": 1.567928297542749e-05,
+      "loss": 0.3723,
+      "step": 3730
+    },
+    {
+      "epoch": 2.617214835549335,
+      "grad_norm": 5.686123847961426,
+      "learning_rate": 1.544686755065677e-05,
+      "loss": 0.3921,
+      "step": 3740
+    },
+    {
+      "epoch": 2.624212736179146,
+      "grad_norm": 6.688653469085693,
+      "learning_rate": 1.5215872469825682e-05,
+      "loss": 0.4218,
+      "step": 3750
+    },
+    {
+      "epoch": 2.624212736179146,
+      "eval_loss": 1.1890102624893188,
+      "eval_runtime": 33.4721,
+      "eval_samples_per_second": 14.938,
+      "eval_steps_per_second": 14.938,
+      "step": 3750
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 4.372563304182989e+17,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null