Training in progress, step 1350, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/lora_top/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +118 -5

last-checkpoint/lora_top/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:544243a4ad2d25dac28345763c4b1a3a8c1739a2bc60868444f3034f5c58a1e6
 size 6299784

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc3a67f4de1685ce6a0fced5c481b644514ade913e29a302672e9588e575aaad
 size 6299784

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d0e5b197894eaf041b41b2f6fb6f957116cc9a9b767ef8d77107f2a93d846965
 size 12623930

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c3361d74875a43ac74d1859dc2aa87429b4f648ae5c9304cdab88e987b01c49
 size 12623930

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5dc19b6e4aa6d96d209bc3cde10ac40343788cc8e21e98e8b0ad66316abe87b9
 size 14180

 version https://git-lfs.github.com/spec/v1
+oid sha256:d242322c5678470cae524c621709ef41118946651d32e327740afb650f163702
 size 14180

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:80737f34367bed0f31ec1eeecad8be2c3717a20421ad2a8f693a5747cb780b5c
 size 1256

 version https://git-lfs.github.com/spec/v1
+oid sha256:d682cfc5aa181fdf75f0f7c385234b0db148db5e71a3fbb7d749d518ba02734f
 size 1256

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 4.128114223480225,
-  "best_model_checkpoint": "./output/checkpoint-1200",
-  "epoch": 1.8518518518518519,
   "eval_steps": 150,
-  "global_step": 1200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -911,6 +911,119 @@
       "eval_samples_per_second": 39.057,
       "eval_steps_per_second": 39.057,
       "step": 1200
     }
   ],
   "logging_steps": 10,
@@ -930,7 +1043,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4599042483142656.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 4.089999198913574,
+  "best_model_checkpoint": "./output/checkpoint-1350",
+  "epoch": 2.0833333333333335,
   "eval_steps": 150,
+  "global_step": 1350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 39.057,
       "eval_steps_per_second": 39.057,
       "step": 1200
+    },
+    {
+      "epoch": 1.867283950617284,
+      "grad_norm": 3.017685651779175,
+      "learning_rate": 8.786371382380525e-05,
+      "loss": 4.0029,
+      "step": 1210
+    },
+    {
+      "epoch": 1.882716049382716,
+      "grad_norm": 3.601710081100464,
+      "learning_rate": 8.765357330018053e-05,
+      "loss": 3.9062,
+      "step": 1220
+    },
+    {
+      "epoch": 1.8981481481481481,
+      "grad_norm": 3.4925272464752197,
+      "learning_rate": 8.744188498563639e-05,
+      "loss": 3.8948,
+      "step": 1230
+    },
+    {
+      "epoch": 1.9135802469135803,
+      "grad_norm": 2.6413700580596924,
+      "learning_rate": 8.722865758185034e-05,
+      "loss": 4.0399,
+      "step": 1240
+    },
+    {
+      "epoch": 1.9290123456790123,
+      "grad_norm": 3.077667236328125,
+      "learning_rate": 8.701389985376575e-05,
+      "loss": 4.0094,
+      "step": 1250
+    },
+    {
+      "epoch": 1.9444444444444444,
+      "grad_norm": 3.1330454349517822,
+      "learning_rate": 8.679762062923174e-05,
+      "loss": 4.0173,
+      "step": 1260
+    },
+    {
+      "epoch": 1.9598765432098766,
+      "grad_norm": 2.7747910022735596,
+      "learning_rate": 8.657982879864005e-05,
+      "loss": 4.0111,
+      "step": 1270
+    },
+    {
+      "epoch": 1.9753086419753085,
+      "grad_norm": 2.432088851928711,
+      "learning_rate": 8.636053331455984e-05,
+      "loss": 4.0052,
+      "step": 1280
+    },
+    {
+      "epoch": 1.9907407407407407,
+      "grad_norm": 3.8459959030151367,
+      "learning_rate": 8.613974319136955e-05,
+      "loss": 3.9691,
+      "step": 1290
+    },
+    {
+      "epoch": 2.006172839506173,
+      "grad_norm": 3.38808536529541,
+      "learning_rate": 8.591746750488636e-05,
+      "loss": 3.8768,
+      "step": 1300
+    },
+    {
+      "epoch": 2.021604938271605,
+      "grad_norm": 4.70631742477417,
+      "learning_rate": 8.569371539199313e-05,
+      "loss": 3.8564,
+      "step": 1310
+    },
+    {
+      "epoch": 2.037037037037037,
+      "grad_norm": 2.96028208732605,
+      "learning_rate": 8.546849605026287e-05,
+      "loss": 3.913,
+      "step": 1320
+    },
+    {
+      "epoch": 2.052469135802469,
+      "grad_norm": 3.452777624130249,
+      "learning_rate": 8.524181873758057e-05,
+      "loss": 3.7249,
+      "step": 1330
+    },
+    {
+      "epoch": 2.067901234567901,
+      "grad_norm": 4.151051044464111,
+      "learning_rate": 8.501369277176273e-05,
+      "loss": 3.8788,
+      "step": 1340
+    },
+    {
+      "epoch": 2.0833333333333335,
+      "grad_norm": 3.2513532638549805,
+      "learning_rate": 8.478412753017431e-05,
+      "loss": 3.875,
+      "step": 1350
+    },
+    {
+      "epoch": 2.0833333333333335,
+      "eval_loss": 4.089999198913574,
+      "eval_runtime": 13.4234,
+      "eval_samples_per_second": 37.248,
+      "eval_steps_per_second": 37.248,
+      "step": 1350
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 5153548116885504.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null