Training in progress, step 200, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +49 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2373407f411bdefcd1c1619d4337dc75c28545aecea714e87ab0192268b19c0
 size 161533160

 version https://git-lfs.github.com/spec/v1
+oid sha256:9a95a9f8b7fcf070707492d0022f4160abddf649685c62bd0acb0b2191db9f5e
 size 161533160

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8045b3960c09346b96cc68600f0bdf0804cab1858372d8860af22c20ef37d7a5
 size 323298107

 version https://git-lfs.github.com/spec/v1
+oid sha256:e89b0e30ab40bd6b04823383a4786638df8d21340402e1007a60c2792997f10f
 size 323298107

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9fd9ac6b4a5aea74e8d8f24c3e96e1af47905735c7dc53e6854b8615703b8ae3
 size 14917

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a57f5c296f52913583d889b2b838ab564aea78743337131e3f761d182b38830
 size 14917

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c86ebf50ac9f9127531a301e6ae85c1c8d9423c1122d72f8eafb77301bb870e2
 size 14917

 version https://git-lfs.github.com/spec/v1
+oid sha256:260ec53777947eef3f02712019ac0cb83b4c23f3edf0a8e24296a881ab7c1aa6
 size 14917

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09b104d11e23c43fff8ed10992448da9d6b5482113779cdd276876818600dad4
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0e56cd39bf95ce68a94b21421aa9b66e0b651c8e26272987fddafbf9f0a90ff
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b2bd53166d581902513a9666139854e88807312a6b89c8ab8f4eaed17fb5e63
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:eeaf5d2be120879c36c1f532c4608c5e2ddaf2ba45ca3f2b9987ec0786c20625
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
-  "best_global_step": 150,
-  "best_metric": 0.32990705966949463,
-  "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-150",
-  "epoch": 0.35398230088495575,
   "eval_steps": 50,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -137,6 +137,49 @@
       "eval_samples_per_second": 3.14,
       "eval_steps_per_second": 0.785,
       "step": 150
     }
   ],
   "logging_steps": 10,
@@ -156,7 +199,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.620038096243917e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_global_step": 200,
+  "best_metric": 0.3187030255794525,
+  "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-200",
+  "epoch": 0.471976401179941,
   "eval_steps": 50,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 3.14,
       "eval_steps_per_second": 0.785,
       "step": 150
+    },
+    {
+      "epoch": 0.3775811209439528,
+      "grad_norm": 2.648970603942871,
+      "learning_rate": 1.660849203054426e-05,
+      "loss": 0.1936,
+      "step": 160
+    },
+    {
+      "epoch": 0.40117994100294985,
+      "grad_norm": 2.6948256492614746,
+      "learning_rate": 1.6496656341488462e-05,
+      "loss": 0.2291,
+      "step": 170
+    },
+    {
+      "epoch": 0.4247787610619469,
+      "grad_norm": 3.7231318950653076,
+      "learning_rate": 1.6371265707875017e-05,
+      "loss": 0.2248,
+      "step": 180
+    },
+    {
+      "epoch": 0.44837758112094395,
+      "grad_norm": 1.2238047122955322,
+      "learning_rate": 1.6232532676425206e-05,
+      "loss": 0.1789,
+      "step": 190
+    },
+    {
+      "epoch": 0.471976401179941,
+      "grad_norm": 3.25620174407959,
+      "learning_rate": 1.608069241024588e-05,
+      "loss": 0.3261,
+      "step": 200
+    },
+    {
+      "epoch": 0.471976401179941,
+      "eval_loss": 0.3187030255794525,
+      "eval_runtime": 376.0322,
+      "eval_samples_per_second": 3.138,
+      "eval_steps_per_second": 0.785,
+      "step": 200
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 4.822198368652493e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null