Training in progress, step 800, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +49 -6

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1bbedb7a31d95d8d1b2490b8b28c441ca323e99c258d1384164387363cfdac1a
 size 161533160

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba09a28dfe6c190885d36a748d6d4f606001c428e0ca75dc33ab0d4e2d44b997
 size 161533160

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1add654e0ea460afcfd9053beb5eaeb1ce2bc3f9e227ab20eaa85fbc9c38c987
 size 323298107

 version https://git-lfs.github.com/spec/v1
+oid sha256:2fe0ea1fb40a0dbff711bf22868ea28098368008bf45c321edc6170d0ca527c7
 size 323298107

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:67f523d06e9b3de29a9502a254755eca5e74e9ceb5a4017f452eb1406d0f0e41
 size 14917

 version https://git-lfs.github.com/spec/v1
+oid sha256:2632fce5aa2eaa97af9fae3ad958ab987fb27ed2b126487356b218293af2592e
 size 14917

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:746b3116a1b87bb2926b4d95142b6e208878ab769efed0bb65ee08710fa6901d
 size 14917

 version https://git-lfs.github.com/spec/v1
+oid sha256:0e2ce1fb1d65981d4c19d899ab47cdabeff74c74db29571a6c3208a13f2fe09f
 size 14917

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:820ac3603f892e5dcd7b46df919a32995e52534ba0c4399503f0836c348452fc
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:f763907f0cbf59aa161087688f1a78f56f71e53e8dfeea7ff2de48b9be6de630
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69e8a4610861bc7ffbbca01ffefc787d1ef5581d77904a0eb29981d8a999b0dd
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6e8826d343e1d8f8985fc7998cb7ad286f4d69b1f5a3396d943985a019b6928
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
-  "best_global_step": 750,
-  "best_metric": 0.28498101234436035,
-  "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-750",
-  "epoch": 1.769321533923304,
   "eval_steps": 50,
-  "global_step": 750,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -653,6 +653,49 @@
       "eval_samples_per_second": 3.138,
       "eval_steps_per_second": 0.785,
       "step": 750
     }
   ],
   "logging_steps": 10,
@@ -672,7 +715,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.7992304177656627e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
+  "best_global_step": 800,
+  "best_metric": 0.2848590016365051,
+  "best_model_checkpoint": "/kaggle/working/Llama-Factory-out/checkpoint-800",
+  "epoch": 1.887315634218289,
   "eval_steps": 50,
+  "global_step": 800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 3.138,
       "eval_steps_per_second": 0.785,
       "step": 750
+    },
+    {
+      "epoch": 1.7929203539823009,
+      "grad_norm": 0.9809938669204712,
+      "learning_rate": 5.643573569362806e-07,
+      "loss": 0.1819,
+      "step": 760
+    },
+    {
+      "epoch": 1.8165191740412978,
+      "grad_norm": 3.2049953937530518,
+      "learning_rate": 4.4571905531273924e-07,
+      "loss": 0.2087,
+      "step": 770
+    },
+    {
+      "epoch": 1.840117994100295,
+      "grad_norm": 1.921066403388977,
+      "learning_rate": 3.4073337635982153e-07,
+      "loss": 0.2,
+      "step": 780
+    },
+    {
+      "epoch": 1.8637168141592921,
+      "grad_norm": 1.9557863473892212,
+      "learning_rate": 2.495782788388865e-07,
+      "loss": 0.1669,
+      "step": 790
+    },
+    {
+      "epoch": 1.887315634218289,
+      "grad_norm": 3.648777484893799,
+      "learning_rate": 1.7240827761718658e-07,
+      "loss": 0.1845,
+      "step": 800
+    },
+    {
+      "epoch": 1.887315634218289,
+      "eval_loss": 0.2848590016365051,
+      "eval_runtime": 376.2036,
+      "eval_samples_per_second": 3.137,
+      "eval_steps_per_second": 0.784,
+      "step": 800
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.921058365023191e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null