Model save

Files changed (7) hide show

README.md CHANGED Viewed

@@ -27,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/alicogniai-cognichip/huggingface/runs/fp28nxtc)
 This model was trained with SFT.

 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/alicogniai-cognichip/huggingface/runs/ugrxdaei)
 This model was trained with SFT.

all_results.json CHANGED Viewed

@@ -1,8 +1,13 @@
 {
-    "total_flos": 333840384000.0,
-    "train_loss": 0.35875171422958374,
-    "train_runtime": 14.4271,
     "train_samples": 1097,
-    "train_samples_per_second": 5.407,
-    "train_steps_per_second": 0.069
 }

 {
+    "eval_loss": 0.2394731640815735,
+    "eval_runtime": 0.9108,
+    "eval_samples": 843,
+    "eval_samples_per_second": 59.289,
+    "eval_steps_per_second": 2.196,
+    "total_flos": 3345047902748672.0,
+    "train_loss": 0.18449299203024971,
+    "train_runtime": 21.9388,
     "train_samples": 1097,
+    "train_samples_per_second": 3.555,
+    "train_steps_per_second": 0.41
 }

eval_results.json CHANGED Viewed

@@ -1,8 +1,7 @@
 {
-    "epoch": 1.0,
-    "eval_loss": 0.7804026007652283,
-    "eval_runtime": 0.7988,
-    "eval_samples": 100,
-    "eval_samples_per_second": 160.238,
-    "eval_steps_per_second": 5.007
 }

 {
+    "eval_loss": 0.2394731640815735,
+    "eval_runtime": 0.9108,
+    "eval_samples": 843,
+    "eval_samples_per_second": 59.289,
+    "eval_steps_per_second": 2.196
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dd924a11b4c220f385b51ffa522daea7c9f3d850e31b162bb5661df483c6d3ee
-size 3087467144

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2baa415c431b3e0c69ca0065979e0ee8127c2f06f44ac2b4c68d8c6b5241eeb
+size 3554214752

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "total_flos": 333840384000.0,
-    "train_loss": 0.35875171422958374,
-    "train_runtime": 14.4271,
     "train_samples": 1097,
-    "train_samples_per_second": 5.407,
-    "train_steps_per_second": 0.069
 }

 {
+    "total_flos": 3345047902748672.0,
+    "train_loss": 0.18449299203024971,
+    "train_runtime": 21.9388,
     "train_samples": 1097,
+    "train_samples_per_second": 3.555,
+    "train_steps_per_second": 0.41
 }

trainer_state.json CHANGED Viewed

@@ -1,26 +1,34 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0,
   "eval_steps": 100,
-  "global_step": 1,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 1.0,
-      "mean_token_accuracy": 0.9492798329413153,
-      "step": 1,
-      "total_flos": 333840384000.0,
-      "train_loss": 0.35875171422958374,
-      "train_runtime": 14.4271,
-      "train_samples_per_second": 5.407,
-      "train_steps_per_second": 0.069
     }
   ],
   "logging_steps": 5,
-  "max_steps": 1,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 500,
@@ -36,8 +44,8 @@
       "attributes": {}
     }
   },
-  "total_flos": 333840384000.0,
-  "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9230769230769231,
   "eval_steps": 100,
+  "global_step": 9,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.5128205128205128,
+      "grad_norm": 0.5559384822845459,
+      "learning_rate": 1e-05,
+      "loss": 0.2367,
+      "mean_token_accuracy": 0.9557622533964742,
+      "step": 5
+    },
+    {
+      "epoch": 0.9230769230769231,
+      "mean_token_accuracy": 0.9720920825971266,
+      "step": 9,
+      "total_flos": 3345047902748672.0,
+      "train_loss": 0.18449299203024971,
+      "train_runtime": 21.9388,
+      "train_samples_per_second": 3.555,
+      "train_steps_per_second": 0.41
     }
   ],
   "logging_steps": 5,
+  "max_steps": 9,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 3345047902748672.0,
+  "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a02bff04ef8af8e499dbbcf331138d7ad9540070c8f083abe6f6e2e56b8004a2
 size 7160

 version https://git-lfs.github.com/spec/v1
+oid sha256:9581172b8deaf0ef914db42abf801181238df10634ba664216aaa426665059c7
 size 7160