End of training

Files changed (6) hide show

README.md CHANGED Viewed

@@ -3,6 +3,10 @@ library_name: transformers
 license: apache-2.0
 base_model: mistralai/Mistral-7B-Instruct-v0.1
 tags:
 - trl
 - sft
 - generated_from_trainer
@@ -19,6 +23,8 @@ should probably proofread and complete it, then remove this comment. -->
 # mistral_llama_2_code_math_3_full
 This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) on the generator dataset.
 ## Model description

 license: apache-2.0
 base_model: mistralai/Mistral-7B-Instruct-v0.1
 tags:
+- alignment-handbook
+- trl
+- sft
+- generated_from_trainer
 - trl
 - sft
 - generated_from_trainer
 # mistral_llama_2_code_math_3_full
 This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) on the generator dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.5239
 ## Model description

all_results.json CHANGED Viewed

@@ -1,9 +1,14 @@
 {
     "epoch": 0.9777777777777777,
     "total_flos": 2250831298560.0,
-    "train_loss": 0.6733812906525352,
-    "train_runtime": 124.7588,
     "train_samples": 3980,
-    "train_samples_per_second": 5.699,
-    "train_steps_per_second": 0.176
 }

 {
     "epoch": 0.9777777777777777,
+    "eval_loss": 0.5238552093505859,
+    "eval_runtime": 0.746,
+    "eval_samples": 20,
+    "eval_samples_per_second": 4.021,
+    "eval_steps_per_second": 1.34,
     "total_flos": 2250831298560.0,
+    "train_loss": 0.6714122566309842,
+    "train_runtime": 124.2809,
     "train_samples": 3980,
+    "train_samples_per_second": 5.721,
+    "train_steps_per_second": 0.177
 }

eval_results.json ADDED Viewed

+{
+    "epoch": 0.9777777777777777,
+    "eval_loss": 0.5238552093505859,
+    "eval_runtime": 0.746,
+    "eval_samples": 20,
+    "eval_samples_per_second": 4.021,
+    "eval_steps_per_second": 1.34
+}

runs/Jan20_00-46-11_dgx-a100-13/events.out.tfevents.1737330924.dgx-a100-13.995870.1 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:a380bbed4f0a431d172fc0ef7525a3c0b4bd08c91a74a554e35e0029e06f4fc3
+size 354

runs/Jan20_00-55-05_dgx-a100-14/events.out.tfevents.1737330919.dgx-a100-14.634646.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd08bc5b9fdc76af8629864471382822eb0a10f4f3d218b32a2315030428075b
+size 6206

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
     "epoch": 0.9777777777777777,
     "total_flos": 2250831298560.0,
-    "train_loss": 0.6733812906525352,
-    "train_runtime": 124.7588,
     "train_samples": 3980,
-    "train_samples_per_second": 5.699,
-    "train_steps_per_second": 0.176
 }

 {
     "epoch": 0.9777777777777777,
     "total_flos": 2250831298560.0,
+    "train_loss": 0.6714122566309842,
+    "train_runtime": 124.2809,
     "train_samples": 3980,
+    "train_samples_per_second": 5.721,
+    "train_steps_per_second": 0.177
 }