Model save

Files changed (9) hide show

README.md CHANGED Viewed

@@ -19,7 +19,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.9190
 ## Model description
@@ -52,7 +52,7 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 2.8727        | 1.0   | 4    | 1.9190          |
 ### Framework versions

 This model is a fine-tuned version of [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.9179
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 2.8727        | 1.0   | 4    | 1.9179          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -19,13 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
-    "gate_proj",
     "k_proj",
-    "down_proj",
     "q_proj",
     "v_proj",
-    "up_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
+    "up_proj",
     "q_proj",
+    "gate_proj",
     "v_proj",
+    "o_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c7cf8e73622f67f47852fe1653c34f642065a261c76b73bbb244557d14c848d
 size 159967880

 version https://git-lfs.github.com/spec/v1
+oid sha256:d90e045b737c4a6531b4e11d358c56be4e34ff1f843c5b731db280a87c770a5e
 size 159967880

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 2.294321656227112,
-    "train_runtime": 13.2658,
     "train_samples": 10,
-    "train_samples_per_second": 0.754,
-    "train_steps_per_second": 0.302
 }

 {
     "epoch": 1.0,
+    "train_loss": 2.294134736061096,
+    "train_runtime": 13.2039,
     "train_samples": 10,
+    "train_samples_per_second": 0.757,
+    "train_steps_per_second": 0.303
 }

config.json CHANGED Viewed

@@ -20,7 +20,7 @@
   "quantization_config": {
     "_load_in_4bit": true,
     "_load_in_8bit": false,
-    "bnb_4bit_compute_dtype": "bfloat16",
     "bnb_4bit_quant_type": "nf4",
     "bnb_4bit_use_double_quant": false,
     "llm_int8_enable_fp32_cpu_offload": false,
@@ -35,7 +35,6 @@
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
-  "torch_dtype": "float16",
   "transformers_version": "4.38.2",
   "use_cache": true,
   "vocab_size": 32000

   "quantization_config": {
     "_load_in_4bit": true,
     "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "float16",
     "bnb_4bit_quant_type": "nf4",
     "bnb_4bit_use_double_quant": false,
     "llm_int8_enable_fp32_cpu_offload": false,
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "transformers_version": "4.38.2",
   "use_cache": true,
   "vocab_size": 32000

runs/Apr22_06-00-01_SYS-4029GP-TRT/events.out.tfevents.1713736812.SYS-4029GP-TRT.1197881.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a7f3bc98e3cf2a0d13548bec631965604a1ae8784e49241b2abd9811f5a9e9f
+size 5930

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 2.294321656227112,
-    "train_runtime": 13.2658,
     "train_samples": 10,
-    "train_samples_per_second": 0.754,
-    "train_steps_per_second": 0.302
 }

 {
     "epoch": 1.0,
+    "train_loss": 2.294134736061096,
+    "train_runtime": 13.2039,
     "train_samples": 10,
+    "train_samples_per_second": 0.757,
+    "train_steps_per_second": 0.303
 }

trainer_state.json CHANGED Viewed

@@ -10,16 +10,16 @@
   "log_history": [
     {
       "epoch": 0.25,
-      "grad_norm": 60887.75,
       "learning_rate": 0.0002,
       "loss": 2.8727,
       "step": 1
     },
     {
       "epoch": 1.0,
-      "eval_loss": 1.9190164804458618,
-      "eval_runtime": 1.274,
-      "eval_samples_per_second": 7.849,
       "eval_steps_per_second": 0.785,
       "step": 4
     },
@@ -27,10 +27,10 @@
       "epoch": 1.0,
       "step": 4,
       "total_flos": 112033940987904.0,
-      "train_loss": 2.294321656227112,
-      "train_runtime": 13.2658,
-      "train_samples_per_second": 0.754,
-      "train_steps_per_second": 0.302
     }
   ],
   "logging_steps": 5,

   "log_history": [
     {
       "epoch": 0.25,
+      "grad_norm": 60655.65234375,
       "learning_rate": 0.0002,
       "loss": 2.8727,
       "step": 1
     },
     {
       "epoch": 1.0,
+      "eval_loss": 1.9178766012191772,
+      "eval_runtime": 1.2746,
+      "eval_samples_per_second": 7.846,
       "eval_steps_per_second": 0.785,
       "step": 4
     },
       "epoch": 1.0,
       "step": 4,
       "total_flos": 112033940987904.0,
+      "train_loss": 2.294134736061096,
+      "train_runtime": 13.2039,
+      "train_samples_per_second": 0.757,
+      "train_steps_per_second": 0.303
     }
   ],
   "logging_steps": 5,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:45ee5f56891ea0f726b72278063ca89172602a534d3ef1a13c3034b6193fb170
 size 4984

 version https://git-lfs.github.com/spec/v1
+oid sha256:b8a365241bc54220fb3585734b5fd7590bc33327f68c094a81b83301188da5fa
 size 4984