End of training

Browse files

Files changed (7) hide show

README.md +27 -28
adapter_config.json +8 -8
adapter_model.safetensors +2 -2
runs/Sep26_00-00-07_sammie/events.out.tfevents.1727330530.sammie.423563.0 +3 -0
runs/Sep26_00-05-40_sammie/events.out.tfevents.1727330787.sammie.423563.1 +3 -0
runs/Sep26_00-16-01_sammie/events.out.tfevents.1727331385.sammie.430449.0 +3 -0
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -21,7 +21,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [unsloth/tinyllama-chat-bnb-4bit](https://huggingface.co/unsloth/tinyllama-chat-bnb-4bit) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.6823
 ## Model description
@@ -49,38 +49,37 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - num_epochs: 1
-- mixed_precision_training: Native AMP
 ### Training results
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 4.5606        | 0.0377 | 10   | 4.0606          |
-| 3.7693        | 0.0753 | 20   | 3.5539          |
-| 3.3915        | 0.1130 | 30   | 3.2022          |
-| 3.0938        | 0.1507 | 40   | 2.9560          |
-| 2.8692        | 0.1883 | 50   | 2.7576          |
-| 2.6774        | 0.2260 | 60   | 2.5706          |
-| 2.5012        | 0.2637 | 70   | 2.3805          |
-| 2.3332        | 0.3013 | 80   | 2.2277          |
-| 2.159         | 0.3390 | 90   | 2.1030          |
-| 2.0806        | 0.3766 | 100  | 2.0125          |
-| 1.9781        | 0.4143 | 110  | 1.9497          |
-| 1.9683        | 0.4520 | 120  | 1.9005          |
-| 1.917         | 0.4896 | 130  | 1.8584          |
-| 1.8551        | 0.5273 | 140  | 1.8224          |
-| 1.8121        | 0.5650 | 150  | 1.7910          |
-| 1.7998        | 0.6026 | 160  | 1.7673          |
-| 1.7484        | 0.6403 | 170  | 1.7486          |
-| 1.7221        | 0.6780 | 180  | 1.7331          |
-| 1.7171        | 0.7156 | 190  | 1.7207          |
-| 1.7103        | 0.7533 | 200  | 1.7108          |
-| 1.7086        | 0.7910 | 210  | 1.7025          |
-| 1.7083        | 0.8286 | 220  | 1.6955          |
-| 1.7065        | 0.8663 | 230  | 1.6907          |
-| 1.6829        | 0.9040 | 240  | 1.6864          |
-| 1.6892        | 0.9416 | 250  | 1.6838          |
-| 1.6985        | 0.9793 | 260  | 1.6823          |
 ### Framework versions

 This model is a fine-tuned version of [unsloth/tinyllama-chat-bnb-4bit](https://huggingface.co/unsloth/tinyllama-chat-bnb-4bit) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.3521
 ## Model description
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - num_epochs: 1
 ### Training results
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
+| 3.8522        | 0.0377 | 10   | 3.0444          |
+| 2.7085        | 0.0753 | 20   | 2.4069          |
+| 2.191         | 0.1130 | 30   | 2.0027          |
+| 1.8996        | 0.1507 | 40   | 1.8018          |
+| 1.7538        | 0.1883 | 50   | 1.6850          |
+| 1.6563        | 0.2260 | 60   | 1.6161          |
+| 1.6275        | 0.2637 | 70   | 1.5722          |
+| 1.5313        | 0.3013 | 80   | 1.5377          |
+| 1.5277        | 0.3390 | 90   | 1.5104          |
+| 1.5039        | 0.3766 | 100  | 1.4889          |
+| 1.4768        | 0.4143 | 110  | 1.4694          |
+| 1.4552        | 0.4520 | 120  | 1.4523          |
+| 1.4481        | 0.4896 | 130  | 1.4385          |
+| 1.4223        | 0.5273 | 140  | 1.4233          |
+| 1.4145        | 0.5650 | 150  | 1.4118          |
+| 1.4207        | 0.6026 | 160  | 1.4012          |
+| 1.4004        | 0.6403 | 170  | 1.3925          |
+| 1.4316        | 0.6780 | 180  | 1.3849          |
+| 1.3841        | 0.7156 | 190  | 1.3784          |
+| 1.3747        | 0.7533 | 200  | 1.3722          |
+| 1.388         | 0.7910 | 210  | 1.3665          |
+| 1.3508        | 0.8286 | 220  | 1.3623          |
+| 1.386         | 0.8663 | 230  | 1.3586          |
+| 1.3605        | 0.9040 | 240  | 1.3555          |
+| 1.369         | 0.9416 | 250  | 1.3534          |
+| 1.3645        | 0.9793 | 260  | 1.3521          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -20,19 +20,19 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "down_proj",
     "o_proj",
-    "mlp.gate_up_proj",
     "self_attn.o_proj.weight",
-    "up_proj",
-    "gate_proj",
     "mlp.down_proj",
-    "self_attn.qkv_proj.weight",
-    "v_proj",
-    "q_proj",
-    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
-  "use_rslora": false
 }

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "gate_proj",
+    "k_proj",
+    "mlp.gate_up_proj",
     "down_proj",
+    "v_proj",
+    "q_proj",
     "o_proj",
     "self_attn.o_proj.weight",
     "mlp.down_proj",
+    "up_proj",
+    "self_attn.qkv_proj.weight"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
+  "use_rslora": true
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60d95b10b6e140a9626a7058d5038528f2ff80148dc4569b881db56052046509
-size 40

 version https://git-lfs.github.com/spec/v1
+oid sha256:56803c4d98022be7dfa3f8c0e1e22fc44c4994d771418a572448c62920ab292b
+size 403743472

runs/Sep26_00-00-07_sammie/events.out.tfevents.1727330530.sammie.423563.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f7d328eeb1fd2ee1c738e7e41da038082fa2a499f4c138a80588772bb7046075
+size 6059

runs/Sep26_00-05-40_sammie/events.out.tfevents.1727330787.sammie.423563.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:06d71f55f938bbcfff0019190477660e8cec8bb67d5504d0ca5e4f580392b22b
+size 6059

runs/Sep26_00-16-01_sammie/events.out.tfevents.1727331385.sammie.430449.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb5ecb0e4177bd91562b8ea41ee32efba8d3a59ff9ee3d7150afc14a415e9f2f
+size 18837

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:92ef00ea7a676ed017f06f373e7b051d049f571471eb21b599f6dcb299f671b4
-size 5560

 version https://git-lfs.github.com/spec/v1
+oid sha256:e88c7f613ec531155670d9379c212ae8331d83fa95b2fb8801a6da610e8bc9d9
+size 5624