Training in progress, step 100

Files changed (5) hide show

README.md CHANGED Viewed

@@ -64,7 +64,7 @@ lora_fan_in_fan_out: null
 lora_model_dir: null
 lora_r: 32
 lora_target_linear: true
-loraplus_lr_ratio: 16
 lr_scheduler: cosine
 micro_batch_size: 1
 mlflow_experiment_name: /tmp/8e23ab7f3136aade_train_data.json
@@ -103,7 +103,7 @@ xformers_attention: null
 This model is a fine-tuned version of [unsloth/Qwen2.5-3B](https://huggingface.co/unsloth/Qwen2.5-3B) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.8736
 ## Model description
@@ -139,12 +139,15 @@ No additional optimizer arguments
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
 | 0.5376        | 0.0002 | 1    | 1.1038          |
-| 1.0685        | 0.0164 | 100  | 0.8534          |
-| 0.9163        | 0.0328 | 200  | 0.8613          |
-| 0.9678        | 0.0492 | 300  | 0.8594          |
-| 0.9531        | 0.0656 | 400  | 0.8629          |
-| 1.0449        | 0.0820 | 500  | 0.8701          |
-| 0.8456        | 0.0985 | 600  | 0.8736          |
 ### Framework versions

 lora_model_dir: null
 lora_r: 32
 lora_target_linear: true
+loraplus_lr_ratio: 8
 lr_scheduler: cosine
 micro_batch_size: 1
 mlflow_experiment_name: /tmp/8e23ab7f3136aade_train_data.json
 This model is a fine-tuned version of [unsloth/Qwen2.5-3B](https://huggingface.co/unsloth/Qwen2.5-3B) on the None dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.8224
 ## Model description
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
 | 0.5376        | 0.0002 | 1    | 1.1038          |
+| 1.0376        | 0.0164 | 100  | 0.8196          |
+| 0.8848        | 0.0328 | 200  | 0.8143          |
+| 0.9346        | 0.0492 | 300  | 0.8134          |
+| 0.8966        | 0.0656 | 400  | 0.8115          |
+| 1.008         | 0.0820 | 500  | 0.8160          |
+| 0.8225        | 0.0985 | 600  | 0.8154          |
+| 0.857         | 0.1149 | 700  | 0.8197          |
+| 1.0713        | 0.1313 | 800  | 0.8197          |
+| 1.0041        | 0.1477 | 900  | 0.8224          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
     "o_proj",
-    "gate_proj",
-    "v_proj",
     "q_proj",
     "down_proj",
-    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "up_proj",
     "o_proj",
     "q_proj",
+    "gate_proj",
     "down_proj",
+    "v_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c21555a7c41429f66d7bb15398b4bd28d52aef56734104fbcf8126caec3ffeb2
 size 239650666

 version https://git-lfs.github.com/spec/v1
+oid sha256:a2b97e7367f463598722838ea3e789eafccc2a2bf2de7bb47ba772e4f6b63839
 size 239650666

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df2c88e441560b7d30a0c68235fe450bd704794674e100b7ef8872d879e42c39
 size 239536272

 version https://git-lfs.github.com/spec/v1
+oid sha256:97e0aa8302f740897c3fc855e7ccb97c0f8d2873de5a0776adfdd8f5e0139e68
 size 239536272

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e6f892413ee6a33391e91a66fafb17b7974d3c90c2418e9ad8556ec130bd8439
 size 6776

 version https://git-lfs.github.com/spec/v1
+oid sha256:969d207d1f6c0d67768fcd41b7675980b9dc88c38b5029cd598ebef348814045
 size 6776