sujithatz/finbot-transofrmer-based-phi3.5_adapter

Browse files

Files changed (4) hide show

README.md +29 -1
adapter_config.json +8 -14
adapter_model.safetensors +2 -2
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -17,6 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
 # phi-3-mini-LoRA
 This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
 ## Model description
@@ -44,10 +46,36 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 5
-- training_steps: 5
 ### Training results
 ### Framework versions

 # phi-3-mini-LoRA
 This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
+It achieves the following results on the evaluation set:
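+- Loss: 0.7215 (final checkpoint, step 120; the lowest validation loss in the training results table is 0.3759 at step 40, after which validation loss rises while training loss keeps falling)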
 ## Model description
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 5
+- training_steps: 120
 ### Training results
+| Training Loss | Epoch   | Step | Validation Loss |
+|:-------------:|:-------:|:----:|:---------------:|
+| 1.4909        | 1.1765  | 5    | 1.3154          |
+| 0.9704        | 2.3529  | 10   | 0.9087          |
+| 0.6673        | 3.5294  | 15   | 0.6343          |
+| 0.4418        | 4.7059  | 20   | 0.5075          |
+| 0.3375        | 5.8824  | 25   | 0.4491          |
+| 0.3033        | 7.0588  | 30   | 0.4069          |
+| 0.244         | 8.2353  | 35   | 0.3828          |
+| 0.2285        | 9.4118  | 40   | 0.3759          |
+| 0.1519        | 10.5882 | 45   | 0.3896          |
+| 0.1334        | 11.7647 | 50   | 0.4114          |
+| 0.099         | 12.9412 | 55   | 0.4291          |
+| 0.0823        | 14.1176 | 60   | 0.4610          |
+| 0.06          | 15.2941 | 65   | 0.4894          |
+| 0.0548        | 16.4706 | 70   | 0.5345          |
+| 0.0437        | 17.6471 | 75   | 0.5747          |
+| 0.0409        | 18.8235 | 80   | 0.6059          |
+| 0.0386        | 20.0    | 85   | 0.6349          |
+| 0.0272        | 21.1765 | 90   | 0.6590          |
+| 0.0262        | 22.3529 | 95   | 0.6933          |
+| 0.0303        | 23.5294 | 100  | 0.6960          |
+| 0.0249        | 24.7059 | 105  | 0.7021          |
+| 0.0291        | 25.8824 | 110  | 0.7173          |
+| 0.0255        | 27.0588 | 115  | 0.7195          |
+| 0.0208        | 28.2353 | 120  | 0.7215          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -1,9 +1,6 @@
 {
   "alpha_pattern": {},
-  "auto_mapping": {
-    "base_model_class": "Phi3ForCausalLM",
-    "parent_library": "transformers_modules.microsoft.Phi-3.5-mini-instruct.af0dfb8029e8a74545d0736d30cb6b58d2f0f3f0.modeling_phi3"
-  },
   "base_model_name_or_path": "microsoft/Phi-3.5-mini-instruct",
   "bias": "none",
   "fan_in_fan_out": false,
@@ -12,9 +9,9 @@
   "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,
-  "loftq_config": null,
-  "lora_alpha": 32,
-  "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
@@ -23,15 +20,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "down_proj",
-    "q_proj",
-    "gate_proj",
-    "k_proj",
     "o_proj",
-    "up_proj"
   ],
-  "task_type": null,
   "use_dora": false,
   "use_rslora": false
 }

 {
   "alpha_pattern": {},
+  "auto_mapping": null,
   "base_model_name_or_path": "microsoft/Phi-3.5-mini-instruct",
   "bias": "none",
   "fan_in_fan_out": false,
   "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "o_proj",
+    "down_proj",
+    "gate_up_proj",
+    "qkv_proj"
   ],
+  "task_type": "CAUSAL_LM",
   "use_dora": false,
   "use_rslora": false
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:443965e8be292a2bcd1ef4837df3c4f3a69ab40d05701c88346764362f778a84
-size 35668592

 version https://git-lfs.github.com/spec/v1
+oid sha256:0d0d087ec02232d98e64e6f6b528eebfa7ca7a0bf61f2f00fe1c0991fe80fee6
+size 100697728

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e75e7e86a3fc1a236b9d82a1f17d4139e53851f26782b1abb4f2640be7a11dea
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae64cfbc9fafa79992f8f1dbc59d731406c1a3c9322aa24ad9ba448b90f16c6e
 size 5432