Model save
Browse files
- README.md +5 -25
- adapter_config.json +4 -4
- adapter_model.safetensors +1 -1
- training_args.bin +1 -1
README.md
CHANGED

@@ -2,7 +2,6 @@
 license: apache-2.0
 library_name: peft
 tags:
-- axolotl
 - generated_from_trainer
 base_model: mistralai/Mixtral-8x7B-Instruct-v0.1
 model-index:
@@ -27,7 +26,6 @@ load_in_4bit: true
 strict: false
 chat_template: inst
 
-datasets:
 datasets:
   - path: ./data/raw_format/tool_used_training.jsonl
     type: sharegpt
@@ -41,7 +39,7 @@ datasets:
 
 dataset_prepared_path: last_run_prepared
 val_set_size: 0.01
-output_dir: ./mixtral-
+output_dir: ./mixtral-lora-2-epochs-r64
 
 adapter: qlora
 lora_model_dir:
@@ -51,7 +49,7 @@ sample_packing: true
 pad_to_sequence_len: true
 
 lora_r: 64
-lora_alpha:
+lora_alpha: 64
 lora_dropout: 0.05
 lora_fan_in_fan_out:
 hub_model_id: liuylhf/mixtral-lora-less-modules
@@ -69,7 +67,7 @@ wandb_log_model: end
 
 gradient_accumulation_steps: 4
 micro_batch_size: 2
-num_epochs:
+num_epochs: 4
 optimizer: paged_adamw_8bit
 lr_scheduler: cosine
 learning_rate: 0.001
@@ -112,9 +110,7 @@ fsdp_config:
 
 # mixtral-lora-less-modules
 
-This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on
-It achieves the following results on the evaluation set:
-- Loss: 0.1911
+This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on an unknown dataset.
 
 ## Model description
 
@@ -145,23 +141,7 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.95) and epsilon=1e-05
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
-- num_epochs:
-
-### Training results
-
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 3.2966        | 0.0   | 1    | 3.2222          |
-| 0.261         | 0.1   | 31   | 0.2720          |
-| 0.1428        | 0.2   | 62   | 0.2252          |
-| 0.2674        | 0.3   | 93   | 0.2108          |
-| 0.1767        | 0.4   | 124  | 0.2043          |
-| 0.105         | 0.5   | 155  | 0.2003          |
-| 0.1799        | 0.6   | 186  | 0.1958          |
-| 0.1528        | 0.7   | 217  | 0.1942          |
-| 0.1954        | 0.8   | 248  | 0.1917          |
-| 0.1821        | 0.9   | 279  | 0.1911          |
-
+- num_epochs: 4
 
 ### Framework versions
 
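For orientation, the adapter hyperparameters after this commit correspond to roughly the following `peft` `LoraConfig`. This is an illustrative sketch only, not the repo's training code; axolotl assembles the equivalent config internally from the YAML above.

```python
from peft import LoraConfig

# Illustrative reconstruction of the adapter settings set by this commit;
# values mirror the axolotl YAML and adapter_config.json below.
lora_config = LoraConfig(
    r=64,              # lora_r: 64
    lora_alpha=64,     # lora_alpha: 64 (added in this commit)
    lora_dropout=0.05, # lora_dropout: 0.05
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    task_type="CAUSAL_LM",
)
```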
adapter_config.json
CHANGED

@@ -9,7 +9,7 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
-  "lora_alpha":
+  "lora_alpha": 64,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
@@ -19,10 +19,10 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
-    "q_proj",
     "v_proj",
-    "k_proj"
+    "k_proj",
+    "q_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false
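The reordered `target_modules` list is the same set of attention projections as before. A minimal sketch of loading this adapter onto the 4-bit base model (the base and adapter IDs come from the config above; quantization details beyond `load_in_4bit` are assumptions):

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# Mirrors `load_in_4bit: true` from the config; the compute dtype is an assumption.
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)

base = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    quantization_config=bnb_config,
    device_map="auto",
)

# Attach the LoRA adapter uploaded in this commit.
model = PeftModel.from_pretrained(base, "liuylhf/mixtral-lora-less-modules")
```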
adapter_model.safetensors
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1bd14376b2f68b0c33a669ad954dd4a0f21b6ff2c5861b9ffc7865fdf6749503
 size 218138576
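This file is a Git LFS pointer: the repo stores only the payload's sha256 and byte size, while the weights live in LFS storage. A minimal sketch for verifying a downloaded copy against the pointer (the local path is an assumption):

```python
import hashlib

# Assumed local path to the downloaded artifact
# (e.g. fetched with huggingface_hub.hf_hub_download).
path = "adapter_model.safetensors"

digest = hashlib.sha256()
with open(path, "rb") as f:
    # Stream in 1 MiB chunks so large files need not fit in memory.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

# Should match the "oid sha256:..." line of the LFS pointer.
print(digest.hexdigest())
```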
training_args.bin
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0e7d1c82c017295bf67e72f2f9c3276671b5c7c5e56727f36146b9af4b7bc72e
 size 5624
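`training_args.bin` is the pickled `TrainingArguments` object that transformers' `Trainer` writes alongside checkpoints. A sketch for inspecting it (pickle-based, so only load files you trust):

```python
import torch

# Trainer saves training_args.bin with torch.save; weights_only=False is
# needed on recent torch because this is a pickled object, not tensors.
args = torch.load("training_args.bin", weights_only=False)
print(args.num_train_epochs, args.learning_rate)
```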