Update README.md
Browse files
README.md
CHANGED
|
@@ -25,10 +25,10 @@ Axolotl version: `0.8.0.dev0`
|
|
| 25 |
|
| 26 |
```yaml
|
| 27 |
adapter: lora
|
| 28 |
-
base_model: mistralai/Mistral-7B-v0.
|
| 29 |
model_type: MistralForCausalLM
|
| 30 |
tokenizer_type: AutoTokenizer
|
| 31 |
-
bf16:
|
| 32 |
dataset_processes: 32
|
| 33 |
datasets:
|
| 34 |
- path: bytess/zrah-personal-ai
|
|
@@ -38,7 +38,7 @@ gradient_checkpointing: false
|
|
| 38 |
learning_rate: 0.0002
|
| 39 |
lora_alpha: 32
|
| 40 |
lora_dropout: 0.05
|
| 41 |
-
lora_r: 16
|
| 42 |
lora_target_modules:
|
| 43 |
- q_proj
|
| 44 |
- v_proj
|
|
@@ -50,7 +50,7 @@ lora_target_modules:
|
|
| 50 |
loraplus_lr_embedding: 1.0e-06
|
| 51 |
lr_scheduler: cosine
|
| 52 |
max_prompt_len: 512
|
| 53 |
-
micro_batch_size: 2
|
| 54 |
num_epochs: 3
|
| 55 |
optimizer: adamw_torch
|
| 56 |
output_dir: ./outputs/zrah_model
|
|
@@ -63,14 +63,7 @@ sequence_len: 2048
|
|
| 63 |
shuffle_merged_datasets: true
|
| 64 |
train_on_inputs: false
|
| 65 |
trl:
|
| 66 |
-
use_vllm:
|
| 67 |
-
vllm_device: auto
|
| 68 |
-
vllm_dtype: bfloat16
|
| 69 |
-
vllm_gpu_memory_utilization: 0.9
|
| 70 |
-
log_completions: false
|
| 71 |
-
ref_model_mixup_alpha: 0.9
|
| 72 |
-
ref_model_sync_steps: 64
|
| 73 |
-
sync_ref_model: false
|
| 74 |
val_set_size: 0.0
|
| 75 |
weight_decay: 0.0
|
| 76 |
```
|
|
|
|
| 25 |
|
| 26 |
```yaml
|
| 27 |
adapter: lora
|
| 28 |
+
base_model: mistralai/Mistral-7B-Instruct-v0.3
|
| 29 |
model_type: MistralForCausalLM
|
| 30 |
tokenizer_type: AutoTokenizer
|
| 31 |
+
bf16: true
|
| 32 |
dataset_processes: 32
|
| 33 |
datasets:
|
| 34 |
- path: bytess/zrah-personal-ai
|
|
|
|
| 38 |
learning_rate: 0.0002
|
| 39 |
lora_alpha: 32
|
| 40 |
lora_dropout: 0.05
|
| 41 |
+
lora_r: 16 # Try 8 later for a smaller adapter
|
| 42 |
lora_target_modules:
|
| 43 |
- q_proj
|
| 44 |
- v_proj
|
|
|
|
| 50 |
loraplus_lr_embedding: 1.0e-06
|
| 51 |
lr_scheduler: cosine
|
| 52 |
max_prompt_len: 512
|
| 53 |
+
micro_batch_size: 4 # Increased from 2; drop back to 2 if GPU memory is insufficient
|
| 54 |
num_epochs: 3
|
| 55 |
optimizer: adamw_torch
|
| 56 |
output_dir: ./outputs/zrah_model
|
|
|
|
| 63 |
shuffle_merged_datasets: true
|
| 64 |
train_on_inputs: false
|
| 65 |
trl:
|
| 66 |
+
  use_vllm: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
val_set_size: 0.0
|
| 68 |
weight_decay: 0.0
|
| 69 |
```
|