bytess committed on
Commit
43e3774
·
verified ·
1 Parent(s): fe4db89

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -12
README.md CHANGED
@@ -25,10 +25,10 @@ Axolotl version: `0.8.0.dev0`
25
 
26
  ```yaml
27
  adapter: lora
28
- base_model: mistralai/Mistral-7B-v0.1
29
  model_type: MistralForCausalLM
30
  tokenizer_type: AutoTokenizer
31
- bf16: auto
32
  dataset_processes: 32
33
  datasets:
34
  - path: bytess/zrah-personal-ai
@@ -38,7 +38,7 @@ gradient_checkpointing: false
38
  learning_rate: 0.0002
39
  lora_alpha: 32
40
  lora_dropout: 0.05
41
- lora_r: 16
42
  lora_target_modules:
43
  - q_proj
44
  - v_proj
@@ -50,7 +50,7 @@ lora_target_modules:
50
  loraplus_lr_embedding: 1.0e-06
51
  lr_scheduler: cosine
52
  max_prompt_len: 512
53
- micro_batch_size: 2
54
  num_epochs: 3
55
  optimizer: adamw_torch
56
  output_dir: ./outputs/zrah_model
@@ -63,14 +63,7 @@ sequence_len: 2048
63
  shuffle_merged_datasets: true
64
  train_on_inputs: false
65
  trl:
66
- use_vllm: true
67
- vllm_device: auto
68
- vllm_dtype: bfloat16
69
- vllm_gpu_memory_utilization: 0.9
70
- log_completions: false
71
- ref_model_mixup_alpha: 0.9
72
- ref_model_sync_steps: 64
73
- sync_ref_model: false
74
  val_set_size: 0.0
75
  weight_decay: 0.0
76
  ```
 
25
 
26
  ```yaml
27
  adapter: lora
28
+ base_model: mistralai/Mistral-7B-Instruct-v0.3
29
  model_type: MistralForCausalLM
30
  tokenizer_type: AutoTokenizer
31
+ bf16: true
32
  dataset_processes: 32
33
  datasets:
34
  - path: bytess/zrah-personal-ai
 
38
  learning_rate: 0.0002
39
  lora_alpha: 32
40
  lora_dropout: 0.05
41
+ lora_r: 16 # Or try 8 for smaller size later
42
  lora_target_modules:
43
  - q_proj
44
  - v_proj
 
50
  loraplus_lr_embedding: 1.0e-06
51
  lr_scheduler: cosine
52
  max_prompt_len: 512
53
+ micro_batch_size: 4 # Increase from 2 if GPU allows
54
  num_epochs: 3
55
  optimizer: adamw_torch
56
  output_dir: ./outputs/zrah_model
 
63
  shuffle_merged_datasets: true
64
  train_on_inputs: false
65
  trl:
66
+ use_vllm: false
 
 
 
 
 
 
 
67
  val_set_size: 0.0
68
  weight_decay: 0.0
69
  ```