winglian committed
Commit e7c1bfd · 1 Parent(s): 131a902

add axolotl config

Files changed (1)
  1. configs/packing.yml +80 -0
configs/packing.yml ADDED
@@ -0,0 +1,80 @@
+ # base_model: meta-llama/Llama-2-7b-hf
+ # base_model_config: meta-llama/Llama-2-7b-hf
+ base_model: openlm-research/open_llama_3b_v2
+ base_model_config: openlm-research/open_llama_3b_v2
+ model_type: LlamaForCausalLM
+ tokenizer_type: LlamaTokenizer
+ tokenizer_use_fast: false
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+ push_dataset_to_hub: openaccess-ai-collective
+ hf_use_auth_token: true
+ datasets:
+   - path: Open-Orca/oo-gpt4-200k
+     type: alpaca_w_system.load_open_orca
+ dataset_shard_num: 5
+ dataset_shard_idx: 0
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.01
+ adapter:
+ lora_model_dir:
+ sequence_len: 2048
+ max_packed_sequence_len:
+ sample_packing: true
+ sample_packing_eff_est: 0.956
+ lora_r:
+ lora_alpha:
+ lora_dropout:
+ lora_target_modules:
+ lora_target_linear:
+ lora_fan_in_fan_out:
+ wandb_project: packing-tests-3b
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+ output_dir: ./open-orca-3b
+ gradient_accumulation_steps: 1
+ micro_batch_size: 8
+ num_epochs: 6
+ optimizer: adamw_torch
+ adam_beta2: 0.98
+ max_grad_norm: 1.0
+ torchdistx_path:
+ lr_scheduler: cosine
+ lr_quadratic_warmup: true
+ learning_rate: 0.000003
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ fp16: false
+ tf32: true
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention:
+ sdp_attention: true
+ flash_optimum:
+ gptq_groupsize:
+ gptq_model_v1:
+ warmup_steps: 10
+ eval_steps: 110
+ save_steps:
+ debug:
+ deepspeed:
+ weight_decay: 0.1
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
+ fsdp:
+   - full_shard
+   - auto_wrap
+ fsdp_config:
+   fsdp_offload_params: true
+   fsdp_state_dict_type: FULL_STATE_DICT
+   fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
+
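For context on the two packing knobs above: `sample_packing: true` fills each `sequence_len` (2048-token) training sequence with several concatenated examples instead of padding each example individually, and `sample_packing_eff_est: 0.956` is the assumed fraction of non-padding tokens after packing. Below is a minimal, hypothetical sketch of the idea (first-fit-decreasing bin packing over tokenized example lengths); it is illustrative only, not axolotl's actual implementation, and all names in it are assumptions:

# Illustrative sketch (assumption, not axolotl's code): first-fit-decreasing
# packing of tokenized example lengths into fixed 2048-token sequences.
from typing import List

def pack_examples(example_lens: List[int], sequence_len: int = 2048) -> List[List[int]]:
    """Greedily place each example into the first sequence with room for it."""
    bins: List[List[int]] = []   # packed example lengths per sequence
    space: List[int] = []        # remaining token budget per sequence
    for n in sorted(example_lens, reverse=True):
        for i, free in enumerate(space):
            if n <= free:
                bins[i].append(n)
                space[i] -= n
                break
        else:                    # no existing sequence fits: open a new one
            bins.append([n])
            space.append(sequence_len - n)
    return bins

lens = [1900, 1400, 900, 700, 300, 120]
packed = pack_examples(lens)
# Packing efficiency = non-padding tokens / total tokens, the quantity the
# config pre-estimates with sample_packing_eff_est.
print(packed, sum(lens) / (len(packed) * 2048))

Depending on the axolotl version, a config like this is typically launched with something like `accelerate launch scripts/finetune.py configs/packing.yml` (older releases) or `accelerate launch -m axolotl.cli.train configs/packing.yml` (newer releases); check the repo's README for the current entry point.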