Bogachevv committed on
Commit
7129e30
·
verified ·
1 Parent(s): 69ba172

Upload initialization_LOI/config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. initialization_LOI/config.yaml +87 -0
initialization_LOI/config.yaml ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_name: meta-llama/Meta-Llama-3-8B-Instruct
2
+ exp_name: Llama-3-8b-optimizers
3
+ run_name: Riemannion-WD-OI-new
4
+ base_dir: /home/vabogachev/RiemanianFinetune/runs/${exp_name}/${run_name}/final
5
+ cfg_no: 1
6
+ tasks:
7
+ - FINETUNE
8
+ - VALIDATE
9
+ - INFERENCE
10
+ max_length: 768
11
+ dataset_path: /home/vabogachev/RiemanianFinetune/datasets/common_reasoning
12
+ n_shots: 0
13
+ fp16: false
14
+ bf16: true
15
+ num_ths: 3
16
+ loader_config:
17
+ num_proc: ${num_ths}
18
+ tokenizer_config:
19
+ padding_side: left
20
+ report_to: comet_ml
21
+ detailed_lora_logs: false
22
+ detailed_riemannian_logs: true
23
+ adapter_config:
24
+ peft_pretrained: true
25
+ peft_is_trainable: true
26
+ merge_tuned: true
27
+ peft_pretrained_path: ${base_dir}/finetuned_model_cfg-${cfg_no}
28
+ ft_strategy: LoRA
29
+ peft_init_path: ${base_dir}/initialization_${cfg_no}
30
+ target_modules:
31
+ - q_proj
32
+ - k_proj
33
+ - v_proj
34
+ - o_proj
35
+ - up_proj
36
+ - down_proj
37
+ - gate_proj
38
+ LoRA_config:
39
+ r: 32
40
+ lora_alpha: ${adapter_config.LoRA_config.r}
41
+ lora_dropout: 0.05
42
+ target_modules: ${adapter_config.target_modules}
43
+ split2zero: true
44
+ init_strategy: riemannian
45
+ B_gain_npwr: 0.5
46
+ B_gain_rpwr: -0.5
47
+ B_gain_mult: -1.0
48
+ evaluation_config:
49
+ num_splits: 10
50
+ max_new_tokens: 4
51
+ batch_size: 8
52
+ empty_cache: true
53
+ dump_path: ${base_dir}/cfg-${cfg_no}_preds_CR_{0}.bin
54
+ optimizer_config:
55
+ optim: Riemannion
56
+ lr: 0.0001
57
+ momentum: 0.9
58
+ nesterov: false
59
+ weight_decay: 0.00316
60
+ trainer_config:
61
+ run_name: ${exp_name}/${run_name}-final-cfg${cfg_no}
62
+ output_dir: bogachevv/${exp_name}-${run_name}-final-cfg${cfg_no}
63
+ max_seq_length: ${max_length}
64
+ dataset_text_field: text
65
+ fp16: ${fp16}
66
+ bf16: ${bf16}
67
+ full_determinism: false
68
+ per_device_train_batch_size: 4
69
+ per_device_eval_batch_size: 8
70
+ gradient_accumulation_steps: 16
71
+ lr_scheduler_type: linear
72
+ warmup_ratio: 0.1
73
+ num_train_epochs: 2
74
+ dataloader_num_workers: ${num_ths}
75
+ dataset_num_proc: ${num_ths}
76
+ eval_strategy: steps
77
+ eval_steps: 128
78
+ logging_steps: 16
79
+ load_best_model_at_end: true
80
+ seed: 23654
81
+ data_seed: 23654
82
+ report_to: ${report_to}
83
+ disable_tqdm: true
84
+ save_strategy: steps
85
+ save_steps: 128
86
+ save_total_limit: 4
87
+