# RFT/initialization_LOI/config.yaml
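#
# ${...} references are OmegaConf/Hydra-style interpolations (assumed from the
# syntax): ${exp_name}, ${num_ths}, etc. are resolved when the config is loaded.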
model_name: meta-llama/Meta-Llama-3-8B-Instruct
exp_name: Llama-3-8b-optimizers
run_name: Riemannion-WD-OI-new
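# With the values above, base_dir resolves to:
#   /home/vabogachev/RiemanianFinetune/runs/Llama-3-8b-optimizers/Riemannion-WD-OI-new/final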
base_dir: /home/vabogachev/RiemanianFinetune/runs/${exp_name}/${run_name}/final
cfg_no: 1
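# Pipeline stages executed by this run.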
tasks:
- FINETUNE
- VALIDATE
- INFERENCE
max_length: 768
dataset_path: /home/vabogachev/RiemanianFinetune/datasets/common_reasoning
n_shots: 0
fp16: false
bf16: true
num_ths: 3
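# Options for the dataset loader (num_proc is presumably passed through to
# datasets.load_dataset / dataset.map).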
loader_config:
  num_proc: ${num_ths}
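# Left padding is the standard choice for batched generation with
# decoder-only models such as Llama.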
tokenizer_config:
  padding_side: left
report_to: comet_ml
detailed_lora_logs: false
detailed_riemannian_logs: true
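# PEFT adapter settings. peft_init_path points at a saved LoRA initialization,
# peft_pretrained_path at the fine-tuned adapter; merge_tuned presumably merges
# the tuned adapter into the base weights at the end.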
adapter_config:
  peft_pretrained: true
  peft_is_trainable: true
  merge_tuned: true
  peft_pretrained_path: ${base_dir}/finetuned_model_cfg-${cfg_no}
  ft_strategy: LoRA
  peft_init_path: ${base_dir}/initialization_${cfg_no}
  target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - up_proj
  - down_proj
  - gate_proj
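  # LoRA hyperparameters. lora_alpha is tied to r, so the effective scaling
  # alpha/r is 1.0. The B_gain_* fields appear to parameterize the custom
  # 'riemannian' initialization of the B factor (npwr/rpwr read as exponents,
  # mult as a scale multiplier); their exact semantics live in the training code.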
  LoRA_config:
    r: 32
    lora_alpha: ${adapter_config.LoRA_config.r}
    lora_dropout: 0.05
    target_modules: ${adapter_config.target_modules}
    split2zero: true
    init_strategy: riemannian
    B_gain_npwr: 0.5
    B_gain_rpwr: -0.5
    B_gain_mult: -1.0
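# Inference settings: the eval set is processed in num_splits chunks, and the
# {0} placeholder in dump_path is presumably formatted with the split index.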
evaluation_config:
  num_splits: 10
  max_new_tokens: 4
  batch_size: 8
  empty_cache: true
  dump_path: ${base_dir}/cfg-${cfg_no}_preds_CR_{0}.bin
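# 'Riemannion' appears to be the custom Riemannian optimizer this experiment
# studies (cf. init_strategy: riemannian and detailed_riemannian_logs); the
# remaining fields mirror SGD-with-momentum hyperparameters.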
optimizer_config:
  optim: Riemannion
  lr: 0.0001
  momentum: 0.9
  nesterov: false
  weight_decay: 0.00316
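# Trainer arguments; max_seq_length and dataset_text_field suggest these are
# consumed by TRL's SFTTrainer/SFTConfig. Effective train batch size is
# 4 * 16 (gradient accumulation) = 64 sequences per device.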
trainer_config:
  run_name: ${exp_name}/${run_name}-final-cfg${cfg_no}
  output_dir: bogachevv/${exp_name}-${run_name}-final-cfg${cfg_no}
  max_seq_length: ${max_length}
  dataset_text_field: text
  fp16: ${fp16}
  bf16: ${bf16}
  full_determinism: false
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 8
  gradient_accumulation_steps: 16
  lr_scheduler_type: linear
  warmup_ratio: 0.1
  num_train_epochs: 2
  dataloader_num_workers: ${num_ths}
  dataset_num_proc: ${num_ths}
  eval_strategy: steps
  eval_steps: 128
  logging_steps: 16
  load_best_model_at_end: true
  seed: 23654
  data_seed: 23654
  report_to: ${report_to}
  disable_tqdm: true
  save_strategy: steps
  save_steps: 128
  save_total_limit: 4