---
# Experiment configuration for Riemannian-optimizer LoRA fine-tuning of
# Llama-3-8B-Instruct. Uses OmegaConf-style ${...} interpolation; nesting of
# interpolated keys (e.g. ${adapter_config.LoRA_config.r}) fixes the schema.

model_name: meta-llama/Meta-Llama-3-8B-Instruct
exp_name: Llama-3-8b-optimizers
run_name: Riemannion-WD-OI-new
base_dir: /home/vabogachev/RiemanianFinetune/runs/${exp_name}/${run_name}/final
cfg_no: 1

# Pipeline stages to execute, in order.
tasks:
  - FINETUNE
  - VALIDATE
  - INFERENCE

max_length: 768
dataset_path: /home/vabogachev/RiemanianFinetune/datasets/common_reasoning
n_shots: 0
fp16: false
bf16: true
# Worker/process count shared by the data loader and the trainer below.
num_ths: 3

loader_config:
  num_proc: ${num_ths}

tokenizer_config:
  padding_side: left

report_to: comet_ml
detailed_lora_logs: false
detailed_riemannian_logs: true

adapter_config:
  peft_pretrained: true
  peft_is_trainable: true
  merge_tuned: true
  peft_pretrained_path: ${base_dir}/finetuned_model_cfg-${cfg_no}
  ft_strategy: LoRA
  peft_init_path: ${base_dir}/initialization_${cfg_no}
  # Attention and MLP projection layers that receive LoRA adapters.
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - up_proj
    - down_proj
    - gate_proj
  LoRA_config:
    r: 32
    # alpha tied to rank r (scaling factor alpha/r == 1).
    lora_alpha: ${adapter_config.LoRA_config.r}
    lora_dropout: 0.05
    target_modules: ${adapter_config.target_modules}
    split2zero: true
    init_strategy: riemannian
    # Gain parameters for the Riemannian initialization of the B matrix.
    B_gain_npwr: 0.5
    B_gain_rpwr: -0.5
    B_gain_mult: -1.0

evaluation_config:
  num_splits: 10
  max_new_tokens: 4
  batch_size: 8
  empty_cache: true
  # {0} is a runtime format placeholder (split index), not YAML interpolation.
  dump_path: ${base_dir}/cfg-${cfg_no}_preds_CR_{0}.bin

optimizer_config:
  optim: Riemannion
  lr: 0.0001
  momentum: 0.9
  nesterov: false
  weight_decay: 0.00316

# Passed through to the HF/TRL trainer; interpolations pull shared top-level
# settings so precision, workers, and reporting stay in sync file-wide.
trainer_config:
  run_name: ${exp_name}/${run_name}-final-cfg${cfg_no}
  output_dir: bogachevv/${exp_name}-${run_name}-final-cfg${cfg_no}
  max_seq_length: ${max_length}
  dataset_text_field: text
  fp16: ${fp16}
  bf16: ${bf16}
  full_determinism: false
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 8
  gradient_accumulation_steps: 16
  lr_scheduler_type: linear
  warmup_ratio: 0.1
  num_train_epochs: 2
  dataloader_num_workers: ${num_ths}
  dataset_num_proc: ${num_ths}
  eval_strategy: steps
  eval_steps: 128
  logging_steps: 16
  load_best_model_at_end: true
  seed: 23654
  data_seed: 23654
  report_to: ${report_to}
  disable_tqdm: true
  save_strategy: steps
  save_steps: 128
  save_total_limit: 4