---
# Training-run configuration with four sections: model, sama_adapter, data,
# and trainer_args.
#
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file. Confirm against the code that loads this config that
# `sama_adapter` is a top-level section rather than a child of `model`.

model:
  # Other model IDs that were noted next to this key:
  #   facebook/opt-125m
  #   meta-llama/Llama-2-7b-hf
  #   "openai-community/gpt2"
  #   EleutherAI/pythia-160m
  #   Qwen/Qwen2.5-0.5B
  model_name: facebook/opt-125m
  # model_name: facebook/opt-125m
  # adapter_path: "./run_all/exnr15/ft2"
  # adapter_path: './run_all/run_exps9/ft2'
  # adapter_path: "./exp395/run_ex07/ft2"
  # data_collator_mode: 'dynamic'
  model_max_seq_length: 512

sama_adapter:
  # NOTE(review): the meaning of col_L / row_R / num_unique_blocks_* is
  # defined by the adapter implementation, which is not visible here.
  col_L: 12  # 24
  row_R: 16  # 32
  # target_modules: ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj","up_proj","down_proj"]
  target_modules: ["q_proj", "v_proj"]
  num_unique_blocks_L: 4
  num_unique_blocks_R: 3

data:
  dataset_name: 'CMS'
  split_ratio: 256
  # path: "./data/gsm8k_test.jsonl"
  # path: ./data/MetaMathQA-40K/MetaMathQA-40K.json
  path: ft_training_set/commonsense_15k.json
  dataset_split: train
  # dataset_field: [question, answer]
  # dataset_field: [query, response]

trainer_args:
  # NOTE(review): these key names match Hugging Face `TrainingArguments`
  # fields; presumably they are forwarded there -- confirm in the loader.

  # Written as 5.0e-4 rather than 5e-4: YAML 1.1 loaders such as PyYAML only
  # resolve a scalar to a float when the mantissa contains a dot, so a bare
  # `5e-4` would be loaded as the string "5e-4" instead of a number.
  learning_rate: 5.0e-4
  warmup_steps: 100
  eval_strategy: steps
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 32
  # gradient_accumulation_steps: 1
  # save_steps: 1000
  gradient_checkpointing: false  # (Turn off for faster training)
  output_dir: "./exps"
  # save_path: "runs"
  # Quoted to make explicit that this is the string "none", not a YAML null.
  report_to: "none"
  logging_steps: 10
  eval_steps: 20
  # dataloader_num_workers: 4
  num_train_epochs: 3.0
  # If passed to Hugging Face `TrainingArguments`, -1 means "no step cap",
  # so `num_train_epochs` determines the length of training.
  max_steps: -1

# Nesting level of this commented-out key in the original is unknown.
# device: 'cuda'