{
  "cutoff_len": 512,
  "save_step": 1000,
  "train_lora_candidate_num": 2,
  "train_lora_simultaneously_num": 2,
  "train_strategy": "optim",
  "lora": [
    {
      "name": "mixlora",
      "task_name": "",
      "optim": "adamw",
      "scheduler_type": "constant",
      "warmup_steps": 0,
      "lr": 2e-4,
      "batch_size": 16,
      "micro_batch_size": 8,
      "evaluate_batch_size": 16,
      "num_epochs": 2,
      "r": 14,
      "lora_alpha": 28,
      "lora_dropout": 0.05,
      "target_modules": {
        "qkv_proj": true,
        "dense": true,
        "dense_h_to_4h": true,
        "dense_4h_to_h": true
      },
      "routing_strategy": "mixlora-dynamic",
      "num_experts": 8,
      "top_p": 0.8,
      "group_by_length": false
    }
  ]
}