| { | |
| "cmd": "sft", | |
| "requirements":{ | |
| "gpu": "1", | |
| "ddp": "1" | |
| }, | |
| "eval_requirements": { | |
| "gpu": "1" | |
| }, | |
| "eval_dataset": ["ceval", "gsm8k", "arc"], | |
| "args": { | |
| "model": "Qwen/Qwen-7B-Chat", | |
| "dataset": "iic/ms_agent", | |
| "per_device_train_batch_size": 1, | |
| "max_length": 2048, | |
| "loss_scale": "react", | |
| "gradient_accumulation_steps": 16, | |
| "learning_rate": 5e-5, | |
| "attn_impl": "flash_attn", | |
| "eval_steps": 2000, | |
| "save_steps": 2000, | |
| "num_train_epochs": 2, | |
| "gradient_checkpointing": true, | |
| "weight_decay": 0.01, | |
| "warmup_ratio": 0.03, | |
| "save_total_limit": 2, | |
| "logging_steps": 10 | |
| }, | |
| "experiment": [ | |
| { | |
| "name": "lora", | |
| "args": { | |
| "train_type": "lora", | |
| "lora_rank": 8, | |
| "lora_alpha": 32 | |
| } | |
| }, | |
| { | |
| "name": "lora+packing", | |
| "args": { | |
| "train_type": "lora", | |
| "lora_rank": 8, | |
| "lora_alpha": 32, | |
| "packing": true, | |
| "eval_steps": 200, | |
| "save_steps": 200 | |
| } | |
| }, | |
| { | |
| "name": "lora+packing+ddp", | |
| "requirements":{ | |
| "gpu": "2", | |
| "ddp": "2" | |
| }, | |
| "args": { | |
| "train_type": "lora", | |
| "lora_rank": 8, | |
| "lora_alpha": 32, | |
| "packing": true, | |
| "eval_steps": 100, | |
| "save_steps": 100 | |
| } | |
| }, | |
| { | |
| "name": "lora+packing+lazytokenize", | |
| "args": { | |
| "train_type": "lora", | |
| "lora_rank": 8, | |
| "lora_alpha": 32, | |
| "packing": true, | |
| "lazy_tokenize": true, | |
| "eval_steps": 200, | |
| "save_steps": 200 | |
| } | |
| }, | |
| { | |
| "name": "lora+", | |
| "args": { | |
| "train_type": "lora", | |
| "lora_rank": 8, | |
| "lora_alpha": 32, | |
| "lorap_lr_ratio": 16.0 | |
| } | |
| }, | |
| { | |
| "name": "rslora", | |
| "args": { | |
| "train_type": "lora", | |
| "lora_rank": 8, | |
| "lora_alpha": 32, | |
| "use_rslora": true | |
| } | |
| }, | |
| { | |
| "name": "dora", | |
| "args": { | |
| "train_type": "lora", | |
| "lora_rank": 8, | |
| "lora_alpha": 32, | |
| "use_dora": true | |
| } | |
| }, | |
| { | |
| "name": "lora+neftune", | |
| "args": { | |
| "train_type": "lora", | |
| "lora_rank": 8, | |
| "lora_alpha": 32, | |
| "neftune_noise_alpha": 15.0 | |
| } | |
| }, | |
| { | |
| "name": "llamapro", | |
| "args": { | |
| "train_type": "llamapro", | |
| "llamapro_num_new_blocks": 4 | |
| } | |
| }, | |
| { | |
| "name": "full", | |
| "requirements":{ | |
| "gpu": "1", | |
| "ddp": "1" | |
| }, | |
| "args": { | |
| "train_type": "full" | |
| } | |
| }, | |
| { | |
| "name": "reft", | |
| "requirements":{ | |
| "gpu": "1", | |
| "ddp": "1" | |
| }, | |
| "args": { | |
| "train_type": "reft", | |
| "gradient_checkpointing": false, | |
| "loss_scale": "default" | |
| } | |
| }, | |
| { | |
| "name": "full+galore128+quantize", | |
| "requirements":{ | |
| "gpu": "1", | |
| "ddp": "1" | |
| }, | |
| "args": { | |
| "train_type": "full", | |
| "use_galore": true, | |
| "galore_rank": 128, | |
| "galore_update_proj_gap": 200, | |
| "galore_optim_per_parameter": false, | |
| "galore_with_embedding": false, | |
| "galore_quantization": true | |
| } | |
| }, | |
| { | |
| "name": "full+galore128+quantize+proj_quant", | |
| "requirements":{ | |
| "gpu": "1", | |
| "ddp": "1" | |
| }, | |
| "args": { | |
| "train_type": "full", | |
| "use_galore": true, | |
| "galore_rank": 128, | |
| "galore_update_proj_gap": 200, | |
| "galore_optim_per_parameter": false, | |
| "galore_with_embedding": false, | |
| "galore_quantization": true, | |
| "galore_proj_quant": true | |
| } | |
| }, | |
| { | |
| "name": "full+galore128", | |
| "requirements":{ | |
| "gpu": "1", | |
| "ddp": "1" | |
| }, | |
| "args": { | |
| "train_type": "full", | |
| "use_galore": true, | |
| "galore_rank": 128, | |
| "galore_update_proj_gap": 200, | |
| "galore_optim_per_parameter": false, | |
| "galore_with_embedding": false | |
| } | |
| }, | |
| { | |
| "name": "full+galore64", | |
| "requirements":{ | |
| "gpu": "1", | |
| "ddp": "1" | |
| }, | |
| "args": { | |
| "train_type": "full", | |
| "use_galore": true, | |
| "galore_rank": 64, | |
| "galore_update_proj_gap": 200, | |
| "galore_optim_per_parameter": false, | |
| "galore_with_embedding": false | |
| } | |
| }, | |
| { | |
| "name": "full+galore32", | |
| "requirements":{ | |
| "gpu": "1", | |
| "ddp": "1" | |
| }, | |
| "args": { | |
| "train_type": "full", | |
| "use_galore": true, | |
| "galore_rank": 32, | |
| "galore_update_proj_gap": 200, | |
| "galore_optim_per_parameter": false, | |
| "galore_with_embedding": false | |
| } | |
| }, | |
| { | |
| "name": "full+galore_emb", | |
| "requirements":{ | |
| "gpu": "1", | |
| "ddp": "1" | |
| }, | |
| "args": { | |
| "train_type": "full", | |
| "use_galore": true, | |
| "galore_rank": 128, | |
| "galore_update_proj_gap": 200, | |
| "galore_optim_per_parameter": false, | |
| "galore_with_embedding": true | |
| } | |
| }, | |
| { | |
| "name": "full+galore_perparam", | |
| "requirements":{ | |
| "gpu": "1", | |
| "ddp": "1" | |
| }, | |
| "args": { | |
| "train_type": "full", | |
| "use_galore": true, | |
| "galore_rank": 128, | |
| "galore_update_proj_gap": 200, | |
| "galore_optim_per_parameter": true, | |
| "galore_with_embedding": false | |
| } | |
| }, | |
| { | |
| "name": "adalora", | |
| "args": { | |
| "train_type": "adalora", | |
| "lora_rank": 8, | |
| "lora_alpha": 32 | |
| } | |
| }, | |
| { | |
| "name": "adapter", | |
| "args": { | |
| "train_type": "adapter" | |
| } | |
| }, | |
| { | |
| "name": "full+lisa_2", | |
| "info": "lisa 2layers + full", | |
| "args": { | |
| "train_type": "full", | |
| "lisa_activated_layers": 2, | |
| "lisa_step_interval": 20 | |
| } | |
| }, | |
| { | |
| "name": "full+lisa_4", | |
| "info": "lisa 4layers + full", | |
| "args": { | |
| "train_type": "full", | |
| "lisa_activated_layers": 4, | |
| "lisa_step_interval": 20 | |
| } | |
| }, | |
| { | |
| "name": "unsloth+lora+q4", | |
| "info": "unsloth lora quantization bit 4", | |
| "args": { | |
| "train_type": "lora", | |
| "tuner_backend": "unsloth", | |
| "quantization_bit": 4, | |
| "model": "LLM-Research/Meta-Llama-3-8B-Instruct" | |
| } | |
| }, | |
| { | |
| "name": "unsloth+full", | |
| "info": "unsloth full", | |
| "args": { | |
| "train_type": "full", | |
| "tuner_backend": "unsloth", | |
| "model": "LLM-Research/Meta-Llama-3-8B-Instruct" | |
| } | |
| } | |
| ] | |
| } | |