{ "cmd": "sft", "requirements":{ "gpu": "1", "ddp": "1" }, "eval_requirements": { "gpu": "1" }, "eval_dataset": ["ceval", "gsm8k", "arc"], "args": { "model": "Qwen/Qwen-7B-Chat", "dataset": "iic/ms_agent", "per_device_train_batch_size": 1, "max_length": 2048, "loss_scale": "react", "gradient_accumulation_steps": 16, "learning_rate": 5e-5, "attn_impl": "flash_attn", "eval_steps": 2000, "save_steps": 2000, "num_train_epochs": 2, "gradient_checkpointing": true, "weight_decay": 0.01, "warmup_ratio": 0.03, "save_total_limit": 2, "logging_steps": 10 }, "experiment": [ { "name": "lora", "args": { "train_type": "lora", "lora_rank": 8, "lora_alpha": 32 } }, { "name": "lora+packing", "args": { "train_type": "lora", "lora_rank": 8, "lora_alpha": 32, "packing": true, "eval_steps": 200, "save_steps": 200 } }, { "name": "lora+packing+ddp", "requirements":{ "gpu": "2", "ddp": "2" }, "args": { "train_type": "lora", "lora_rank": 8, "lora_alpha": 32, "packing": true, "eval_steps": 100, "save_steps": 100 } }, { "name": "lora+packing+lazytokenize", "args": { "train_type": "lora", "lora_rank": 8, "lora_alpha": 32, "packing": true, "lazy_tokenize": true, "eval_steps": 200, "save_steps": 200 } }, { "name": "lora+", "args": { "train_type": "lora", "lora_rank": 8, "lora_alpha": 32, "lorap_lr_ratio": 16.0 } }, { "name": "rslora", "args": { "train_type": "lora", "lora_rank": 8, "lora_alpha": 32, "use_rslora": true } }, { "name": "dora", "args": { "train_type": "lora", "lora_rank": 8, "lora_alpha": 32, "use_dora": true } }, { "name": "lora+neftune", "args": { "train_type": "lora", "lora_rank": 8, "lora_alpha": 32, "neftune_noise_alpha": 15.0 } }, { "name": "llamapro", "args": { "train_type": "llamapro", "llamapro_num_new_blocks": "4" } }, { "name": "full", "requirements":{ "gpu": "1", "ddp": "1" }, "args": { "train_type": "full" } }, { "name": "reft", "requirements":{ "gpu": "1", "ddp": "1" }, "args": { "train_type": "reft", "gradient_checkpointing": "false", "loss_scale": "default" } }, { "name": "full+galore128+quantize", "requirements":{ "gpu": "1", "ddp": "1" }, "args": { "train_type": "full", "use_galore": "true", "galore_rank": "128", "galore_update_proj_gap": "200", "galore_optim_per_parameter": "false", "galore_with_embedding": "false", "galore_quantization": "true" } }, { "name": "full+galore128+quantize+proj_quant", "requirements":{ "gpu": "1", "ddp": "1" }, "args": { "train_type": "full", "use_galore": "true", "galore_rank": "128", "galore_update_proj_gap": "200", "galore_optim_per_parameter": "false", "galore_with_embedding": "false", "galore_quantization": "true", "galore_proj_quant": "true" } }, { "name": "full+galore128", "requirements":{ "gpu": "1", "ddp": "1" }, "args": { "train_type": "full", "use_galore": "true", "galore_rank": "128", "galore_update_proj_gap": "200", "galore_optim_per_parameter": "false", "galore_with_embedding": "false" } }, { "name": "full+galore64", "requirements":{ "gpu": "1", "ddp": "1" }, "args": { "train_type": "full", "use_galore": "true", "galore_rank": "64", "galore_update_proj_gap": "200", "galore_optim_per_parameter": "false", "galore_with_embedding": "false" } }, { "name": "full+galore32", "requirements":{ "gpu": "1", "ddp": "1" }, "args": { "train_type": "full", "use_galore": "true", "galore_rank": "32", "galore_update_proj_gap": "200", "galore_optim_per_parameter": "false", "galore_with_embedding": "false" } }, { "name": "full+galore_emb", "requirements":{ "gpu": "1", "ddp": "1" }, "args": { "train_type": "full", "use_galore": "true", "galore_rank": "128", "galore_update_proj_gap": "200", "galore_optim_per_parameter": "false", "galore_with_embedding": "true" } }, { "name": "full+galore_perparam", "requirements":{ "gpu": "1", "ddp": "1" }, "args": { "train_type": "full", "use_galore": "true", "galore_rank": "128", "galore_update_proj_gap": "200", "galore_optim_per_parameter": "true", "galore_with_embedding": "false" } }, { "name": "adalora", "args": { "train_type": "adalora", "lora_rank": 8, "lora_alpha": 32 } }, { "name": "adapter", "args": { "train_type": "adapter" } }, { "name": "full+lisa_2", "info": "lisa 2layers + full", "args": { "train_type": "full", "lisa_activated_layers": 2, "lisa_step_interval": 20 } }, { "name": "full+lisa_4", "info": "lisa 4layers + full", "args": { "train_type": "full", "lisa_activated_layers": 4, "lisa_step_interval": 20 } }, { "name": "unsloth+lora+q4", "info": "unsloth lora quantization bit 4", "args": { "train_type": "lora", "tuner_backend": "unsloth", "quantization_bit": 4, "model": "LLM-Research/Meta-Llama-3-8B-Instruct" } }, { "name": "unsloth+full", "info": "unsloth full", "args": { "train_type": "full", "tuner_backend": "unsloth", "model_type": "LLM-Research/Meta-Llama-3-8B-Instruct" } } ] }