Student0809's picture
Add files using upload-large-folder tool
cb2428f verified
raw
history blame
7.22 kB
{
"cmd": "sft",
"requirements": {
"gpu": "1",
"ddp": "1"
},
"eval_requirements": {
"gpu": "1"
},
"eval_dataset": ["ceval", "gsm8k", "arc"],
"args": {
"model": "Qwen/Qwen-7B-Chat",
"dataset": "iic/ms_agent",
"per_device_train_batch_size": 1,
"max_length": 2048,
"loss_scale": "react",
"gradient_accumulation_steps": 16,
"learning_rate": 5e-5,
"attn_impl": "flash_attn",
"eval_steps": 2000,
"save_steps": 2000,
"num_train_epochs": 2,
"gradient_checkpointing": true,
"weight_decay": 0.01,
"warmup_ratio": 0.03,
"save_total_limit": 2,
"logging_steps": 10
},
"experiment": [
{
"name": "lora",
"args": {
"train_type": "lora",
"lora_rank": 8,
"lora_alpha": 32
}
},
{
"name": "lora+packing",
"args": {
"train_type": "lora",
"lora_rank": 8,
"lora_alpha": 32,
"packing": true,
"eval_steps": 200,
"save_steps": 200
}
},
{
"name": "lora+packing+ddp",
"requirements": {
"gpu": "2",
"ddp": "2"
},
"args": {
"train_type": "lora",
"lora_rank": 8,
"lora_alpha": 32,
"packing": true,
"eval_steps": 100,
"save_steps": 100
}
},
{
"name": "lora+packing+lazytokenize",
"args": {
"train_type": "lora",
"lora_rank": 8,
"lora_alpha": 32,
"packing": true,
"lazy_tokenize": true,
"eval_steps": 200,
"save_steps": 200
}
},
{
"name": "lora+",
"args": {
"train_type": "lora",
"lora_rank": 8,
"lora_alpha": 32,
"lorap_lr_ratio": 16.0
}
},
{
"name": "rslora",
"args": {
"train_type": "lora",
"lora_rank": 8,
"lora_alpha": 32,
"use_rslora": true
}
},
{
"name": "dora",
"args": {
"train_type": "lora",
"lora_rank": 8,
"lora_alpha": 32,
"use_dora": true
}
},
{
"name": "lora+neftune",
"args": {
"train_type": "lora",
"lora_rank": 8,
"lora_alpha": 32,
"neftune_noise_alpha": 15.0
}
},
{
"name": "llamapro",
"args": {
"train_type": "llamapro",
"llamapro_num_new_blocks": 4
}
},
{
"name": "full",
"requirements": {
"gpu": "1",
"ddp": "1"
},
"args": {
"train_type": "full"
}
},
{
"name": "reft",
"requirements": {
"gpu": "1",
"ddp": "1"
},
"args": {
"train_type": "reft",
"gradient_checkpointing": false,
"loss_scale": "default"
}
},
{
"name": "full+galore128+quantize",
"requirements": {
"gpu": "1",
"ddp": "1"
},
"args": {
"train_type": "full",
"use_galore": true,
"galore_rank": 128,
"galore_update_proj_gap": 200,
"galore_optim_per_parameter": false,
"galore_with_embedding": false,
"galore_quantization": true
}
},
{
"name": "full+galore128+quantize+proj_quant",
"requirements": {
"gpu": "1",
"ddp": "1"
},
"args": {
"train_type": "full",
"use_galore": true,
"galore_rank": 128,
"galore_update_proj_gap": 200,
"galore_optim_per_parameter": false,
"galore_with_embedding": false,
"galore_quantization": true,
"galore_proj_quant": true
}
},
{
"name": "full+galore128",
"requirements": {
"gpu": "1",
"ddp": "1"
},
"args": {
"train_type": "full",
"use_galore": true,
"galore_rank": 128,
"galore_update_proj_gap": 200,
"galore_optim_per_parameter": false,
"galore_with_embedding": false
}
},
{
"name": "full+galore64",
"requirements": {
"gpu": "1",
"ddp": "1"
},
"args": {
"train_type": "full",
"use_galore": true,
"galore_rank": 64,
"galore_update_proj_gap": 200,
"galore_optim_per_parameter": false,
"galore_with_embedding": false
}
},
{
"name": "full+galore32",
"requirements": {
"gpu": "1",
"ddp": "1"
},
"args": {
"train_type": "full",
"use_galore": true,
"galore_rank": 32,
"galore_update_proj_gap": 200,
"galore_optim_per_parameter": false,
"galore_with_embedding": false
}
},
{
"name": "full+galore_emb",
"requirements": {
"gpu": "1",
"ddp": "1"
},
"args": {
"train_type": "full",
"use_galore": true,
"galore_rank": 128,
"galore_update_proj_gap": 200,
"galore_optim_per_parameter": false,
"galore_with_embedding": true
}
},
{
"name": "full+galore_perparam",
"requirements": {
"gpu": "1",
"ddp": "1"
},
"args": {
"train_type": "full",
"use_galore": true,
"galore_rank": 128,
"galore_update_proj_gap": 200,
"galore_optim_per_parameter": true,
"galore_with_embedding": false
}
},
{
"name": "adalora",
"args": {
"train_type": "adalora",
"lora_rank": 8,
"lora_alpha": 32
}
},
{
"name": "adapter",
"args": {
"train_type": "adapter"
}
},
{
"name": "full+lisa_2",
"info": "lisa 2layers + full",
"args": {
"train_type": "full",
"lisa_activated_layers": 2,
"lisa_step_interval": 20
}
},
{
"name": "full+lisa_4",
"info": "lisa 4layers + full",
"args": {
"train_type": "full",
"lisa_activated_layers": 4,
"lisa_step_interval": 20
}
},
{
"name": "unsloth+lora+q4",
"info": "unsloth lora quantization bit 4",
"args": {
"train_type": "lora",
"tuner_backend": "unsloth",
"quantization_bit": 4,
"model": "LLM-Research/Meta-Llama-3-8B-Instruct"
}
},
{
"name": "unsloth+full",
"info": "unsloth full",
"args": {
"train_type": "full",
"tuner_backend": "unsloth",
"model": "LLM-Research/Meta-Llama-3-8B-Instruct"
}
}
]
}