yuyuchily's picture
Upload E-9_baseline_seq_e9
a85c42f verified
raw
history blame contribute delete
654 Bytes
{
"sft_experiment_id": "E-9",
"dataset_strategy_id": "baseline",
"base_model": "unsloth/Qwen3-4B-Instruct-2507",
"neftune_alpha": 5.0,
"data_cleaning": true,
"lora": {
"r": 64,
"alpha": 128,
"dropout": 0.0,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
]
},
"training": {
"max_seq_len": 512,
"epochs": 2,
"lr": 2e-06,
"per_device_train_bs": 2,
"grad_accum": 8,
"warmup_ratio": 0.1,
"weight_decay": 0.05
},
"special": {
"mask_cot": true,
"upsampling": true,
"curriculum": false
}
}