---
# SFT LoRA fine-tuning configuration (Qwen3-4B-Instruct, structured-data + CoT dataset v2).
# NOTE(review): key names suggest a HF Transformers/PEFT-style trainer — confirm against the
# training script that consumes this file.

# Model & data
base_model: Qwen/Qwen3-4B-Instruct-2507
dataset_id: ./data/sft_structured_data_with_cot_512_v2.jsonl
output_dir: ./model/sft_lora_model/sft_r16_e2_lr1e5_structured_data_with_cot_dataset_512_v2
seed: 3407
val_ratio: 0.05  # fraction of data held out for validation — presumably; verify against loader
max_seq_len: 2048

# LoRA adapter
lora_r: 16
lora_alpha: 32
lora_dropout: 0.0
lora_target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - gate_proj
  - up_proj
  - down_proj

# Optimization
epochs: 2
batch_size: 4
eval_batch_size: 2
grad_accum: 8  # gradient-accumulation steps (effective batch presumably batch_size * grad_accum)
lr: 1.0e-05
optim: adamw_torch
warmup_ratio: 0.1
weight_decay: 0.05
lr_scheduler_type: cosine
max_steps: -1  # -1 presumably means "derive total steps from epochs" — confirm in trainer

# Logging & checkpointing
logging_steps: 10
eval_steps: 50
save_steps: 100
save_total_limit: 2

# Loss masking over chain-of-thought; markers are quoted because they contain ':'
mask_cot: true
output_markers:
  - 'Output:'
  - 'OUTPUT:'
  - 'Final:'
  - 'Answer:'
  - 'Result:'
  - 'Response:'
output_learn_mode: after_marker

# Upsampling (disabled; rules map intentionally empty)
upsample_enable: false
upsample_rules: {}