base_model: Qwen/Qwen3-4B-Instruct-2507
dataset_id: ./data/sft_structured_data_with_cot_512_v2.jsonl
output_dir: ./model/sft_lora_model/sft_r16_e2_lr1e5_structured_data_with_cot_dataset_512_v2
seed: 3407
val_ratio: 0.05
max_seq_len: 2048
lora_r: 16
lora_alpha: 32
lora_dropout: 0.0
lora_target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - gate_proj
  - up_proj
  - down_proj
epochs: 2
batch_size: 4
eval_batch_size: 2
grad_accum: 8
lr: 1.0e-05
optim: adamw_torch
warmup_ratio: 0.1
weight_decay: 0.05
lr_scheduler_type: cosine
max_steps: -1
logging_steps: 10
eval_steps: 50
save_steps: 100
save_total_limit: 2
mask_cot: true
output_markers:
  - 'Output:'
  - 'OUTPUT:'
  - 'Final:'
  - 'Answer:'
  - 'Result:'
  - 'Response:'
output_learn_mode: after_marker
upsample_enable: false
upsample_rules: {}