| exp_name: exp001_baseline | |
| base_model: Qwen/Qwen3-4B-Instruct-2507 | |
| dataset_id: u-10bei/sft_alfworld_trajectory_dataset_v5 | |
| seed: 3407 | |
| val_ratio: 0.05 | |
| max_seq_len: 2048 | |
| lora_r: 64 | |
| lora_alpha: 128 | |
| lora_dropout: 0.0 | |
| lora_target_modules: | |
| - q_proj | |
| - k_proj | |
| - v_proj | |
| - o_proj | |
| - gate_proj | |
| - up_proj | |
| - down_proj | |
| epochs: 2 | |
| batch_size: 2 | |
| grad_accum: 4 | |
| lr: 2.0e-06 | |
| warmup_ratio: 0.1 | |
| weight_decay: 0.05 | |
| max_steps: -1 | |
| logging_steps: 10 | |
| eval_steps: 30 | |
| save_steps: 100 | |
| save_total_limit: 2 | |