lllqaq commited on
Commit
074ec68
·
verified ·
1 Parent(s): e0843da

Upload train_r2egym_14B_agent_coder_instruct.yaml

Browse files
train_r2egym_14B_agent_coder_instruct.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Training hyperparameters from the paper + standard inference parameters
2
+ ### model
3
+ model_name_or_path: Qwen/Qwen2.5-Coder-14B-Instruct
4
+ trust_remote_code: true
5
+
6
+ ### method
7
+ stage: sft
8
+ do_train: true
9
+ finetuning_type: full
10
+ deepspeed: /home/xuye_liu/yubo/LLaMA-Factory/examples/deepspeed/ds_z3_config.json
11
+
12
+ ### dataset
13
+ dataset: r2egym_sft_trajectories
14
+ dataset_dir: /home/xuye_liu/yubo/LLaMA-Factory/data
15
+ template: qwen
16
+ cutoff_len: 20000
17
+ max_samples: 100000
18
+ overwrite_cache: true
19
+ preprocessing_num_workers: 16
20
+
21
+ ### output
22
+ output_dir: /home/xuye_liu/yubo/LLaMA-Factory/saves/R2EGym-14B-Agent-Coder-Instruct
23
+ logging_steps: 10
24
+ resume_from_checkpoint: null
25
+ save_steps: 200
26
+ plot_loss: true
27
+ overwrite_output_dir: false
28
+
29
+ ### train
30
+ flash_attn: fa2
31
+ enable_liger_kernel: true
32
+ use_unsloth_gc: true
33
+
34
+ per_device_train_batch_size: 1
35
+ # Global batch size = per_device_train_batch_size * gradient_accumulation_steps * world_size.
36
+ # Using GPUs 4,5,6,7 => world_size=4, so 1 * 2 * 4 = 8.
37
+ gradient_accumulation_steps: 2
38
+
39
+ learning_rate: 1.0e-5
40
+ weight_decay: 0.05
41
+ num_train_epochs: 2.0
42
+ lr_scheduler_type: cosine
43
+ warmup_ratio: 0.1
44
+ bf16: true
45
+ ddp_timeout: 180000000
46
+
47
+ ### wandb
48
+ report_to: none
49
+ run_name: R2EGym-14B-Agent-Coder