luanns committed on
Commit
1844cc1
·
verified ·
1 Parent(s): 274c457

Upload configs/grpo_config.yaml

Browse files
Files changed (1) hide show
  1. configs/grpo_config.yaml +44 -0
configs/grpo_config.yaml ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GRPO Training Configuration for GUI-Shift
2
+ # Based on: arXiv:2505.12493 Appendix A
3
+
4
+ # Model
5
+ model_name_or_path: Qwen/Qwen2.5-VL-7B-Instruct
6
+ freeze_vision_modules: true
7
+
8
+ # Training Hyperparameters
9
+ num_train_epochs: 4
10
+ per_device_train_batch_size: 2
11
+ gradient_accumulation_steps: 8
12
+ learning_rate: 1.0e-6
13
+ lr_scheduler_type: cosine
14
+ warmup_ratio: 0.1
15
+
16
+ # GRPO Specific
17
+ num_generations: 8
18
+ temperature: 0.9
19
+ beta: 0.04 # KL divergence coefficient
20
+ epsilon: 0.2 # Clipping parameter
21
+
22
+ # Sequence Length
23
+ max_prompt_length: 1024
24
+ max_completion_length: 256
25
+
26
+ # Hardware
27
+ bf16: true
28
+ gradient_checkpointing: true
29
+ attn_implementation: flash_attention_2
30
+
31
+ # Logging & Saving
32
+ logging_steps: 1
33
+ save_steps: 400
34
+ report_to: wandb
35
+
36
+ # Paths
37
+ data_file_paths: ./data/gui_transition/filtered/k1_transition_filtered.jsonl
38
+ image_folders: ./data/gui_transition/images
39
+ output_dir: ./checkpoints/gui-shift-qwen
40
+
41
+ # Reward Functions
42
+ reward_funcs:
43
+ - format
44
+ - accuracy