aliangdw committed on
Commit
b001a8d
·
verified ·
1 Parent(s): 289bd4c

Upload config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yaml +189 -0
config.yaml ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ custom_eval:
2
+ comparisons_per_task: 5
3
+ confusion_matrix:
4
+ - mw
5
+ custom_eval_random_seed: 42
6
+ eval_types:
7
+ - reward_alignment
8
+ - policy_ranking
9
+ max_comparisons: null
10
+ num_examples_per_quality_pr: 5
11
+ num_partial_successes: 5
12
+ policy_ranking:
13
+ - libero_pi0_no_fail
14
+ policy_ranking_max_tasks: 100
15
+ quality_preference:
16
+ - mw
17
+ reward_alignment:
18
+ - libero_pi0_no_fail
19
+ reward_alignment_max_trajectories: 10
20
+ similarity_score:
21
+ - aliangdw_metaworld_metaworld_eval
22
+ use_frame_steps: true
23
+ data:
24
+ data_source_weights:
25
+ metaworld_train: 1.0
26
+ molmoact_dataset_household: 1.0
27
+ molmoact_dataset_tabletop: 1.0
28
+ oxe_droid: 1.0
29
+ roboarena: 1.0
30
+ dataloader_num_workers: 8
31
+ dataloader_persistent_workers: true
32
+ dataloader_pin_memory: true
33
+ dataset_preference_ratio: 0.7
34
+ dataset_success_cutoff_file: rfm/data/dataset_success_cutoff.txt
35
+ dataset_type: rfm
36
+ eval_datasets:
37
+ - libero_pi0_no_fail
38
+ eval_subset_size: null
39
+ fps: 10
40
+ load_embeddings: false
41
+ max_frames: 4
42
+ max_frames_after_preprocessing: 64
43
+ max_success: 1.0
44
+ max_trajectories: -1
45
+ min_frames_per_trajectory: 1
46
+ min_success: 0.5
47
+ n_wrong_tasks: 5
48
+ num_bins: 10
49
+ partial_success_threshold: 0.2
50
+ preference_strategy_ratio:
51
+ - 1.0
52
+ - 1.0
53
+ - 1.0
54
+ - 1.0
55
+ progress_discrete_bins: 32
56
+ progress_loss_type: l2
57
+ progress_pred_type: absolute_wrt_total_frames
58
+ progress_strategy_ratio:
59
+ - 1.0
60
+ - 1.0
61
+ - 1.0
62
+ - 1.0
63
+ resized_height: 240
64
+ resized_width: 240
65
+ sample_type_ratio:
66
+ - 0.0
67
+ - 1.0
68
+ - 0.0
69
+ seed: 42
70
+ shuffle: true
71
+ shuffle_progress_frames: false
72
+ similarity_strategy_ratio:
73
+ - 1.0
74
+ - 1.0
75
+ - 1.0
76
+ train_datasets:
77
+ - libero_pi0_no_fail
78
+ traj_same_source_prob: 0.5
79
+ use_data_source_balance: true
80
+ use_multi_image: true
81
+ debug: false
82
+ logging:
83
+ log_level: debug
84
+ log_to:
85
+ - wandb
86
+ save_best:
87
+ greater_is_better:
88
+ - true
89
+ - true
90
+ hub_private: false
91
+ hub_save_every: 1000
92
+ hub_token: null
93
+ keep_top_k: 5
94
+ metric_names:
95
+ - eval_rew_align/pearson_mw_eval
96
+ - eval_p_rank/spearman_mw_eval
97
+ save_every: 500
98
+ upload_to_hub: false
99
+ save_model: true
100
+ save_processor: true
101
+ wandb_entity: clvr
102
+ wandb_mode: null
103
+ wandb_notes: libero prog only
104
+ wandb_project: rfm
105
+ loss:
106
+ predict_last_frame_progress: false
107
+ progress_discrete_bins: 32
108
+ progress_loss_type: l2
109
+ success_positive_weight: 1.0
110
+ mode: train
111
+ model:
112
+ average_temporal_patches: true
113
+ base_model_id: Qwen/Qwen3-VL-4B-Instruct
114
+ causal_mask: false
115
+ model_type: default
116
+ peft_vision_encoder: false
117
+ progress_discrete_bins: 32
118
+ progress_loss_type: l2
119
+ quantization: false
120
+ rewind: null
121
+ rewind_scale_model: false
122
+ torch_dtype: bfloat16
123
+ train_language_model: true
124
+ train_preference_head: false
125
+ train_progress_head: true
126
+ train_similarity_head: false
127
+ train_success_head: false
128
+ train_vision_encoder: false
129
+ trust_remote_code: true
130
+ use_multi_image: true
131
+ use_peft: false
132
+ use_progress_token: false
133
+ use_unsloth: true
134
+ peft:
135
+ bias: none
136
+ lora_alpha: 64
137
+ lora_dropout: 0.05
138
+ r: 32
139
+ target_modules:
140
+ - q_proj
141
+ - k_proj
142
+ - v_proj
143
+ - o_proj
144
+ - gate_proj
145
+ - up_proj
146
+ - down_proj
147
+ trainer_cls: rfm_heads
148
+ training:
149
+ beta: 0.1
150
+ bf16: true
151
+ custom_eval_steps: 500
152
+ dataloader_num_workers: 8
153
+ dataloader_persistent_workers: true
154
+ dataloader_pin_memory: true
155
+ ddp_bucket_cap_mb: 25
156
+ ddp_find_unused_parameters: false
157
+ do_eval: true
158
+ eval_steps: 500
159
+ evaluation_strategy: steps
160
+ exp_name: libero_ablation_prog_4frames_fixdata
161
+ fp16: false
162
+ gradient_accumulation_steps: 1
163
+ gradient_checkpointing: true
164
+ learning_rate: 2.5e-05
165
+ logging_steps: 1
166
+ lr_scheduler_type: cosine
167
+ max_grad_norm: 10.0
168
+ max_seq_length: 1024
169
+ max_steps: 5000
170
+ num_gpus: 2
171
+ num_train_epochs: -1
172
+ output_dir: ./logs
173
+ overwrite_output_dir: true
174
+ per_device_eval_batch_size: 64
175
+ per_device_train_batch_size: 64
176
+ predict_pref_progress: false
177
+ predict_pref_sim: false
178
+ predict_sim_progress: false
179
+ prediction_loss_only: true
180
+ remove_unused_columns: false
181
+ resume_from_checkpoint: null
182
+ run_default_eval: false
183
+ save_steps: 200
184
+ save_strategy: 'no'
185
+ vision_encoder_lr: 1.0e-05
186
+ vision_encoder_num_layers: 3
187
+ warmup_ratio: 0.1
188
+ warmup_steps: 0
189
+ weight_decay: 0.05