aliangdw committed on
Commit
4108e16
·
verified ·
1 Parent(s): 6c0f955

Upload config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yaml +198 -0
config.yaml ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ custom_eval:
2
+ comparisons_per_task: 5
3
+ confusion_matrix:
4
+ - mw
5
+ custom_eval_random_seed: 42
6
+ eval_types:
7
+ - reward_alignment
8
+ - policy_ranking
9
+ max_comparisons: null
10
+ num_examples_per_quality_pr: 5
11
+ num_partial_successes: 5
12
+ pad_frames: true
13
+ policy_ranking:
14
+ - libero_pi0
15
+ policy_ranking_max_tasks: 100
16
+ quality_preference:
17
+ - mw
18
+ reward_alignment:
19
+ - libero_pi0
20
+ reward_alignment_max_trajectories: 10
21
+ similarity_score:
22
+ - aliangdw_metaworld_metaworld_eval
23
+ subsample_n_frames: null
24
+ use_frame_steps: true
25
+ data:
26
+ data_source_weights:
27
+ metaworld_train: 1.0
28
+ molmoact_dataset_household: 1.0
29
+ molmoact_dataset_tabletop: 1.0
30
+ oxe_droid: 1.0
31
+ roboarena: 1.0
32
+ dataloader_num_workers: 8
33
+ dataloader_persistent_workers: true
34
+ dataloader_pin_memory: true
35
+ dataset_preference_ratio: 0.7
36
+ dataset_success_cutoff_file: rfm/data/dataset_success_cutoff.txt
37
+ dataset_type: strategy_first
38
+ eval_datasets:
39
+ - mw
40
+ eval_subset_size: null
41
+ fps: 10
42
+ load_embeddings: false
43
+ max_frames: 4
44
+ max_frames_after_preprocessing: 64
45
+ max_success: 1.0
46
+ max_trajectories: -1
47
+ min_frames_per_trajectory: 1
48
+ min_success: 0.5
49
+ n_wrong_tasks: 5
50
+ num_bins: 10
51
+ partial_success_threshold: 0.2
52
+ predict_last_frame_partial_progress: false
53
+ preference_strategy_ratio:
54
+ - 1.0
55
+ - 1.0
56
+ - 1.0
57
+ - 1.0
58
+ progress_discrete_bins: 32
59
+ progress_loss_type: discrete
60
+ progress_pred_type: absolute_wrt_total_frames
61
+ progress_strategy_ratio:
62
+ - 1.0
63
+ - 1.0
64
+ - 1.0
65
+ - 1.0
66
+ resized_height: null
67
+ resized_width: null
68
+ sample_type_ratio:
69
+ - 1.0
70
+ - 0.0
71
+ - 0.0
72
+ seed: 42
73
+ shuffle: true
74
+ shuffle_progress_frames: false
75
+ similarity_strategy_ratio:
76
+ - 1.0
77
+ - 1.0
78
+ - 1.0
79
+ train_datasets:
80
+ - libero_pi0
81
+ traj_same_source_prob: 0.5
82
+ use_data_source_balance: false
83
+ use_multi_image: true
84
+ use_per_frame_progress_token: true
85
+ debug: false
86
+ logging:
87
+ log_level: debug
88
+ log_to:
89
+ - wandb
90
+ save_best:
91
+ greater_is_better:
92
+ - true
93
+ - true
94
+ - true
95
+ hub_private: false
96
+ hub_save_every: 1000
97
+ hub_token: null
98
+ keep_top_k: 5
99
+ metric_names:
100
+ - eval_p_rank/kendall_rewind_last_libero_90
101
+ - eval_p_rank/avg_succ_fail_diff_last_libero_90
102
+ - eval_rew_align/pearson_libero_90
103
+ save_every: 50
104
+ upload_to_hub: false
105
+ save_model: true
106
+ save_processor: true
107
+ wandb_entity: clvr
108
+ wandb_mode: null
109
+ wandb_notes: libero prog pref with fail lora ft
110
+ wandb_project: rfm
111
+ loss:
112
+ predict_last_frame_progress: false
113
+ progress_discrete_bins: 32
114
+ progress_loss_type: discrete
115
+ success_positive_weight: 1.0
116
+ mode: train
117
+ model:
118
+ average_temporal_patches: true
119
+ base_model_id: Qwen/Qwen3-VL-4B-Instruct
120
+ causal_mask: false
121
+ frame_pooling: mean
122
+ frame_pooling_attn_temperature: 1.0
123
+ model_type: default
124
+ peft_vision_encoder: false
125
+ progress_discrete_bins: 32
126
+ progress_loss_type: discrete
127
+ quantization: false
128
+ rewind: null
129
+ rewind_scale_model: false
130
+ torch_dtype: bfloat16
131
+ train_language_model: true
132
+ train_preference_head: true
133
+ train_progress_head: true
134
+ train_similarity_head: false
135
+ train_success_head: false
136
+ train_vision_encoder: false
137
+ trust_remote_code: true
138
+ use_multi_image: true
139
+ use_peft: true
140
+ use_per_frame_progress_token: true
141
+ use_unsloth: true
142
+ peft:
143
+ bias: none
144
+ lora_alpha: 64
145
+ lora_dropout: 0.05
146
+ peft_vision_encoder: false
147
+ r: 32
148
+ target_modules:
149
+ - q_proj
150
+ - k_proj
151
+ - v_proj
152
+ - o_proj
153
+ - gate_proj
154
+ - up_proj
155
+ - down_proj
156
+ trainer_cls: rfm_heads
157
+ training:
158
+ beta: 0.1
159
+ bf16: true
160
+ custom_eval_steps: 50
161
+ dataloader_num_workers: 8
162
+ dataloader_persistent_workers: true
163
+ dataloader_pin_memory: true
164
+ ddp_bucket_cap_mb: 25
165
+ ddp_find_unused_parameters: false
166
+ do_eval: true
167
+ eval_steps: 50
168
+ evaluation_strategy: steps
169
+ exp_name: libero_ablation_prog_pref_with_fail_lora_ft_4frames_2000steps
170
+ fp16: false
171
+ gradient_accumulation_steps: 1
172
+ gradient_checkpointing: true
173
+ learning_rate: 2.5e-05
174
+ logging_steps: 1
175
+ lr_scheduler_type: cosine
176
+ max_grad_norm: 10.0
177
+ max_seq_length: 1024
178
+ max_steps: 2000
179
+ num_gpus: 2
180
+ num_train_epochs: -1
181
+ output_dir: ./logs
182
+ overwrite_output_dir: true
183
+ per_device_eval_batch_size: 64
184
+ per_device_train_batch_size: 64
185
+ predict_pref_progress: true
186
+ predict_pref_sim: false
187
+ predict_sim_progress: false
188
+ prediction_loss_only: true
189
+ remove_unused_columns: false
190
+ resume_from_checkpoint: null
191
+ run_default_eval: false
192
+ save_steps: 200
193
+ save_strategy: 'no'
194
+ vision_encoder_lr: 1.0e-05
195
+ vision_encoder_num_layers: 3
196
+ warmup_ratio: 0.1
197
+ warmup_steps: 0
198
+ weight_decay: 0.05