HenryZhang committed on
Commit
e6740d3
·
verified ·
1 Parent(s): 7f5f327

Upload config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yaml +182 -0
config.yaml ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ custom_eval:
2
+ comparisons_per_task: 5
3
+ confusion_matrix:
4
+ - aliangdw_metaworld_metaworld_eval
5
+ eval_types:
6
+ - reward_alignment
7
+ - policy_ranking
8
+ num_examples_per_quality_pr: 5
9
+ policy_ranking:
10
+ - aliangdw_metaworld_metaworld_eval
11
+ - aliangdw_utd_so101_policy_ranking_utd_so101_policy_ranking
12
+ - aliangdw_usc_franka_policy_ranking_usc_franka_policy_ranking
13
+ - aliangdw_usc_xarm_policy_ranking_usc_xarm_policy_ranking
14
+ - jesbu1_roboarena_eval_debug_nowrist_roboarena_eval_debug_nowrist
15
+ quality_preference:
16
+ - mw
17
+ reward_alignment:
18
+ - jesbu1_roboarena_eval_debug_nowrist_roboarena_eval_debug_nowrist
19
+ - aliangdw_metaworld_metaworld_eval
20
+ similarity_score:
21
+ - aliangdw_metaworld_metaworld_eval
22
+ data:
23
+ data_source_weights:
24
+ metaworld_train: 1.0
25
+ roboarena: 1.0
26
+ dataloader_num_workers: 8
27
+ dataloader_persistent_workers: true
28
+ dataloader_pin_memory: true
29
+ dataset_preference_ratio: 0.7
30
+ dataset_success_cutoff_file: rfm/data/dataset_success_cutoff.txt
31
+ dataset_type: rfm
32
+ eval_datasets:
33
+ - jesbu1_roboarena_eval_debug_nowrist_roboarena_eval_debug_nowrist
34
+ eval_subset_size: null
35
+ fps: 10
36
+ load_embeddings: true
37
+ max_frames: 8
38
+ max_frames_after_preprocessing: 64
39
+ max_success: 1.0
40
+ max_trajectories: -1
41
+ min_frames_per_trajectory: 10
42
+ min_success: 0.8
43
+ n_wrong_tasks: 5
44
+ num_bins: 10
45
+ pairwise_progress: false
46
+ preference_strategy_ratio:
47
+ - 6.0
48
+ - 1.0
49
+ - 1.0
50
+ progress_pred_type: absolute_wrt_total_frames
51
+ progress_strategy_ratio:
52
+ - 1.0
53
+ - 2.0
54
+ - 1.0
55
+ - 1.0
56
+ - 1.0
57
+ resized_height: 196
58
+ resized_width: 196
59
+ rewind_lengths: null
60
+ roboarena_partial_success_threshold: 0.2
61
+ sample_type_ratio:
62
+ - 1.0
63
+ - 0.0
64
+ - 0.0
65
+ samples_per_trajectory: 1
66
+ seed: 42
67
+ shuffle: true
68
+ shuffle_progress_frames: false
69
+ similarity_strategy_ratio:
70
+ - 1.0
71
+ - 1.0
72
+ - 1.0
73
+ task_instruction_same_source_prob: 0.5
74
+ train_datasets:
75
+ - jesbu1_roboarena_eval_debug_nowrist_roboarena_eval_debug_nowrist
76
+ use_multi_image: true
77
+ use_uniform_sampling: false
78
+ debug: false
79
+ logging:
80
+ log_level: INFO
81
+ log_to:
82
+ - wandb
83
+ save_best:
84
+ greater_is_better:
85
+ - true
86
+ - true
87
+ hub_private: false
88
+ hub_save_every: 250
89
+ hub_token: null  # secret redacted: never commit access tokens; revoke the exposed token and supply a new one via the HF_TOKEN environment variable
90
+ keep_top_k: 5
91
+ metric_names:
92
+ - eval_rew_align/pearson_mw_eval
93
+ - eval_p_rank/spearman_mw_eval
94
+ save_every: 250
95
+ upload_to_hub: false
96
+ save_model: true
97
+ save_processor: true
98
+ wandb_entity: clvr
99
+ wandb_mode: null
100
+ wandb_notes: training RFM
101
+ wandb_project: rfm
102
+ loss:
103
+ predict_last_frame_progress: false
104
+ success_positive_weight: 6.0
105
+ mode: train
106
+ model:
107
+ average_temporal_patches: false
108
+ base_model_id: rewind_scale_transformer
109
+ causal_mask: false
110
+ model_type: default
111
+ pairwise_progress: false
112
+ peft_vision_encoder: false
113
+ quantization: false
114
+ rewind: null
115
+ rewind_scale_model: true
116
+ torch_dtype: bfloat16
117
+ train_language_model: false
118
+ train_preference_head: true
119
+ train_progress_head: true
120
+ train_similarity_head: false
121
+ train_success_head: false
122
+ train_vision_encoder: false
123
+ trust_remote_code: true
124
+ use_multi_image: true
125
+ use_peft: false
126
+ use_progress_token: false
127
+ use_unsloth: false
128
+ peft:
129
+ bias: none
130
+ lora_alpha: 64
131
+ lora_dropout: 0.05
132
+ r: 32
133
+ target_modules:
134
+ - q_proj
135
+ - k_proj
136
+ - v_proj
137
+ - o_proj
138
+ - gate_proj
139
+ - up_proj
140
+ - down_proj
141
+ trainer_cls: rewind_scale_transformer
142
+ training:
143
+ beta: 0.1
144
+ bf16: true
145
+ custom_eval_steps: 250
146
+ dataloader_num_workers: 8
147
+ dataloader_persistent_workers: true
148
+ dataloader_pin_memory: true
149
+ ddp_bucket_cap_mb: 25
150
+ ddp_find_unused_parameters: false
151
+ do_eval: true
152
+ eval_steps: 250
153
+ evaluation_strategy: steps
154
+ exp_name: rewind_scale_Progress_Pref_test_save
155
+ fp16: false
156
+ gradient_accumulation_steps: 1
157
+ gradient_checkpointing: false
158
+ learning_rate: 2.0e-05
159
+ logging_steps: 1
160
+ lr_scheduler_type: cosine
161
+ max_grad_norm: 10.0
162
+ max_seq_length: 1024
163
+ max_steps: 100000
164
+ num_gpus: 2
165
+ num_train_epochs: -1
166
+ output_dir: ./rewind_logs
167
+ overwrite_output_dir: true
168
+ per_device_eval_batch_size: 16
169
+ per_device_train_batch_size: 1024
170
+ predict_pref_progress: true
171
+ predict_sim_progress: true
172
+ prediction_loss_only: true
173
+ remove_unused_columns: false
174
+ resume_from_checkpoint: null
175
+ run_default_eval: false
176
+ save_steps: 250
177
+ save_strategy: 'no'
178
+ vision_encoder_lr: 1.0e-05
179
+ vision_encoder_num_layers: 3
180
+ warmup_ratio: 0.1
181
+ warmup_steps: 0
182
+ weight_decay: 0.1