Update config.yaml
Browse files- config.yaml +7 -17
config.yaml
CHANGED
|
@@ -11,16 +11,14 @@ custom_eval:
|
|
| 11 |
num_partial_successes: 5
|
| 12 |
pad_frames: true
|
| 13 |
policy_ranking:
|
| 14 |
-
-
|
| 15 |
policy_ranking_max_tasks: 100
|
| 16 |
quality_preference:
|
| 17 |
- mw
|
| 18 |
reward_alignment:
|
| 19 |
-
-
|
| 20 |
-
-
|
| 21 |
reward_alignment_max_trajectories: 10
|
| 22 |
-
similarity_score:
|
| 23 |
-
- aliangdw_metaworld_metaworld_eval
|
| 24 |
subsample_n_frames: null
|
| 25 |
use_frame_steps: true
|
| 26 |
data:
|
|
@@ -34,7 +32,7 @@ data:
|
|
| 34 |
dataloader_persistent_workers: true
|
| 35 |
dataloader_pin_memory: true
|
| 36 |
dataset_preference_ratio: 0.7
|
| 37 |
-
dataset_success_cutoff_file:
|
| 38 |
dataset_type: strategy_first
|
| 39 |
eval_datasets:
|
| 40 |
- mw
|
|
@@ -70,14 +68,9 @@ data:
|
|
| 70 |
seed: 42
|
| 71 |
shuffle: true
|
| 72 |
shuffle_progress_frames: false
|
| 73 |
-
similarity_strategy_ratio:
|
| 74 |
-
- 1.0
|
| 75 |
-
- 1.0
|
| 76 |
-
- 1.0
|
| 77 |
train_datasets:
|
| 78 |
-
-
|
| 79 |
traj_same_source_prob: 0.5
|
| 80 |
-
use_data_source_balance: false
|
| 81 |
use_multi_image: true
|
| 82 |
use_per_frame_progress_token: true
|
| 83 |
debug: false
|
|
@@ -109,7 +102,7 @@ logging:
|
|
| 109 |
wandb_entity: clvr
|
| 110 |
wandb_mode: null
|
| 111 |
wandb_notes: all run with prog_token per frame, qwen 4b, discrete progress, 10 bins
|
| 112 |
-
wandb_project:
|
| 113 |
loss:
|
| 114 |
predict_last_frame_progress: false
|
| 115 |
progress_discrete_bins: 10
|
|
@@ -131,7 +124,6 @@ model:
|
|
| 131 |
train_language_model: true
|
| 132 |
train_preference_head: true
|
| 133 |
train_progress_head: true
|
| 134 |
-
train_similarity_head: false
|
| 135 |
train_success_head: true
|
| 136 |
train_vision_encoder: false
|
| 137 |
trust_remote_code: true
|
|
@@ -153,7 +145,7 @@ peft:
|
|
| 153 |
- gate_proj
|
| 154 |
- up_proj
|
| 155 |
- down_proj
|
| 156 |
-
trainer_cls:
|
| 157 |
training:
|
| 158 |
beta: 0.1
|
| 159 |
bf16: true
|
|
@@ -183,8 +175,6 @@ training:
|
|
| 183 |
per_device_eval_batch_size: 16
|
| 184 |
per_device_train_batch_size: 16
|
| 185 |
predict_pref_progress: true
|
| 186 |
-
predict_pref_sim: false
|
| 187 |
-
predict_sim_progress: false
|
| 188 |
prediction_loss_only: true
|
| 189 |
remove_unused_columns: false
|
| 190 |
resume_from_checkpoint: /gpfs/home/jessezha/scrubbed_storage/reward_fm/logs/ant_rfm_qwen4b_4gpu_bs16_pref_prog_succ_8_frames_all_discrete_10_bins/ckpt-avg-5metrics=0.6973_step=3000
|
|
|
|
| 11 |
num_partial_successes: 5
|
| 12 |
pad_frames: true
|
| 13 |
policy_ranking:
|
| 14 |
+
- rbm-1m-ood
|
| 15 |
policy_ranking_max_tasks: 100
|
| 16 |
quality_preference:
|
| 17 |
- mw
|
| 18 |
reward_alignment:
|
| 19 |
+
- rbm-1m-id
|
| 20 |
+
- rbm-1m-ood
|
| 21 |
reward_alignment_max_trajectories: 10
|
|
|
|
|
|
|
| 22 |
subsample_n_frames: null
|
| 23 |
use_frame_steps: true
|
| 24 |
data:
|
|
|
|
| 32 |
dataloader_persistent_workers: true
|
| 33 |
dataloader_pin_memory: true
|
| 34 |
dataset_preference_ratio: 0.7
|
| 35 |
+
dataset_success_cutoff_file: robometer/data/dataset_success_cutoff.txt
|
| 36 |
dataset_type: strategy_first
|
| 37 |
eval_datasets:
|
| 38 |
- mw
|
|
|
|
| 68 |
seed: 42
|
| 69 |
shuffle: true
|
| 70 |
shuffle_progress_frames: false
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
train_datasets:
|
| 72 |
+
- rbm-1m-id
|
| 73 |
traj_same_source_prob: 0.5
|
|
|
|
| 74 |
use_multi_image: true
|
| 75 |
use_per_frame_progress_token: true
|
| 76 |
debug: false
|
|
|
|
| 102 |
wandb_entity: clvr
|
| 103 |
wandb_mode: null
|
| 104 |
wandb_notes: all run with prog_token per frame, qwen 4b, discrete progress, 10 bins
|
| 105 |
+
wandb_project: robometer
|
| 106 |
loss:
|
| 107 |
predict_last_frame_progress: false
|
| 108 |
progress_discrete_bins: 10
|
|
|
|
| 124 |
train_language_model: true
|
| 125 |
train_preference_head: true
|
| 126 |
train_progress_head: true
|
|
|
|
| 127 |
train_success_head: true
|
| 128 |
train_vision_encoder: false
|
| 129 |
trust_remote_code: true
|
|
|
|
| 145 |
- gate_proj
|
| 146 |
- up_proj
|
| 147 |
- down_proj
|
| 148 |
+
trainer_cls: rbm_heads
|
| 149 |
training:
|
| 150 |
beta: 0.1
|
| 151 |
bf16: true
|
|
|
|
| 175 |
per_device_eval_batch_size: 16
|
| 176 |
per_device_train_batch_size: 16
|
| 177 |
predict_pref_progress: true
|
|
|
|
|
|
|
| 178 |
prediction_loss_only: true
|
| 179 |
remove_unused_columns: false
|
| 180 |
resume_from_checkpoint: /gpfs/home/jessezha/scrubbed_storage/reward_fm/logs/ant_rfm_qwen4b_4gpu_bs16_pref_prog_succ_8_frames_all_discrete_10_bins/ckpt-avg-5metrics=0.6973_step=3000
|