aliangdw committed on
Commit
e051c92
·
verified ·
1 Parent(s): ec9b401

Update config.yaml

Browse files
Files changed (1) hide show
  1. config.yaml +7 -17
config.yaml CHANGED
@@ -11,16 +11,14 @@ custom_eval:
11
  num_partial_successes: 5
12
  pad_frames: true
13
  policy_ranking:
14
- - rfm-1m-ood
15
  policy_ranking_max_tasks: 100
16
  quality_preference:
17
  - mw
18
  reward_alignment:
19
- - rfm-1m-id
20
- - rfm-1m-ood
21
  reward_alignment_max_trajectories: 10
22
- similarity_score:
23
- - aliangdw_metaworld_metaworld_eval
24
  subsample_n_frames: null
25
  use_frame_steps: true
26
  data:
@@ -34,7 +32,7 @@ data:
34
  dataloader_persistent_workers: true
35
  dataloader_pin_memory: true
36
  dataset_preference_ratio: 0.7
37
- dataset_success_cutoff_file: rfm/data/dataset_success_cutoff.txt
38
  dataset_type: strategy_first
39
  eval_datasets:
40
  - mw
@@ -70,14 +68,9 @@ data:
70
  seed: 42
71
  shuffle: true
72
  shuffle_progress_frames: false
73
- similarity_strategy_ratio:
74
- - 1.0
75
- - 1.0
76
- - 1.0
77
  train_datasets:
78
- - rfm-1m-id
79
  traj_same_source_prob: 0.5
80
- use_data_source_balance: false
81
  use_multi_image: true
82
  use_per_frame_progress_token: true
83
  debug: false
@@ -109,7 +102,7 @@ logging:
109
  wandb_entity: clvr
110
  wandb_mode: null
111
  wandb_notes: all run with prog_token per frame, qwen 4b, discrete progress, 10 bins
112
- wandb_project: rfm
113
  loss:
114
  predict_last_frame_progress: false
115
  progress_discrete_bins: 10
@@ -131,7 +124,6 @@ model:
131
  train_language_model: true
132
  train_preference_head: true
133
  train_progress_head: true
134
- train_similarity_head: false
135
  train_success_head: true
136
  train_vision_encoder: false
137
  trust_remote_code: true
@@ -153,7 +145,7 @@ peft:
153
  - gate_proj
154
  - up_proj
155
  - down_proj
156
- trainer_cls: rfm_heads
157
  training:
158
  beta: 0.1
159
  bf16: true
@@ -183,8 +175,6 @@ training:
183
  per_device_eval_batch_size: 16
184
  per_device_train_batch_size: 16
185
  predict_pref_progress: true
186
- predict_pref_sim: false
187
- predict_sim_progress: false
188
  prediction_loss_only: true
189
  remove_unused_columns: false
190
  resume_from_checkpoint: /gpfs/home/jessezha/scrubbed_storage/reward_fm/logs/ant_rfm_qwen4b_4gpu_bs16_pref_prog_succ_8_frames_all_discrete_10_bins/ckpt-avg-5metrics=0.6973_step=3000
 
11
  num_partial_successes: 5
12
  pad_frames: true
13
  policy_ranking:
14
+ - rbm-1m-ood
15
  policy_ranking_max_tasks: 100
16
  quality_preference:
17
  - mw
18
  reward_alignment:
19
+ - rbm-1m-id
20
+ - rbm-1m-ood
21
  reward_alignment_max_trajectories: 10
 
 
22
  subsample_n_frames: null
23
  use_frame_steps: true
24
  data:
 
32
  dataloader_persistent_workers: true
33
  dataloader_pin_memory: true
34
  dataset_preference_ratio: 0.7
35
+ dataset_success_cutoff_file: robometer/data/dataset_success_cutoff.txt
36
  dataset_type: strategy_first
37
  eval_datasets:
38
  - mw
 
68
  seed: 42
69
  shuffle: true
70
  shuffle_progress_frames: false
 
 
 
 
71
  train_datasets:
72
+ - rbm-1m-id
73
  traj_same_source_prob: 0.5
 
74
  use_multi_image: true
75
  use_per_frame_progress_token: true
76
  debug: false
 
102
  wandb_entity: clvr
103
  wandb_mode: null
104
  wandb_notes: all run with prog_token per frame, qwen 4b, discrete progress, 10 bins
105
+ wandb_project: robometer
106
  loss:
107
  predict_last_frame_progress: false
108
  progress_discrete_bins: 10
 
124
  train_language_model: true
125
  train_preference_head: true
126
  train_progress_head: true
 
127
  train_success_head: true
128
  train_vision_encoder: false
129
  trust_remote_code: true
 
145
  - gate_proj
146
  - up_proj
147
  - down_proj
148
+ trainer_cls: rbm_heads
149
  training:
150
  beta: 0.1
151
  bf16: true
 
175
  per_device_eval_batch_size: 16
176
  per_device_train_batch_size: 16
177
  predict_pref_progress: true
 
 
178
  prediction_loss_only: true
179
  remove_unused_columns: false
180
  resume_from_checkpoint: /gpfs/home/jessezha/scrubbed_storage/reward_fm/logs/ant_rfm_qwen4b_4gpu_bs16_pref_prog_succ_8_frames_all_discrete_10_bins/ckpt-avg-5metrics=0.6973_step=3000