Robometer-4B

aliangdw committed on Feb 13

Commit

e051c92

verified ·

1 Parent(s): ec9b401

Update config.yaml

Browse files

Files changed (1) hide show

config.yaml +7 -17

config.yaml CHANGED Viewed

@@ -11,16 +11,14 @@ custom_eval:
   num_partial_successes: 5
   pad_frames: true
   policy_ranking:
-  - rfm-1m-ood
   policy_ranking_max_tasks: 100
   quality_preference:
   - mw
   reward_alignment:
-  - rfm-1m-id
-  - rfm-1m-ood
   reward_alignment_max_trajectories: 10
-  similarity_score:
-  - aliangdw_metaworld_metaworld_eval
   subsample_n_frames: null
   use_frame_steps: true
 data:
@@ -34,7 +32,7 @@ data:
   dataloader_persistent_workers: true
   dataloader_pin_memory: true
   dataset_preference_ratio: 0.7
-  dataset_success_cutoff_file: rfm/data/dataset_success_cutoff.txt
   dataset_type: strategy_first
   eval_datasets:
   - mw
@@ -70,14 +68,9 @@ data:
   seed: 42
   shuffle: true
   shuffle_progress_frames: false
-  similarity_strategy_ratio:
-  - 1.0
-  - 1.0
-  - 1.0
   train_datasets:
-  - rfm-1m-id
   traj_same_source_prob: 0.5
-  use_data_source_balance: false
   use_multi_image: true
   use_per_frame_progress_token: true
 debug: false
@@ -109,7 +102,7 @@ logging:
   wandb_entity: clvr
   wandb_mode: null
   wandb_notes: all run with prog_token per frame, qwen 4b, discrete progress, 10 bins
-  wandb_project: rfm
 loss:
   predict_last_frame_progress: false
   progress_discrete_bins: 10
@@ -131,7 +124,6 @@ model:
   train_language_model: true
   train_preference_head: true
   train_progress_head: true
-  train_similarity_head: false
   train_success_head: true
   train_vision_encoder: false
   trust_remote_code: true
@@ -153,7 +145,7 @@ peft:
   - gate_proj
   - up_proj
   - down_proj
-trainer_cls: rfm_heads
 training:
   beta: 0.1
   bf16: true
@@ -183,8 +175,6 @@ training:
   per_device_eval_batch_size: 16
   per_device_train_batch_size: 16
   predict_pref_progress: true
-  predict_pref_sim: false
-  predict_sim_progress: false
   prediction_loss_only: true
   remove_unused_columns: false
   resume_from_checkpoint: /gpfs/home/jessezha/scrubbed_storage/reward_fm/logs/ant_rfm_qwen4b_4gpu_bs16_pref_prog_succ_8_frames_all_discrete_10_bins/ckpt-avg-5metrics=0.6973_step=3000

   num_partial_successes: 5
   pad_frames: true
   policy_ranking:
+  - rbm-1m-ood
   policy_ranking_max_tasks: 100
   quality_preference:
   - mw
   reward_alignment:
+  - rbm-1m-id
+  - rbm-1m-ood
   reward_alignment_max_trajectories: 10
   subsample_n_frames: null
   use_frame_steps: true
 data:
   dataloader_persistent_workers: true
   dataloader_pin_memory: true
   dataset_preference_ratio: 0.7
+  dataset_success_cutoff_file: robometer/data/dataset_success_cutoff.txt
   dataset_type: strategy_first
   eval_datasets:
   - mw
   seed: 42
   shuffle: true
   shuffle_progress_frames: false
   train_datasets:
+  - rbm-1m-id
   traj_same_source_prob: 0.5
   use_multi_image: true
   use_per_frame_progress_token: true
 debug: false
   wandb_entity: clvr
   wandb_mode: null
   wandb_notes: all run with prog_token per frame, qwen 4b, discrete progress, 10 bins
+  wandb_project: robometer
 loss:
   predict_last_frame_progress: false
   progress_discrete_bins: 10
   train_language_model: true
   train_preference_head: true
   train_progress_head: true
   train_success_head: true
   train_vision_encoder: false
   trust_remote_code: true
   - gate_proj
   - up_proj
   - down_proj
+trainer_cls: rbm_heads
 training:
   beta: 0.1
   bf16: true
   per_device_eval_batch_size: 16
   per_device_train_batch_size: 16
   predict_pref_progress: true
   prediction_loss_only: true
   remove_unused_columns: false
   resume_from_checkpoint: /gpfs/home/jessezha/scrubbed_storage/reward_fm/logs/ant_rfm_qwen4b_4gpu_bs16_pref_prog_succ_8_frames_all_discrete_10_bins/ckpt-avg-5metrics=0.6973_step=3000