---
custom_eval:
  comparisons_per_task: 5
  confusion_matrix:
    - mw
  custom_eval_random_seed: 42
  eval_types:
    - reward_alignment
    - policy_ranking
  num_examples_per_quality_pr: 5
  num_partial_successes: 5
  pad_frames: true
  policy_ranking:
    - rbm-1m-ood
  policy_ranking_max_tasks: 100
  quality_preference:
    - mw
  reward_alignment:
    - rbm-1m-id
    - rbm-1m-ood
  reward_alignment_max_trajectories: 10
  subsample_n_frames: null
  use_frame_steps: true
data:
  data_source_weights:
    metaworld_train: 1.0
    molmoact_dataset_household: 1.0
    molmoact_dataset_tabletop: 1.0
    oxe_droid: 1.0
    roboarena: 1.0
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  dataset_preference_ratio: 0.7
  dataset_success_cutoff_file: robometer/data/dataset_success_cutoff.txt
  dataset_type: strategy_first
  eval_datasets:
    - mw
  eval_subset_size: null
  load_embeddings: false
  max_frames: 8
  max_frames_after_preprocessing: 64
  max_success: 1.0
  max_trajectories: -1
  min_frames_per_trajectory: 5
  min_success: 0.5
  partial_success_threshold: 0.2
  predict_last_frame_partial_progress: false
  preference_strategy_ratio:
    - 1.0
    - 1.0
    - 1.0
    - 1.0
  progress_discrete_bins: 10
  progress_loss_type: discrete
  progress_pred_type: absolute_wrt_total_frames
  progress_strategy_ratio:
    - 1.0
    - 1.0
    - 1.0
    - 1.0
  resized_height: null
  resized_width: null
  sample_type_ratio:
    - 1.0
    - 0.0
    - 0.0
  seed: 42
  shuffle: true
  shuffle_progress_frames: false
  train_datasets:
    - rbm-1m-id
  traj_same_source_prob: 0.5
  use_multi_image: true
  use_per_frame_progress_token: true
debug: false
logging:
  log_level: debug
  log_to:
    - wandb
  save_best:
    greater_is_better:
      - true
      - true
      - true
      - true
      - true
    hub_private: false
    hub_save_every: 1000
    hub_token: null
    keep_top_k: 5
    metric_names:
      - eval_p_rank/kendall_last_utd_so101_clean_top
      - eval_p_rank/kendall_last_usc_xarm
      - eval_p_rank/kendall_last_usc_franka
      - eval_p_rank/kendall_last_rfm_new_mit_franka_nowrist
      - eval_p_rank/kendall_last_usc_trossen
    save_every: 250
    upload_to_hub: false
  save_model: true
  save_processor: true
  wandb_entity: clvr
  wandb_mode: null
  wandb_notes: all run with prog_token per frame, qwen 4b, discrete progress, 10 bins
  wandb_project: robometer
loss:
  predict_last_frame_progress: false
  progress_discrete_bins: 10
  progress_loss_type: discrete
  success_positive_weight: 1.0
mode: train
model:
  average_temporal_patches: true
  base_model_id: Qwen/Qwen3-VL-4B-Instruct
  frame_pooling: mean
  frame_pooling_attn_temperature: 1.0
  model_type: default
  peft_vision_encoder: false
  progress_discrete_bins: 10
  progress_loss_type: discrete
  quantization: false
  rewind: null
  torch_dtype: bfloat16
  train_language_model: true
  train_preference_head: true
  train_progress_head: true
  train_success_head: true
  train_vision_encoder: false
  trust_remote_code: true
  use_multi_image: true
  use_per_frame_progress_token: true
  use_unsloth: true
peft:
  bias: none
  lora_alpha: 64
  lora_dropout: 0.05
  peft_vision_encoder: false
  r: 32
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj
trainer_cls: rbm_heads
training:
  beta: 0.1
  bf16: true
  custom_eval_steps: 250
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  ddp_bucket_cap_mb: 25
  ddp_find_unused_parameters: false
  do_eval: true
  eval_steps: 250
  evaluation_strategy: steps
  exp_name: ant_rfm_qwen4b_4gpu_bs16_pref_prog_succ_8_frames_all_discrete_10_bins_part2
  fp16: false
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  learning_rate: 2.0e-05
  logging_steps: 1
  lr_scheduler_type: cosine
  max_grad_norm: 10.0
  max_seq_length: 1024
  max_steps: 15000
  num_gpus: 2
  num_train_epochs: -1
  output_dir: ./logs
  overwrite_output_dir: true
  per_device_eval_batch_size: 16
  per_device_train_batch_size: 16
  predict_pref_progress: true
  prediction_loss_only: true
  remove_unused_columns: false
  resume_from_checkpoint: /gpfs/home/jessezha/scrubbed_storage/reward_fm/logs/ant_rfm_qwen4b_4gpu_bs16_pref_prog_succ_8_frames_all_discrete_10_bins/ckpt-avg-5metrics=0.6973_step=3000
  run_default_eval: false
  save_steps: 200
  save_strategy: 'no'
  vision_encoder_lr: 5.0e-06
  vision_encoder_num_layers: 3
  warmup_ratio: 0.1
  warmup_steps: 0
  weight_decay: 0.01