# Robometer-4B / config.yaml
# aliangdw's picture
# Update config.yaml
# 20c4320 verified
# Settings for the custom evaluation pass.
custom_eval:
  comparisons_per_task: 5
  # NOTE(review): confusion_matrix and quality_preference name datasets but
  # neither appears in eval_types below — presumably inactive; confirm.
  confusion_matrix:
    - mw
  custom_eval_random_seed: 42
  # Evaluation types enabled for this run; each entry has a same-named
  # dataset list further down in this section.
  eval_types:
    - reward_alignment
    - policy_ranking
  num_examples_per_quality_pr: 5
  num_partial_successes: 5
  pad_frames: true
  # Datasets used for the policy_ranking evaluation.
  policy_ranking:
    - rbm-1m-ood
  policy_ranking_max_tasks: 100
  quality_preference:
    - mw
  # Datasets used for the reward_alignment evaluation.
  reward_alignment:
    - rbm-1m-id
    - rbm-1m-ood
  reward_alignment_max_trajectories: 10
  subsample_n_frames: null
  use_frame_steps: true
# Dataset selection, sampling and preprocessing settings.
data:
  # Per-source weights; every listed source is weighted equally (1.0).
  data_source_weights:
    metaworld_train: 1.0
    molmoact_dataset_household: 1.0
    molmoact_dataset_tabletop: 1.0
    oxe_droid: 1.0
    roboarena: 1.0
  # NOTE(review): the three dataloader_* keys are repeated with the same
  # values under `training` — presumably they must agree; confirm which one
  # the loader actually reads.
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  dataset_preference_ratio: 0.7
  dataset_success_cutoff_file: robometer/data/dataset_success_cutoff.txt
  dataset_type: strategy_first
  eval_datasets:
    - mw
  eval_subset_size: null
  load_embeddings: false
  max_frames: 8
  max_frames_after_preprocessing: 64
  max_success: 1.0
  max_trajectories: -1
  min_frames_per_trajectory: 5
  min_success: 0.5
  partial_success_threshold: 0.2
  predict_last_frame_partial_progress: false
  preference_strategy_ratio:
    - 1.0
    - 1.0
    - 1.0
    - 1.0
  # NOTE(review): progress_discrete_bins / progress_loss_type also appear
  # under `loss` and `model` with the same values (10 / discrete) —
  # presumably all three must be kept in sync; verify.
  progress_discrete_bins: 10
  progress_loss_type: discrete
  progress_pred_type: absolute_wrt_total_frames
  progress_strategy_ratio:
    - 1.0
    - 1.0
    - 1.0
    - 1.0
  resized_height: null
  resized_width: null
  # Only the first of the three sample types has a non-zero ratio.
  sample_type_ratio:
    - 1.0
    - 0.0
    - 0.0
  seed: 42
  shuffle: true
  shuffle_progress_frames: false
  train_datasets:
    - rbm-1m-id
  traj_same_source_prob: 0.5
  # NOTE(review): use_multi_image and use_per_frame_progress_token are also
  # set (to the same values) under `model`.
  use_multi_image: true
  use_per_frame_progress_token: true
# Top-level debug switch; disabled in this configuration.
debug: false
# Logging, experiment tracking and checkpoint-saving settings.
logging:
  log_level: debug
  log_to:
    - wandb
  # Best-checkpoint tracking.
  save_best:
    # One entry per metric in metric_names below (five each) — presumably
    # matched by position; confirm against the consumer.
    greater_is_better:
      - true
      - true
      - true
      - true
      - true
    hub_private: false
    hub_save_every: 1000
    # No token is stored in this file.
    hub_token: null
    keep_top_k: 5
    metric_names:
      - eval_p_rank/kendall_last_utd_so101_clean_top
      - eval_p_rank/kendall_last_usc_xarm
      - eval_p_rank/kendall_last_usc_franka
      - eval_p_rank/kendall_last_rfm_new_mit_franka_nowrist
      - eval_p_rank/kendall_last_usc_trossen
    save_every: 250
    upload_to_hub: false
  save_model: true
  save_processor: true
  wandb_entity: clvr
  wandb_mode: null
  wandb_notes: all run with prog_token per frame, qwen 4b, discrete progress, 10 bins
  wandb_project: robometer
# Loss configuration.
loss:
  predict_last_frame_progress: false
  # Same values as the identically named keys under `data` and `model`
  # (10 / discrete) — presumably these must agree; verify.
  progress_discrete_bins: 10
  progress_loss_type: discrete
  success_positive_weight: 1.0
# Run mode selected by this configuration.
mode: train
# Model architecture and trainable-component settings.
model:
  average_temporal_patches: true
  base_model_id: Qwen/Qwen3-VL-4B-Instruct
  frame_pooling: mean
  frame_pooling_attn_temperature: 1.0
  model_type: default
  peft_vision_encoder: false
  # Same values as the identically named keys under `data` and `loss`
  # (10 / discrete) — presumably these must agree; verify.
  progress_discrete_bins: 10
  progress_loss_type: discrete
  quantization: false
  rewind: null
  torch_dtype: bfloat16
  # Language model and all three heads are trained; the vision encoder is not.
  train_language_model: true
  train_preference_head: true
  train_progress_head: true
  train_success_head: true
  train_vision_encoder: false
  trust_remote_code: true
  use_multi_image: true
  # NOTE(review): use_peft is false, so the top-level `peft` section is
  # presumably ignored for this run — confirm.
  use_peft: false
  use_per_frame_progress_token: true
  use_unsloth: true
# LoRA/PEFT adapter settings.
# NOTE(review): `model.use_peft` is false in this file, so this section is
# presumably unused for this run — confirm.
peft:
  bias: none
  lora_alpha: 64
  lora_dropout: 0.05
  peft_vision_encoder: false
  r: 32
  # Projection module names the adapter is applied to.
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj
# Identifier of the trainer class to use — presumably resolved by the
# training entry point; verify against the consumer.
trainer_cls: rbm_heads
# Optimisation, scheduling and evaluation-cadence settings.
training:
  beta: 0.1
  # bf16 on / fp16 off, consistent with `model.torch_dtype: bfloat16`.
  bf16: true
  custom_eval_steps: 250
  # NOTE(review): the three dataloader_* keys are repeated with the same
  # values under `data` — confirm which section the loader reads.
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  ddp_bucket_cap_mb: 25
  ddp_find_unused_parameters: false
  do_eval: true
  eval_steps: 250
  evaluation_strategy: steps
  # NOTE(review): the name says "4gpu" but num_gpus below is 2 — confirm
  # which one reflects the actual run.
  exp_name: ant_rfm_qwen4b_4gpu_bs16_pref_prog_succ_8_frames_all_discrete_10_bins_part2
  fp16: false
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  learning_rate: 2.0e-05
  logging_steps: 1
  lr_scheduler_type: cosine
  max_grad_norm: 10.0
  max_seq_length: 1024
  # num_train_epochs is -1, so run length is presumably governed by
  # max_steps — confirm.
  max_steps: 15000
  num_gpus: 2
  num_train_epochs: -1
  output_dir: ./logs
  overwrite_output_dir: true
  per_device_eval_batch_size: 16
  per_device_train_batch_size: 16
  predict_pref_progress: true
  prediction_loss_only: true
  remove_unused_columns: false
  # NOTE(review): absolute path on a specific cluster filesystem; it will not
  # resolve elsewhere and must be overridden to reproduce this run.
  resume_from_checkpoint: /gpfs/home/jessezha/scrubbed_storage/reward_fm/logs/ant_rfm_qwen4b_4gpu_bs16_pref_prog_succ_8_frames_all_discrete_10_bins/ckpt-avg-5metrics=0.6973_step=3000
  run_default_eval: false
  # NOTE(review): save_strategy is 'no', so save_steps presumably has no
  # effect; checkpointing appears to be driven by `logging.save_best`
  # (save_every: 250) instead — confirm.
  save_steps: 200
  # Quoted so it stays the string "no" rather than a YAML 1.1 boolean.
  save_strategy: 'no'
  # NOTE(review): `model.train_vision_encoder` is false, so the two
  # vision_encoder_* values are presumably unused — confirm.
  vision_encoder_lr: 5.0e-06
  vision_encoder_num_layers: 3
  warmup_ratio: 0.1
  warmup_steps: 0
  weight_decay: 0.01