---
# Robometer training configuration (reconstructed block-style YAML).
# Keys within each mapping are kept in alphabetical order, matching the
# original dump's ordering.

# Custom evaluation suite run alongside training.
custom_eval:
  comparisons_per_task: 5
  confusion_matrix:
    - mw
  custom_eval_random_seed: 42
  eval_types:
    - reward_alignment
    - policy_ranking
  num_examples_per_quality_pr: 5
  num_partial_successes: 5
  pad_frames: true
  policy_ranking:
    - rbm-1m-ood
  policy_ranking_max_tasks: 100
  quality_preference:
    - mw
  reward_alignment:
    - rbm-1m-id
    - rbm-1m-ood
  reward_alignment_max_trajectories: 10
  subsample_n_frames: null
  use_frame_steps: true

# Dataset mixing, preprocessing, and dataloader settings.
data:
  data_source_weights:
    metaworld_train: 1.0
    molmoact_dataset_household: 1.0
    molmoact_dataset_tabletop: 1.0
    oxe_droid: 1.0
    roboarena: 1.0
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  dataset_preference_ratio: 0.7
  dataset_success_cutoff_file: robometer/data/dataset_success_cutoff.txt
  dataset_type: strategy_first
  eval_datasets:
    - mw
  eval_subset_size: null
  load_embeddings: false
  max_frames: 8
  max_frames_after_preprocessing: 64
  max_success: 1.0
  # -1 means no cap on the number of trajectories.
  max_trajectories: -1
  min_frames_per_trajectory: 5
  min_success: 0.5
  partial_success_threshold: 0.2
  predict_last_frame_partial_progress: false
  preference_strategy_ratio:
    - 1.0
    - 1.0
    - 1.0
    - 1.0
  progress_discrete_bins: 10
  progress_loss_type: discrete
  progress_pred_type: absolute_wrt_total_frames
  progress_strategy_ratio:
    - 1.0
    - 1.0
    - 1.0
    - 1.0
  resized_height: null
  resized_width: null
  sample_type_ratio:
    - 1.0
    - 0.0
    - 0.0
  seed: 42
  shuffle: true
  shuffle_progress_frames: false
  train_datasets:
    - rbm-1m-id
  traj_same_source_prob: 0.5
  use_multi_image: true
  use_per_frame_progress_token: true

debug: false

# Logging, checkpoint-selection, and Weights & Biases settings.
logging:
  log_level: debug
  log_to:
    - wandb
  # Best-checkpoint tracking: one greater_is_better flag per metric below.
  save_best:
    greater_is_better:
      - true
      - true
      - true
      - true
      - true
    hub_private: false
    hub_save_every: 1000
    hub_token: null
    keep_top_k: 5
    metric_names:
      - eval_p_rank/kendall_last_utd_so101_clean_top
      - eval_p_rank/kendall_last_usc_xarm
      - eval_p_rank/kendall_last_usc_franka
      - eval_p_rank/kendall_last_rfm_new_mit_franka_nowrist
      - eval_p_rank/kendall_last_usc_trossen
    save_every: 250
    upload_to_hub: false
  save_model: true
  save_processor: true
  wandb_entity: clvr
  wandb_mode: null
  wandb_notes: all run with prog_token per frame, qwen 4b, discrete progress, 10 bins
  wandb_project: robometer

# Loss-head configuration.
loss:
  predict_last_frame_progress: false
  progress_discrete_bins: 10
  progress_loss_type: discrete
  success_positive_weight: 1.0

mode: train

# Base model and head/encoder trainability flags.
model:
  average_temporal_patches: true
  base_model_id: Qwen/Qwen3-VL-4B-Instruct
  frame_pooling: mean
  frame_pooling_attn_temperature: 1.0
  model_type: default
  peft_vision_encoder: false
  progress_discrete_bins: 10
  progress_loss_type: discrete
  quantization: false
  rewind: null
  torch_dtype: bfloat16
  train_language_model: true
  train_preference_head: true
  train_progress_head: true
  train_success_head: true
  train_vision_encoder: false
  trust_remote_code: true
  use_multi_image: true
  use_per_frame_progress_token: true
  use_peft: false
  use_unsloth: true

# LoRA/PEFT hyperparameters (only applied when model.use_peft is true).
peft:
  bias: none
  lora_alpha: 64
  lora_dropout: 0.05
  peft_vision_encoder: false
  r: 32
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj

trainer_cls: rbm_heads

# HuggingFace-Trainer-style training arguments.
training:
  beta: 0.1
  bf16: true
  custom_eval_steps: 250
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  ddp_bucket_cap_mb: 25
  ddp_find_unused_parameters: false
  do_eval: true
  eval_steps: 250
  evaluation_strategy: steps
  exp_name: ant_rfm_qwen4b_4gpu_bs16_pref_prog_succ_8_frames_all_discrete_10_bins_part2
  fp16: false
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  learning_rate: 2.0e-05
  logging_steps: 1
  lr_scheduler_type: cosine
  max_grad_norm: 10.0
  max_seq_length: 1024
  max_steps: 15000
  num_gpus: 2
  # -1 defers epoch counting to max_steps.
  num_train_epochs: -1
  output_dir: ./logs
  overwrite_output_dir: true
  per_device_eval_batch_size: 16
  per_device_train_batch_size: 16
  predict_pref_progress: true
  prediction_loss_only: true
  remove_unused_columns: false
  resume_from_checkpoint: /gpfs/home/jessezha/scrubbed_storage/reward_fm/logs/ant_rfm_qwen4b_4gpu_bs16_pref_prog_succ_8_frames_all_discrete_10_bins/ckpt-avg-5metrics=0.6973_step=3000
  run_default_eval: false
  save_steps: 200
  # Quoted: a bare `no` would parse as boolean false under YAML 1.1 loaders.
  save_strategy: 'no'
  vision_encoder_lr: 5.0e-06
  vision_encoder_num_layers: 3
  warmup_ratio: 0.1
  warmup_steps: 0
  weight_decay: 0.01