---
# RFM (reward foundation model) training configuration.
# NOTE(review): reconstructed into block YAML from a whitespace-mangled
# single-line dump; the nesting below `logging.save_best` is inferred from
# key names — confirm against the consuming config schema.

# Settings for the custom evaluation suite run during training.
custom_eval:
  comparisons_per_task: 5
  confusion_matrix:
    - mw
  custom_eval_random_seed: 42
  eval_types:
    - reward_alignment
    - policy_ranking
  max_comparisons: null
  num_examples_per_quality_pr: 5
  num_partial_successes: 5
  policy_ranking:
    - libero_pi0_no_fail
  policy_ranking_max_tasks: 100
  quality_preference:
    - mw
  reward_alignment:
    - libero_pi0_no_fail
  reward_alignment_max_trajectories: 10
  similarity_score:
    - aliangdw_metaworld_metaworld_eval
  use_frame_steps: true

# Dataset / dataloader configuration.
data:
  # Relative sampling weight per data source.
  data_source_weights:
    metaworld_train: 1.0
    molmoact_dataset_household: 1.0
    molmoact_dataset_tabletop: 1.0
    oxe_droid: 1.0
    roboarena: 1.0
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  dataset_preference_ratio: 0.7
  dataset_success_cutoff_file: rfm/data/dataset_success_cutoff.txt
  dataset_type: rfm
  eval_datasets:
    - libero_pi0_no_fail
  eval_subset_size: null
  fps: 10
  load_embeddings: false
  max_frames: 4
  max_frames_after_preprocessing: 64
  max_success: 1.0
  max_trajectories: -1  # -1 presumably means "no limit" — TODO confirm
  min_frames_per_trajectory: 1
  min_success: 0.5
  n_wrong_tasks: 5
  num_bins: 10
  partial_success_threshold: 0.2
  preference_strategy_ratio:
    - 1.0
    - 1.0
    - 1.0
    - 1.0
  progress_discrete_bins: 32
  progress_loss_type: l2
  progress_pred_type: absolute_wrt_total_frames
  progress_strategy_ratio:
    - 1.0
    - 1.0
    - 1.0
    - 1.0
  resized_height: 240
  resized_width: 240
  sample_type_ratio:
    - 0.0
    - 1.0
    - 0.0
  seed: 42
  shuffle: true
  shuffle_progress_frames: false
  similarity_strategy_ratio:
    - 1.0
    - 1.0
    - 1.0
  train_datasets:
    - libero_pi0_no_fail
  traj_same_source_prob: 0.5
  use_data_source_balance: true
  use_multi_image: true

debug: false

# Logging / checkpointing / Weights & Biases configuration.
logging:
  log_level: debug
  log_to:
    - wandb
  # Best-checkpoint tracking; one greater_is_better flag per metric below.
  save_best:
    greater_is_better:
      - true
      - true
    hub_private: false
    hub_save_every: 1000
    hub_token: null
    keep_top_k: 5
    metric_names:
      - eval_rew_align/pearson_mw_eval
      - eval_p_rank/spearman_mw_eval
    save_every: 500
    upload_to_hub: false
  save_model: true
  save_processor: true
  wandb_entity: clvr
  wandb_mode: null
  wandb_notes: libero prog only
  wandb_project: rfm

# Loss-head configuration.
loss:
  predict_last_frame_progress: false
  progress_discrete_bins: 32
  progress_loss_type: l2
  success_positive_weight: 1.0

mode: train

# Backbone / head configuration.
model:
  average_temporal_patches: true
  base_model_id: Qwen/Qwen3-VL-4B-Instruct
  causal_mask: false
  model_type: default
  peft_vision_encoder: false
  progress_discrete_bins: 32
  progress_loss_type: l2
  quantization: false
  rewind: null
  rewind_scale_model: false
  torch_dtype: bfloat16
  train_language_model: true
  train_preference_head: false
  train_progress_head: true
  train_similarity_head: false
  train_success_head: false
  train_vision_encoder: false
  trust_remote_code: true
  use_multi_image: true
  use_peft: false
  use_progress_token: false
  use_unsloth: true

# LoRA/PEFT hyperparameters (used only when model.use_peft is enabled).
peft:
  bias: none
  lora_alpha: 64
  lora_dropout: 0.05
  r: 32
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj

trainer_cls: rfm_heads

# Optimizer / trainer hyperparameters.
training:
  beta: 0.1
  bf16: true
  custom_eval_steps: 500
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  ddp_bucket_cap_mb: 25
  ddp_find_unused_parameters: false
  do_eval: true
  eval_steps: 500
  evaluation_strategy: steps
  exp_name: libero_ablation_prog_4frames_fixdata
  fp16: false
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  learning_rate: 2.5e-05
  logging_steps: 1
  lr_scheduler_type: cosine
  max_grad_norm: 10.0
  max_seq_length: 1024
  max_steps: 5000
  num_gpus: 2
  num_train_epochs: -1  # -1 presumably means "use max_steps instead" — TODO confirm
  output_dir: ./logs
  overwrite_output_dir: true
  per_device_eval_batch_size: 64
  per_device_train_batch_size: 64
  predict_pref_progress: false
  predict_pref_sim: false
  predict_sim_progress: false
  prediction_loss_only: true
  remove_unused_columns: false
  resume_from_checkpoint: null
  save_steps: 200
  # Quoted deliberately: a bare `no` would parse as boolean false in YAML 1.1.
  save_strategy: 'no'
  vision_encoder_lr: 1.0e-05
  vision_encoder_num_layers: 3
  warmup_ratio: 0.1
  warmup_steps: 0
  weight_decay: 0.05