---
# Training-run configuration (mode: train) for the experiment named in
# training.exp_name. Keys inside every mapping are kept in alphabetical
# order, matching the original dump.

# Datasets used by each custom evaluation; eval_types lists two of the
# evaluation names that also appear as keys below.
custom_eval:
  comparisons_per_task: 5
  confusion_matrix:
    - aliangdw_metaworld_metaworld_eval
  eval_types:
    - reward_alignment
    - policy_ranking
  num_examples_per_quality_pr: 5
  policy_ranking:
    - aliangdw_metaworld_metaworld_eval
    - aliangdw_utd_so101_policy_ranking_utd_so101_policy_ranking
    - aliangdw_usc_franka_policy_ranking_usc_franka_policy_ranking
    - aliangdw_usc_xarm_policy_ranking_usc_xarm_policy_ranking
    - jesbu1_roboarena_eval_debug_nowrist_roboarena_eval_debug_nowrist
  quality_preference:
    - mw
  reward_alignment:
    - jesbu1_roboarena_eval_debug_nowrist_roboarena_eval_debug_nowrist
    - aliangdw_metaworld_metaworld_eval
  similarity_score:
    - aliangdw_metaworld_metaworld_eval

# Dataset selection, sampling ratios and frame preprocessing.
data:
  data_source_weights:
    metaworld_train: 1.0
    roboarena: 1.0
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  dataset_preference_ratio: 0.7
  dataset_success_cutoff_file: rfm/data/dataset_success_cutoff.txt
  dataset_type: rfm
  # NOTE(review): eval_datasets and train_datasets name the same dataset;
  # confirm this is intentional (the dataset name contains "debug").
  eval_datasets:
    - jesbu1_roboarena_eval_debug_nowrist_roboarena_eval_debug_nowrist
  eval_subset_size: null
  fps: 10
  load_embeddings: true
  max_frames: 8
  max_frames_after_preprocessing: 64
  max_success: 1.0
  max_trajectories: -1
  min_frames_per_trajectory: 10
  min_success: 0.8
  n_wrong_tasks: 5
  num_bins: 10
  pairwise_progress: false
  preference_strategy_ratio:
    - 6.0
    - 1.0
    - 1.0
  progress_pred_type: absolute_wrt_total_frames
  progress_strategy_ratio:
    - 1.0
    - 2.0
    - 1.0
    - 1.0
    - 1.0
  resized_height: 196
  resized_width: 196
  rewind_lengths: null
  roboarena_partial_success_threshold: 0.2
  sample_type_ratio:
    - 1.0
    - 0.0
    - 0.0
  samples_per_trajectory: 1
  seed: 42
  shuffle: true
  shuffle_progress_frames: false
  similarity_strategy_ratio:
    - 1.0
    - 1.0
    - 1.0
  task_instruction_same_source_prob: 0.5
  train_datasets:
    - jesbu1_roboarena_eval_debug_nowrist_roboarena_eval_debug_nowrist
  use_multi_image: true
  use_uniform_sampling: false

debug: false

# Experiment tracking and best-checkpoint settings.
logging:
  log_level: INFO
  log_to:
    - wandb
  save_best:
    # One entry per metric in metric_names below (two of each).
    greater_is_better:
      - true
      - true
    hub_private: false
    hub_save_every: 250
    # SECURITY: a Hugging Face access token was hard-coded here in plain
    # text and has been removed. Treat that token as compromised and revoke
    # it. Supply the credential at runtime instead, e.g. through the
    # HF_TOKEN environment variable -- TODO confirm the consuming code
    # falls back to it when this value is null. upload_to_hub is false
    # below, so this config does not request an upload.
    hub_token: null
    keep_top_k: 5
    metric_names:
      - eval_rew_align/pearson_mw_eval
      - eval_p_rank/spearman_mw_eval
    save_every: 250
    upload_to_hub: false
  save_model: true
  save_processor: true
  wandb_entity: clvr
  wandb_mode: null
  wandb_notes: training RFM
  wandb_project: rfm

loss:
  predict_last_frame_progress: false
  success_positive_weight: 6.0

mode: train

# Model architecture and which heads / sub-modules are trainable.
model:
  average_temporal_patches: false
  base_model_id: rewind_scale_transformer
  causal_mask: false
  model_type: default
  pairwise_progress: false
  peft_vision_encoder: false
  quantization: false
  rewind: null
  rewind_scale_model: true
  torch_dtype: bfloat16
  train_language_model: false
  train_preference_head: true
  train_progress_head: true
  train_similarity_head: false
  train_success_head: false
  train_vision_encoder: false
  trust_remote_code: true
  use_multi_image: true
  use_peft: false
  use_progress_token: false
  use_unsloth: false

# LoRA settings; model.use_peft is false in this config.
peft:
  # Quoted so the string 'none' is never mistaken for a YAML null.
  bias: 'none'
  lora_alpha: 64
  lora_dropout: 0.05
  r: 32
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj

trainer_cls: rewind_scale_transformer

# Optimisation schedule and trainer arguments.
training:
  beta: 0.1
  bf16: true
  custom_eval_steps: 250
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  ddp_bucket_cap_mb: 25
  ddp_find_unused_parameters: false
  do_eval: true
  eval_steps: 250
  evaluation_strategy: steps
  exp_name: rewind_scale_Progress_Pref_test_save
  fp16: false
  gradient_accumulation_steps: 1
  gradient_checkpointing: false
  learning_rate: 2.0e-05
  logging_steps: 1
  lr_scheduler_type: cosine
  max_grad_norm: 10.0
  max_seq_length: 1024
  max_steps: 100000
  num_gpus: 2
  num_train_epochs: -1
  output_dir: ./rewind_logs
  overwrite_output_dir: true
  per_device_eval_batch_size: 16
  per_device_train_batch_size: 1024
  predict_pref_progress: true
  predict_sim_progress: true
  prediction_loss_only: true
  remove_unused_columns: false
  resume_from_checkpoint: null
  run_default_eval: false
  # NOTE(review): save_strategy is 'no' while save_steps is set; confirm
  # checkpointing is meant to go only through logging.save_best.
  save_steps: 250
  save_strategy: 'no'
  vision_encoder_lr: 1.0e-05
  vision_encoder_num_layers: 3
  # NOTE(review): both warmup_ratio and warmup_steps are set; verify which
  # one the trainer honours when warmup_steps is 0.
  warmup_ratio: 0.1
  warmup_steps: 0
  weight_decay: 0.1