aliangdw's picture
Upload config.yaml with huggingface_hub
b001a8d verified
# Custom evaluation settings.
# Nesting restored: every key below belongs to `custom_eval`. Left at column 0
# they become top-level keys and `custom_eval` itself parses as null.
custom_eval:
  comparisons_per_task: 5
  # The list-valued keys named after an eval type (confusion_matrix,
  # policy_ranking, quality_preference, reward_alignment, similarity_score)
  # presumably hold the dataset names for that eval - TODO confirm.
  confusion_matrix:
  - mw
  custom_eval_random_seed: 42
  # NOTE(review): only reward_alignment and policy_ranking are listed here, so
  # the confusion_matrix / quality_preference / similarity_score lists are
  # presumably unused in this run - confirm.
  eval_types:
  - reward_alignment
  - policy_ranking
  # Explicit null (no value set) rather than a bare empty key.
  max_comparisons: null
  num_examples_per_quality_pr: 5
  num_partial_successes: 5
  policy_ranking:
  - libero_pi0_no_fail
  policy_ranking_max_tasks: 100
  quality_preference:
  - mw
  reward_alignment:
  - libero_pi0_no_fail
  reward_alignment_max_trajectories: 10
  similarity_score:
  - aliangdw_metaworld_metaworld_eval
  use_frame_steps: true
# Dataset, sampling and dataloader settings.
# Nesting restored: every key below belongs to `data`. Left at column 0 they
# collide with same-named keys of other sections (progress_*, dataloader_*,
# use_multi_image), where most parsers silently keep only the last value.
data:
  # Mapping of data-source name -> weight (all 1.0 here).
  # NOTE(review): none of these sources is the configured train dataset
  # (libero_pi0_no_fail) - confirm how use_data_source_balance treats a
  # source that has no entry.
  data_source_weights:
    metaworld_train: 1.0
    molmoact_dataset_household: 1.0
    molmoact_dataset_tabletop: 1.0
    oxe_droid: 1.0
    roboarena: 1.0
  # The three dataloader_* keys also appear, with the same values, in the
  # `training` section; keep both copies in sync.
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  dataset_preference_ratio: 0.7
  dataset_success_cutoff_file: rfm/data/dataset_success_cutoff.txt
  dataset_type: rfm
  # NOTE(review): eval_datasets and train_datasets name the same dataset -
  # confirm that distinct splits are used for evaluation.
  eval_datasets:
  - libero_pi0_no_fail
  eval_subset_size: null
  fps: 10
  load_embeddings: false
  max_frames: 4
  max_frames_after_preprocessing: 64
  max_success: 1.0
  # -1 presumably means "no limit" - TODO confirm against the loader.
  max_trajectories: -1
  min_frames_per_trajectory: 1
  min_success: 0.5
  n_wrong_tasks: 5
  num_bins: 10
  partial_success_threshold: 0.2
  preference_strategy_ratio:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  # progress_discrete_bins and progress_loss_type are repeated with the same
  # values in the `loss` and `model` sections; keep all copies in sync.
  progress_discrete_bins: 32
  progress_loss_type: l2
  progress_pred_type: absolute_wrt_total_frames
  progress_strategy_ratio:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  resized_height: 240
  resized_width: 240
  # Only the second entry is non-zero. Given logging.wandb_notes
  # ("libero prog only"), the order is presumably
  # [preference, progress, similarity] - TODO confirm.
  sample_type_ratio:
  - 0.0
  - 1.0
  - 0.0
  seed: 42
  shuffle: true
  shuffle_progress_frames: false
  similarity_strategy_ratio:
  - 1.0
  - 1.0
  - 1.0
  train_datasets:
  - libero_pi0_no_fail
  traj_same_source_prob: 0.5
  use_data_source_balance: true
  # Also set, with the same value, in the `model` section.
  use_multi_image: true
# Top-level debug switch, off for this run. What it toggles is defined by the
# consuming code - not visible from this file.
debug: false
# Logging, experiment tracking and checkpoint-saving settings.
# Nesting restored: `save_best` is a sub-mapping of `logging`, and the
# save_model / save_processor / wandb_* keys are its siblings.
logging:
  log_level: debug
  log_to:
  - wandb
  # Best-checkpoint tracking.
  save_best:
    # Two entries, matching the two metric_names entries below; presumably
    # paired by position - TODO confirm.
    greater_is_better:
    - true
    - true
    hub_private: false
    hub_save_every: 1000
    # No token is stored in the file (explicit null).
    hub_token: null
    keep_top_k: 5
    # NOTE(review): both metric names end in `mw_eval`, while
    # custom_eval.reward_alignment and custom_eval.policy_ranking list
    # libero_pi0_no_fail - confirm these metrics are actually produced,
    # otherwise best-checkpoint selection may never trigger.
    metric_names:
    - eval_rew_align/pearson_mw_eval
    - eval_p_rank/spearman_mw_eval
    save_every: 500
    upload_to_hub: false
  save_model: true
  save_processor: true
  wandb_entity: clvr
  wandb_mode: null
  wandb_notes: libero prog only
  wandb_project: rfm
# Loss settings.
# Nesting restored: at column 0 the two progress_* keys duplicate the
# same-named keys of the `data` and `model` sections.
loss:
  predict_last_frame_progress: false
  # Same values as data.progress_* and model.progress_*; keep in sync.
  progress_discrete_bins: 32
  progress_loss_type: l2
  success_positive_weight: 1.0
# Run mode for this config: training.
mode: train
# Model settings.
# Nesting restored: every key below belongs to `model`; at column 0 the
# progress_* and use_multi_image keys duplicate those of other sections.
model:
  average_temporal_patches: true
  base_model_id: Qwen/Qwen3-VL-4B-Instruct
  causal_mask: false
  model_type: default
  peft_vision_encoder: false
  # Same values as data.progress_* and loss.progress_*; keep in sync.
  progress_discrete_bins: 32
  progress_loss_type: l2
  quantization: false
  rewind: null
  rewind_scale_model: false
  torch_dtype: bfloat16
  # Of the train_* switches below, only the language model and the progress
  # head are enabled, in line with logging.wandb_notes ("libero prog only").
  train_language_model: true
  train_preference_head: false
  train_progress_head: true
  train_similarity_head: false
  train_success_head: false
  # NOTE(review): with the vision encoder not trained,
  # training.vision_encoder_lr and training.vision_encoder_num_layers are
  # presumably unused - confirm.
  train_vision_encoder: false
  trust_remote_code: true
  # Also set, with the same value, in the `data` section.
  use_multi_image: true
  # NOTE(review): PEFT is disabled, so the top-level `peft` section is
  # presumably ignored for this run - confirm.
  use_peft: false
  use_progress_token: false
  use_unsloth: true
# PEFT / LoRA adapter settings (nesting restored under `peft`).
# NOTE(review): model.use_peft is false, so these values are presumably not
# applied in this run - confirm.
peft:
  # Plain `none` is a YAML string, not null (null is spelled `null` or `~`).
  bias: none
  lora_alpha: 64
  lora_dropout: 0.05
  r: 32
  # Names of the modules to adapt.
  target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - gate_proj
  - up_proj
  - down_proj
# Trainer selection by name; presumably resolved to a trainer class by the
# training entry point - TODO confirm.
trainer_cls: rfm_heads
# Optimisation and trainer-loop settings.
# Nesting restored: every key below belongs to `training`; at column 0 the
# dataloader_* keys duplicate those of the `data` section.
training:
  beta: 0.1
  # bf16 on / fp16 off, in line with model.torch_dtype (bfloat16).
  bf16: true
  custom_eval_steps: 500
  # Same values as data.dataloader_*; keep both copies in sync.
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  ddp_bucket_cap_mb: 25
  ddp_find_unused_parameters: false
  do_eval: true
  eval_steps: 500
  evaluation_strategy: steps
  exp_name: libero_ablation_prog_4frames_fixdata
  fp16: false
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  learning_rate: 2.5e-05
  logging_steps: 1
  lr_scheduler_type: cosine
  max_grad_norm: 10.0
  max_seq_length: 1024
  # Run length is given in steps; num_train_epochs is -1 below, presumably
  # meaning "not used" - TODO confirm.
  max_steps: 5000
  num_gpus: 2
  num_train_epochs: -1
  output_dir: ./logs
  overwrite_output_dir: true
  per_device_eval_batch_size: 64
  per_device_train_batch_size: 64
  predict_pref_progress: false
  predict_pref_sim: false
  predict_sim_progress: false
  prediction_loss_only: true
  remove_unused_columns: false
  resume_from_checkpoint: null
  run_default_eval: false
  # NOTE(review): save_strategy is 'no', so save_steps is presumably ignored
  # and checkpointing is driven by the logging.save_best settings - confirm.
  save_steps: 200
  # Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1
  # parsers.
  save_strategy: 'no'
  # NOTE(review): model.train_vision_encoder is false, so the two
  # vision_encoder_* keys are presumably unused - confirm.
  vision_encoder_lr: 1.0e-05
  vision_encoder_num_layers: 3
  # warmup_steps is 0, so warmup_ratio is presumably what takes effect -
  # TODO confirm against the trainer.
  warmup_ratio: 0.1
  warmup_steps: 0
  weight_decay: 0.05