# amburger66's picture
# Upload config.yaml with huggingface_hub
# 8171912 verified
# Custom evaluation settings: which evaluation types are enabled, the dataset
# list attached to each type, and the sampling limits used while evaluating.
# NOTE(review): nesting was restored from a dump whose indentation had been
# stripped; exact key semantics live in the consuming code — confirm there.
custom_eval:
  comparisons_per_task: 5
  # Not listed in eval_types below — presumably inactive for this run; confirm.
  confusion_matrix:
  - mw
  custom_eval_random_seed: 42
  # Enabled evaluation types; each entry has a same-named dataset list in this
  # section (reward_alignment, policy_ranking).
  eval_types:
  - reward_alignment
  - policy_ranking
  max_comparisons: null  # left unset
  num_examples_per_quality_pr: 5
  num_partial_successes: 5
  pad_frames: true
  policy_ranking:
  - amburger66_robotsmith_rbm_robotsmith
  policy_ranking_max_tasks: 100
  # Not listed in eval_types above — presumably inactive for this run; confirm.
  quality_preference:
  - mw
  reward_alignment:
  - amburger66_robotsmith_rbm_robotsmith
  reward_alignment_max_trajectories: 10
  subsample_n_frames: null  # left unset
  use_frame_steps: true
# Dataset selection, sampling ratios, frame limits and dataloader settings.
# NOTE(review): nesting was restored from a dump whose indentation had been
# stripped; exact key semantics live in the consuming code — confirm there.
data:
  data_source_weights: null  # left unset
  # The three dataloader_* keys also appear under `training:` with the same
  # values.
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  dataset_preference_ratio: 0.7
  # Relative path; presumably resolved against the repository root — confirm.
  dataset_success_cutoff_file: robometer/data/dataset_success_cutoff.txt
  dataset_type: rbm
  # NOTE(review): same dataset name as train_datasets below; confirm that a
  # held-out split is applied downstream.
  eval_datasets:
  - amburger66_robotsmith_rbm_robotsmith
  eval_subset_size: null  # left unset
  load_embeddings: false
  max_frames: 16
  max_frames_after_preprocessing: 64
  max_success: 1.0
  max_trajectories: -1  # presumably "no limit" — confirm against the consumer
  min_frames_per_trajectory: 5
  min_success: 0.5
  partial_success_threshold: 0.2
  predict_last_frame_partial_progress: false
  # Four equal weights; which strategy each position refers to is defined by
  # the consumer — TODO confirm.
  preference_strategy_ratio:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  # progress_discrete_bins / progress_loss_type are repeated with the same
  # values under `loss:` and `model:`; presumably they must agree — confirm.
  progress_discrete_bins: 10
  progress_loss_type: discrete
  progress_pred_type: absolute_first_frame
  # Four equal weights; position meaning is defined by the consumer — TODO
  # confirm.
  progress_strategy_ratio:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  resized_height: null  # left unset
  resized_width: null  # left unset
  # Three weights with only the first non-zero; which sample type each
  # position refers to is defined by the consumer — TODO confirm.
  sample_type_ratio:
  - 1.0
  - 0.0
  - 0.0
  seed: 42
  shuffle: true
  shuffle_progress_frames: false
  train_datasets:
  - amburger66_robotsmith_rbm_robotsmith
  traj_same_source_prob: 0.5
  # use_multi_image / use_per_frame_progress_token are repeated with the same
  # values under `model:`.
  use_multi_image: true
  use_per_frame_progress_token: true
# Debug switch, disabled for this run. What it changes when enabled is
# defined by the consuming code (not visible here).
debug: false
# Logging destinations, checkpoint-saving policy and Weights & Biases run
# metadata.
# NOTE(review): nesting (including the save_best sub-mapping) was restored
# from a dump whose indentation had been stripped — confirm against the
# consumer's schema.
logging:
  log_level: INFO
  log_to:
  - wandb
  # Settings for keeping the best checkpoints by evaluation metric.
  save_best:
    # Two entries here and two under metric_names; presumably index-aligned
    # (one direction flag per metric) — confirm.
    greater_is_better:
    - true
    - true
    # hub_* keys only matter if uploading is enabled; upload_to_hub is false
    # below.
    hub_private: false
    hub_save_every: 1000
    hub_token: null  # no token stored in this file
    keep_top_k: 5
    metric_names:
    - eval_rew_align/pearson_robotsmith
    - eval_p_rank/kendall_last_robotsmith
    save_every: 1000
    upload_to_hub: false
  save_model: true
  save_processor: true
  wandb_entity: r-pad
  wandb_mode: null  # left unset
  wandb_notes: fine-tuning Robometer on RobotSmith
  wandb_project: rbm-finetune-robotsmith
# Loss configuration.
# NOTE(review): nesting was restored from a dump whose indentation had been
# stripped; exact key semantics live in the consuming code — confirm there.
loss:
  predict_last_frame_progress: false
  # Repeated with the same values under `data:` and `model:`; presumably they
  # must agree — confirm.
  progress_discrete_bins: 10
  progress_loss_type: discrete
  success_positive_weight: 1.0
# Run mode selected for this config; the set of accepted values is defined
# by the consuming code (not visible here).
mode: train
# Model definition: base checkpoint id, which components are trained, and
# pooling/dtype options.
# NOTE(review): nesting was restored from a dump whose indentation had been
# stripped; exact key semantics live in the consuming code — confirm there.
model:
  average_temporal_patches: true
  base_model_id: Qwen/Qwen3-VL-4B-Instruct
  frame_pooling: mean
  # Presumably only used by an attention-based frame_pooling mode, not `mean`
  # — confirm.
  frame_pooling_attn_temperature: 1.0
  model_type: default
  peft_vision_encoder: false  # same key and value also appear under `peft:`
  # Repeated with the same values under `data:` and `loss:`; presumably they
  # must agree — confirm.
  progress_discrete_bins: 10
  progress_loss_type: discrete
  quantization: false
  rewind: null  # left unset
  torch_dtype: bfloat16  # `training:` sets bf16: true and fp16: false
  # Language model and all three heads are trained; the vision encoder is not.
  train_language_model: true
  train_preference_head: true
  train_progress_head: true
  train_success_head: true
  train_vision_encoder: false
  trust_remote_code: true
  use_multi_image: true  # same value as under `data:`
  use_peft: true  # adapter settings are in the `peft:` section
  use_per_frame_progress_token: true  # same value as under `data:`
  use_unsloth: true
# LoRA adapter settings (model.use_peft is true in this config).
# NOTE(review): nesting was restored from a dump whose indentation had been
# stripped; key names resemble PEFT's LoraConfig fields — presumably passed
# through to it; confirm.
peft:
  bias: none  # the plain string "none", not YAML null
  lora_alpha: 64  # twice the rank `r` below
  lora_dropout: 0.05
  peft_vision_encoder: false  # same key and value also appear under `model:`
  r: 32
  # Module names that receive adapters; presumably the attention (q/k/v/o)
  # and MLP (gate/up/down) projections of the base model — confirm.
  target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - gate_proj
  - up_proj
  - down_proj
# Identifier of the trainer implementation to use; how the name is resolved
# is defined by the consuming code (not visible here).
trainer_cls: rbm_heads
# Training-loop hyperparameters, schedule and checkpoint/eval cadence.
# NOTE(review): nesting was restored from a dump whose indentation had been
# stripped. Many key names resemble Hugging Face TrainingArguments fields —
# presumably passed through to it; confirm against the consumer.
training:
  beta: 0.1
  bf16: true  # fp16 is false below; model.torch_dtype is bfloat16
  custom_eval_steps: 50
  # The three dataloader_* keys also appear under `data:` with the same
  # values.
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  ddp_bucket_cap_mb: 25
  ddp_find_unused_parameters: false
  do_eval: true
  eval_steps: 50
  evaluation_strategy: steps
  exp_name: lora_task03_data_fixed
  fp16: false
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  learning_rate: 2.0e-05
  # Checkpoint the run starts from; distinct from model.base_model_id.
  load_from_checkpoint: robometer/Robometer-4B
  logging_steps: 1
  lr_scheduler_type: cosine
  max_grad_norm: 10.0
  max_seq_length: 1024
  # Run length is given in steps; num_train_epochs is -1 below.
  max_steps: 1000
  num_gpus: 2
  num_train_epochs: -1
  output_dir: /data/robometer/logs  # absolute, machine-specific path
  overwrite_output_dir: true
  per_device_eval_batch_size: 16
  # 8 per device with gradient_accumulation_steps: 1; with num_gpus: 2 that is
  # presumably 16 samples per optimizer step — confirm.
  per_device_train_batch_size: 8
  predict_pref_progress: true
  prediction_loss_only: true
  remove_unused_columns: false
  resume_from_checkpoint: null  # left unset
  run_default_eval: false
  # NOTE(review): save_steps is set while save_strategy is 'no'; presumably
  # save_steps is ignored and saving is driven by logging.save_best — confirm.
  save_steps: 200
  # Quoted so it stays the string 'no' instead of being read as boolean false
  # by YAML 1.1 loaders.
  save_strategy: 'no'
  # model.train_vision_encoder is false, so these two are presumably unused
  # in this run — confirm.
  vision_encoder_lr: 1.0e-05
  vision_encoder_num_layers: 3
  # warmup_ratio is 0.1 and warmup_steps is 0; which one takes precedence is
  # decided by the consumer — confirm.
  warmup_ratio: 0.1
  warmup_steps: 0
  weight_decay: 0.01