File size: 4,527 Bytes
01f02fa
 
 
 
 
 
 
 
 
 
 
 
e051c92
01f02fa
 
 
 
e051c92
 
01f02fa
 
 
 
 
 
 
 
 
 
 
 
 
 
e051c92
01f02fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e051c92
01f02fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e051c92
01f02fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e051c92
01f02fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# Custom evaluation suite: which eval types run and how they sample tasks/trajectories.
custom_eval:
  comparisons_per_task: 5
  # Dataset keys used for the confusion-matrix eval ("mw" presumably = MetaWorld — confirm).
  confusion_matrix:
  - mw
  custom_eval_random_seed: 42
  # Eval types enabled for this run (subset of the suites configured below).
  eval_types:
  - reward_alignment
  - policy_ranking
  num_examples_per_quality_pr: 5
  num_partial_successes: 5
  # Pad trajectories shorter than the frame budget instead of dropping them.
  pad_frames: true
  # Dataset splits used for the policy-ranking eval.
  policy_ranking:
  - rbm-1m-ood
  policy_ranking_max_tasks: 100
  quality_preference:
  - mw
  # Dataset splits used for the reward-alignment eval (in- and out-of-distribution).
  reward_alignment:
  - rbm-1m-id
  - rbm-1m-ood
  reward_alignment_max_trajectories: 10
  # null = no frame subsampling during eval.
  subsample_n_frames: null
  use_frame_steps: true
# Training/eval data pipeline: source mixing, dataloader workers, and sampling strategy.
data:
  # Relative sampling weights per data source (all equal here).
  data_source_weights:
    metaworld_train: 1.0
    molmoact_dataset_household: 1.0
    molmoact_dataset_tabletop: 1.0
    oxe_droid: 1.0
    roboarena: 1.0
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  # Fraction of samples drawn as preference pairs vs. other sample types — TODO confirm semantics.
  dataset_preference_ratio: 0.7
  dataset_success_cutoff_file: robometer/data/dataset_success_cutoff.txt
  dataset_type: strategy_first
  eval_datasets:
  - mw
  # null = evaluate on the full eval set (no subsetting).
  eval_subset_size: null
  # false = compute embeddings on the fly rather than loading precomputed ones.
  load_embeddings: false
  # Frames sampled per trajectory at model input time.
  max_frames: 8
  # NOTE(review): larger than max_frames — presumably a pre-subsampling cap; confirm in loader.
  max_frames_after_preprocessing: 64
  max_success: 1.0
  # -1 = no cap on trajectories per dataset.
  max_trajectories: -1
  min_frames_per_trajectory: 5
  min_success: 0.5
  partial_success_threshold: 0.2
  predict_last_frame_partial_progress: false
  # Per-strategy sampling weights for preference pairs (4 strategies, uniform).
  preference_strategy_ratio:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  # Mirrors loss.progress_discrete_bins and model.progress_discrete_bins — keep in sync.
  progress_discrete_bins: 10
  progress_loss_type: discrete
  progress_pred_type: absolute_wrt_total_frames
  # Per-strategy sampling weights for progress samples (uniform).
  progress_strategy_ratio:
  - 1.0
  - 1.0
  - 1.0
  - 1.0
  # null = keep native image resolution.
  resized_height: null
  resized_width: null
  # Weights over sample types; only the first type is active (1.0/0.0/0.0).
  sample_type_ratio:
  - 1.0
  - 0.0
  - 0.0
  seed: 42
  shuffle: true
  shuffle_progress_frames: false
  # Training splits ("rbm-1m-id" = in-distribution split used above for reward_alignment).
  train_datasets:
  - rbm-1m-id
  # Probability that a trajectory pair is drawn from the same data source.
  traj_same_source_prob: 0.5
  use_multi_image: true
  use_per_frame_progress_token: true
# Global debug switch — disabled for this run.
debug: false
# Logging, checkpoint-selection, and Weights & Biases settings.
logging:
  log_level: debug
  log_to:
  - wandb
  # Best-checkpoint tracking: metric_names and greater_is_better are parallel lists
  # (5 entries each) — keep them the same length and order.
  save_best:
    greater_is_better:
    - true
    - true
    - true
    - true
    - true
    hub_private: false
    hub_save_every: 1000
    # null = taken from environment/login rather than stored in config (keeps secret out of VCS).
    hub_token: null
    keep_top_k: 5
    # Kendall-tau policy-ranking metrics on the held-out eval robots/setups.
    metric_names:
    - eval_p_rank/kendall_last_utd_so101_clean_top
    - eval_p_rank/kendall_last_usc_xarm
    - eval_p_rank/kendall_last_usc_franka
    - eval_p_rank/kendall_last_rfm_new_mit_franka_nowrist
    - eval_p_rank/kendall_last_usc_trossen
    save_every: 250
    # Hub upload disabled; hub_* keys above are inert for this run.
    upload_to_hub: false
  save_model: true
  save_processor: true
  wandb_entity: clvr
  # null = use wandb's default mode (online when credentials are available).
  wandb_mode: null
  wandb_notes: all run with prog_token per frame, qwen 4b, discrete progress, 10 bins
  wandb_project: robometer
# Loss configuration for the progress/success heads.
loss:
  predict_last_frame_progress: false
  # Must match data.progress_discrete_bins and model.progress_discrete_bins (all 10 here).
  progress_discrete_bins: 10
  progress_loss_type: discrete
  # 1.0 = unweighted positives in the success-head loss.
  success_positive_weight: 1.0
# Run mode: training (as opposed to eval-only).
mode: train
# Model architecture and trainability flags.
model:
  average_temporal_patches: true
  base_model_id: Qwen/Qwen3-VL-4B-Instruct
  # How per-frame features are pooled; temperature only applies to attention pooling,
  # so it is inert with mean pooling — presumably kept for easy switching.
  frame_pooling: mean
  frame_pooling_attn_temperature: 1.0
  model_type: default
  peft_vision_encoder: false
  # Mirrors data/loss progress settings — keep in sync.
  progress_discrete_bins: 10
  progress_loss_type: discrete
  quantization: false
  rewind: null
  torch_dtype: bfloat16
  # Train the LM backbone and all three heads; vision encoder stays frozen.
  train_language_model: true
  train_preference_head: true
  train_progress_head: true
  train_success_head: true
  train_vision_encoder: false
  trust_remote_code: true
  use_multi_image: true
  use_per_frame_progress_token: true
  use_unsloth: true
# LoRA hyperparameters.
# NOTE(review): model.use_peft is false, so this whole section appears inert for
# this run — presumably kept so PEFT can be toggled without re-adding config; verify.
peft:
  bias: none
  lora_alpha: 64
  lora_dropout: 0.05
  peft_vision_encoder: false
  # rank 32 with alpha 64 → effective scaling alpha/r = 2.
  r: 32
  # Standard set: all attention projections plus the MLP projections.
  target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - gate_proj
  - up_proj
  - down_proj
# Trainer implementation to instantiate (multi-head reward-model trainer).
trainer_cls: rbm_heads
# Trainer/optimizer settings (HuggingFace-TrainingArguments-style keys).
training:
  # Preference-loss temperature (DPO/Bradley-Terry-style beta) — TODO confirm usage.
  beta: 0.1
  bf16: true
  custom_eval_steps: 250
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  ddp_bucket_cap_mb: 25
  ddp_find_unused_parameters: false
  do_eval: true
  eval_steps: 250
  evaluation_strategy: steps
  # NOTE(review): name says "4gpu" but num_gpus below is 2 — confirm which is current.
  exp_name: ant_rfm_qwen4b_4gpu_bs16_pref_prog_succ_8_frames_all_discrete_10_bins_part2
  # fp16 off because bf16 is enabled above — the two are mutually exclusive.
  fp16: false
  gradient_accumulation_steps: 1
  gradient_checkpointing: true
  learning_rate: 2.0e-05
  logging_steps: 1
  lr_scheduler_type: cosine
  max_grad_norm: 10.0
  max_seq_length: 1024
  # Step-based budget; num_train_epochs: -1 defers entirely to max_steps.
  max_steps: 15000
  num_gpus: 2
  num_train_epochs: -1
  output_dir: ./logs
  overwrite_output_dir: true
  per_device_eval_batch_size: 16
  per_device_train_batch_size: 16
  predict_pref_progress: true
  prediction_loss_only: true
  # Required false for custom multi-column collators.
  remove_unused_columns: false
  # "part2" continuation: resumes from the best-avg checkpoint of the part-1 run at step 3000.
  resume_from_checkpoint: /gpfs/home/jessezha/scrubbed_storage/reward_fm/logs/ant_rfm_qwen4b_4gpu_bs16_pref_prog_succ_8_frames_all_discrete_10_bins/ckpt-avg-5metrics=0.6973_step=3000
  run_default_eval: false
  # save_steps is inert while save_strategy is 'no'; checkpoints come from
  # logging.save_best instead. ('no' is correctly quoted — unquoted it parses as
  # boolean false under YAML 1.1.)
  save_steps: 200
  save_strategy: 'no'
  # Separate (lower) LR for the vision encoder — inert here since it is frozen.
  vision_encoder_lr: 5.0e-06
  vision_encoder_num_layers: 3
  # NOTE(review): both warmup_ratio (0.1) and warmup_steps (0) are set; in HF Trainer
  # a nonzero warmup_steps would override the ratio — here 0 leaves the ratio active. Verify.
  warmup_ratio: 0.1
  warmup_steps: 0