# Web-viewer residue removed from the top of this file: a "File size:
# 4,395 Bytes" header, the revision id e6740d3, and a 1-183 line-number
# gutter. None of it was part of the YAML document.
---
# Custom evaluation settings: the evaluation types to run and, per type, the
# datasets to run them on. Entries are dataset identifiers resolved by the
# consuming code (resolution logic is not visible in this file).
custom_eval:
  comparisons_per_task: 5
  confusion_matrix:
  - aliangdw_metaworld_metaworld_eval
  # Names reward_alignment and policy_ranking only. Presumably the per-type
  # lists for confusion_matrix, quality_preference and similarity_score are
  # unused unless their type is added here -- TODO confirm against the loader.
  eval_types:
  - reward_alignment
  - policy_ranking
  num_examples_per_quality_pr: 5
  policy_ranking:
  - aliangdw_metaworld_metaworld_eval
  - aliangdw_utd_so101_policy_ranking_utd_so101_policy_ranking
  - aliangdw_usc_franka_policy_ranking_usc_franka_policy_ranking
  - aliangdw_usc_xarm_policy_ranking_usc_xarm_policy_ranking
  - jesbu1_roboarena_eval_debug_nowrist_roboarena_eval_debug_nowrist
  # NOTE(review): `mw` is a short name, unlike the full dataset identifiers
  # used by every other list in this section -- confirm the loader accepts it.
  quality_preference:
  - mw
  reward_alignment:
  - jesbu1_roboarena_eval_debug_nowrist_roboarena_eval_debug_nowrist
  - aliangdw_metaworld_metaworld_eval
  similarity_score:
  - aliangdw_metaworld_metaworld_eval
# Dataset selection, sampling ratios and dataloader settings.
data:
  # NOTE(review): the keys here (metaworld_train, roboarena) are source names,
  # while train_datasets below lists a single roboarena debug dataset --
  # confirm how weights are matched to datasets.
  data_source_weights:
    metaworld_train: 1.0
    roboarena: 1.0
  # The three dataloader_* keys below also appear, with the same values,
  # under `training`.
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  dataset_preference_ratio: 0.7
  dataset_success_cutoff_file: rfm/data/dataset_success_cutoff.txt
  dataset_type: rfm
  eval_datasets:
  - jesbu1_roboarena_eval_debug_nowrist_roboarena_eval_debug_nowrist
  # null: no subset size is set.
  eval_subset_size: null
  fps: 10
  load_embeddings: true
  # NOTE(review): max_frames (8) and max_frames_after_preprocessing (64)
  # differ; how the two limits interact is not visible here -- confirm.
  max_frames: 8
  max_frames_after_preprocessing: 64
  max_success: 1.0
  # -1 presumably means "no limit" -- TODO confirm.
  max_trajectories: -1
  min_frames_per_trajectory: 10
  min_success: 0.8
  n_wrong_tasks: 5
  num_bins: 10
  # Same value as model.pairwise_progress.
  pairwise_progress: false
  # Positional weights; which strategy each position refers to is defined by
  # the consuming code, not by this file.
  preference_strategy_ratio:
  - 6.0
  - 1.0
  - 1.0
  progress_pred_type: absolute_wrt_total_frames
  # Positional weights (five entries); ordering is defined by the consumer.
  progress_strategy_ratio:
  - 1.0
  - 2.0
  - 1.0
  - 1.0
  - 1.0
  resized_height: 196
  resized_width: 196
  rewind_lengths: null
  roboarena_partial_success_threshold: 0.2
  # All weight is on the first position and zero on the other two.
  # Presumably one position per sample type -- TODO confirm the ordering.
  sample_type_ratio:
  - 1.0
  - 0.0
  - 0.0
  samples_per_trajectory: 1
  seed: 42
  shuffle: true
  shuffle_progress_frames: false
  # Positional weights; ordering is defined by the consumer.
  similarity_strategy_ratio:
  - 1.0
  - 1.0
  - 1.0
  task_instruction_same_source_prob: 0.5
  # NOTE(review): identical to eval_datasets above, so this run trains and
  # evaluates on the same dataset. Presumably intentional for a debug/test
  # run -- confirm before reusing this config.
  train_datasets:
  - jesbu1_roboarena_eval_debug_nowrist_roboarena_eval_debug_nowrist
  # Same value as model.use_multi_image.
  use_multi_image: true
  use_uniform_sampling: false
# Top-level debug flag, off for this run. What it toggles is defined by the
# consuming code and is not visible in this file.
debug: false
# Logging destinations, best-checkpoint tracking and Weights & Biases
# settings.
logging:
  log_level: INFO
  log_to:
  - wandb
  # Best-checkpoint tracking. greater_is_better and metric_names are parallel
  # lists: one flag per metric, in the same order.
  save_best:
    greater_is_better:
    - true
    - true
    hub_private: false
    hub_save_every: 250
    # SECURITY: a literal Hugging Face access token was committed here. It has
    # been removed; treat the old token as leaked and revoke it. Supply the
    # credential at runtime instead (for example through the HF_TOKEN
    # environment variable or `huggingface-cli login`) -- confirm the loader
    # accepts null. upload_to_hub is false below, so this run does not need it.
    hub_token: null
    keep_top_k: 5
    metric_names:
    - eval_rew_align/pearson_mw_eval
    - eval_p_rank/spearman_mw_eval
    save_every: 250
    upload_to_hub: false
  save_model: true
  save_processor: true
  wandb_entity: clvr
  # null: no explicit W&B mode is set.
  wandb_mode: null
  wandb_notes: training RFM
  wandb_project: rfm
# Loss settings.
loss:
  predict_last_frame_progress: false
  # NOTE(review): model.train_success_head is false in this file, so this
  # weight is presumably unused for this run -- confirm.
  success_positive_weight: 6.0
# Run mode for this config. Other accepted values are defined by the
# consuming code and are not visible here.
mode: train
# Model selection and the components that are trained.
model:
  average_temporal_patches: false
  # Same identifier as the top-level trainer_cls.
  base_model_id: rewind_scale_transformer
  causal_mask: false
  model_type: default
  # Same value as data.pairwise_progress.
  pairwise_progress: false
  peft_vision_encoder: false
  quantization: false
  rewind: null
  rewind_scale_model: true
  # bfloat16 here is paired with training.bf16: true / training.fp16: false.
  torch_dtype: bfloat16
  # Trainable components: the preference and progress heads are on; the
  # language model, similarity head, success head and vision encoder are off.
  train_language_model: false
  train_preference_head: true
  train_progress_head: true
  train_similarity_head: false
  train_success_head: false
  train_vision_encoder: false
  trust_remote_code: true
  # Same value as data.use_multi_image.
  use_multi_image: true
  # PEFT is off; see the top-level `peft` section for the adapter settings
  # that would presumably apply if it were enabled.
  use_peft: false
  use_progress_token: false
  use_unsloth: false
# LoRA-style adapter settings (rank r, lora_alpha, lora_dropout, target
# modules). NOTE(review): model.use_peft is false in this file, so this
# section is presumably inactive for this run -- confirm.
peft:
  bias: none
  lora_alpha: 64
  lora_dropout: 0.05
  r: 32
  # Module names the adapter is attached to; they must match module names in
  # the base model (not verifiable from this file).
  target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - gate_proj
  - up_proj
  - down_proj
# Trainer selector; uses the same identifier as model.base_model_id.
trainer_cls: rewind_scale_transformer
# Optimisation, scheduling, evaluation cadence and output settings. Many key
# names match Hugging Face TrainingArguments fields; whether they are passed
# through unchanged is not visible here.
training:
  beta: 0.1
  # bf16 on / fp16 off, paired with model.torch_dtype: bfloat16.
  bf16: true
  # custom_eval_steps, eval_steps and save_steps are all 250.
  custom_eval_steps: 250
  # The three dataloader_* keys below duplicate the values under `data`.
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  ddp_bucket_cap_mb: 25
  ddp_find_unused_parameters: false
  do_eval: true
  eval_steps: 250
  evaluation_strategy: steps
  exp_name: rewind_scale_Progress_Pref_test_save
  fp16: false
  gradient_accumulation_steps: 1
  gradient_checkpointing: false
  learning_rate: 2.0e-05
  logging_steps: 1
  lr_scheduler_type: cosine
  max_grad_norm: 10.0
  max_seq_length: 1024
  # Run length is given in steps; num_train_epochs is -1 below, presumably
  # meaning "not epoch-bounded" -- TODO confirm.
  max_steps: 100000
  num_gpus: 2
  num_train_epochs: -1
  output_dir: ./rewind_logs
  overwrite_output_dir: true
  # NOTE(review): train batch size per device (1024) is far larger than the
  # eval batch size (16) -- confirm this is intended.
  per_device_eval_batch_size: 16
  per_device_train_batch_size: 1024
  predict_pref_progress: true
  # NOTE(review): model.train_similarity_head is false in this file, so this
  # flag presumably has no effect for this run -- confirm.
  predict_sim_progress: true
  prediction_loss_only: true
  remove_unused_columns: false
  resume_from_checkpoint: null
  run_default_eval: false
  # NOTE(review): save_steps is 250 but save_strategy is 'no'. Presumably
  # checkpointing is driven by logging.save_best (save_every: 250) instead --
  # confirm.
  save_steps: 250
  # Quoted so YAML reads it as the string "no" rather than a boolean.
  save_strategy: 'no'
  # NOTE(review): model.train_vision_encoder is false in this file, so the two
  # vision_encoder_* values are presumably unused for this run -- confirm.
  vision_encoder_lr: 1.0e-05
  vision_encoder_num_layers: 3
  # NOTE(review): both warmup_ratio and warmup_steps are set. If these map to
  # Hugging Face TrainingArguments, warmup_steps: 0 leaves warmup_ratio in
  # effect -- confirm.
  warmup_ratio: 0.1
  warmup_steps: 0
  weight_decay: 0.1