File size: 4,395 Bytes
e6740d3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
# Custom evaluation settings. Each list-valued key below (other than
# eval_types) holds identifiers that are presumably evaluation dataset
# names -- TODO confirm against the consumer.
custom_eval:
  comparisons_per_task: 5
  confusion_matrix:
    - aliangdw_metaworld_metaworld_eval
  # Only two evaluation types are listed here, although dataset lists are
  # also filled in for confusion_matrix, quality_preference and
  # similarity_score.
  eval_types:
    - reward_alignment
    - policy_ranking
  num_examples_per_quality_pr: 5
  policy_ranking:
    - aliangdw_metaworld_metaworld_eval
    - aliangdw_utd_so101_policy_ranking_utd_so101_policy_ranking
    - aliangdw_usc_franka_policy_ranking_usc_franka_policy_ranking
    - aliangdw_usc_xarm_policy_ranking_usc_xarm_policy_ranking
    - jesbu1_roboarena_eval_debug_nowrist_roboarena_eval_debug_nowrist
  quality_preference:
    - mw
  reward_alignment:
    - jesbu1_roboarena_eval_debug_nowrist_roboarena_eval_debug_nowrist
    - aliangdw_metaworld_metaworld_eval
  similarity_score:
    - aliangdw_metaworld_metaworld_eval
# Dataset selection, sampling ratios and frame preprocessing settings.
data:
  # NOTE(review): the weight keys (metaworld_train, roboarena) do not match
  # the single dataset named under train_datasets -- confirm how the
  # consumer maps weights to datasets.
  data_source_weights:
    metaworld_train: 1.0
    roboarena: 1.0
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  dataset_preference_ratio: 0.7
  dataset_success_cutoff_file: rfm/data/dataset_success_cutoff.txt
  dataset_type: rfm
  eval_datasets:
    - jesbu1_roboarena_eval_debug_nowrist_roboarena_eval_debug_nowrist
  eval_subset_size: null
  fps: 10
  load_embeddings: true
  max_frames: 8
  max_frames_after_preprocessing: 64
  max_success: 1.0
  max_trajectories: -1
  min_frames_per_trajectory: 10
  min_success: 0.8
  n_wrong_tasks: 5
  num_bins: 10
  pairwise_progress: false
  preference_strategy_ratio: [6.0, 1.0, 1.0]
  progress_pred_type: absolute_wrt_total_frames
  progress_strategy_ratio: [1.0, 2.0, 1.0, 1.0, 1.0]
  resized_height: 196
  resized_width: 196
  rewind_lengths: null
  roboarena_partial_success_threshold: 0.2
  # Only the first entry is non-zero. Which sample type each position
  # refers to is not visible in this file -- TODO confirm.
  sample_type_ratio: [1.0, 0.0, 0.0]
  samples_per_trajectory: 1
  seed: 42
  shuffle: true
  shuffle_progress_frames: false
  similarity_strategy_ratio: [1.0, 1.0, 1.0]
  task_instruction_same_source_prob: 0.5
  # NOTE(review): this is the same dataset as eval_datasets above, and its
  # name contains "eval_debug" -- presumably a debug setup; verify before
  # using this config for a real training run.
  train_datasets:
    - jesbu1_roboarena_eval_debug_nowrist_roboarena_eval_debug_nowrist
  use_multi_image: true
  use_uniform_sampling: false
debug: false

# Logging, checkpoint-saving and experiment-tracker settings.
logging:
  log_level: INFO
  log_to:
    - wandb
  save_best:
    # Two entries here and two under metric_names; presumably paired by
    # position -- TODO confirm.
    greater_is_better:
      - true
      - true
    hub_private: false
    hub_save_every: 250
    # SECURITY: never commit a Hugging Face access token to this file.
    # A token that was previously stored here must be treated as leaked
    # and revoked. Provide the credential outside version control (for
    # example the HF_TOKEN environment variable or a cached
    # `huggingface-cli login`) -- confirm the uploader falls back to those
    # when this value is null. upload_to_hub is false below, so no token
    # is needed for this run.
    hub_token: null
    keep_top_k: 5
    metric_names:
      - eval_rew_align/pearson_mw_eval
      - eval_p_rank/spearman_mw_eval
    save_every: 250
    upload_to_hub: false
  save_model: true
  save_processor: true
  wandb_entity: clvr
  wandb_mode: null
  wandb_notes: training RFM
  wandb_project: rfm
# Loss settings.
loss:
  predict_last_frame_progress: false
  success_positive_weight: 6.0

# Run mode for this configuration.
mode: train
# Model settings: base model, which components/heads are trained, and
# precision.
model:
  average_temporal_patches: false
  base_model_id: rewind_scale_transformer
  causal_mask: false
  model_type: default
  # Also set (to the same value) under the data section of this file.
  pairwise_progress: false
  peft_vision_encoder: false
  quantization: false
  rewind: null
  rewind_scale_model: true
  torch_dtype: bfloat16
  train_language_model: false
  # Preference and progress heads are enabled; similarity and success
  # heads are disabled.
  train_preference_head: true
  train_progress_head: true
  train_similarity_head: false
  train_success_head: false
  train_vision_encoder: false
  trust_remote_code: true
  # Also set (to the same value) under the data section of this file.
  use_multi_image: true
  use_peft: false
  use_progress_token: false
  use_unsloth: false
# PEFT / LoRA hyperparameters.
# NOTE(review): model.use_peft is false in this file, so these values are
# presumably unused for this run -- confirm.
peft:
  bias: none
  lora_alpha: 64
  lora_dropout: 0.05
  r: 32
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj

# Same identifier as model.base_model_id in this file.
trainer_cls: rewind_scale_transformer
# Optimisation, evaluation cadence and batch-size settings for the run.
training:
  beta: 0.1
  bf16: true
  custom_eval_steps: 250
  dataloader_num_workers: 8
  dataloader_persistent_workers: true
  dataloader_pin_memory: true
  ddp_bucket_cap_mb: 25
  ddp_find_unused_parameters: false
  do_eval: true
  eval_steps: 250
  evaluation_strategy: steps
  exp_name: rewind_scale_Progress_Pref_test_save
  fp16: false
  gradient_accumulation_steps: 1
  gradient_checkpointing: false
  learning_rate: 2.0e-05
  logging_steps: 1
  lr_scheduler_type: cosine
  max_grad_norm: 10.0
  max_seq_length: 1024
  # num_train_epochs is -1 while max_steps is set; presumably the run
  # length is driven by max_steps -- TODO confirm.
  max_steps: 100000
  num_gpus: 2
  num_train_epochs: -1
  output_dir: ./rewind_logs
  overwrite_output_dir: true
  per_device_eval_batch_size: 16
  per_device_train_batch_size: 1024
  predict_pref_progress: true
  predict_sim_progress: true
  prediction_loss_only: true
  remove_unused_columns: false
  resume_from_checkpoint: null
  run_default_eval: false
  # NOTE(review): save_steps is set but save_strategy is the string 'no'
  # (kept quoted so it is not read as a boolean). Checkpointing may instead
  # be driven by logging.save_best -- confirm.
  save_steps: 250
  save_strategy: 'no'
  vision_encoder_lr: 1.0e-05
  vision_encoder_num_layers: 3
  # NOTE(review): warmup_ratio is non-zero while warmup_steps is 0; which
  # one takes precedence depends on the trainer -- confirm.
  warmup_ratio: 0.1
  warmup_steps: 0
  weight_decay: 0.1
|