# NOTE: the next three lines are web-page residue, commented out so the script parses.
# lsnu's picture
# Add files using upload-large-folder tool
# 5ce8761 verified
# ---------------------------------------------------------------------------
# Paths and dataset selection
# ---------------------------------------------------------------------------
main_dir="Peract"
DATA_PATH="/data/user_data/ngkanats"
train_data_dir="${DATA_PATH}/zarr_datasets/peract/train.zarr/"
eval_data_dir="${DATA_PATH}/zarr_datasets/peract/val.zarr/"
train_instructions="instructions/peract/instructions.json"
val_instructions="instructions/peract/instructions.json"
dataset="PeractTwoCam"

# ---------------------------------------------------------------------------
# Data loading
# ---------------------------------------------------------------------------
num_workers="4"
# 64 was used by the authors; as low as 16 works without much performance
# drop and is much faster.
B="64"
B_val="64"
chunk_size="1"
# 8 means 8GB of CPU RAM per worker per GPU. That ceiling is never reached
# because these datasets are small. Reduce it if more than 96GB of CPU
# memory cannot be allocated.
memory_limit="8"

# ---------------------------------------------------------------------------
# Training/testing arguments
# ---------------------------------------------------------------------------
val_freq="4000"
eval_only="false"
lr="1e-4"
# Irrelevant while the backbone is not finetuned.
backbone_lr="1e-6"
lr_scheduler="constant"
wd="1e-10"
train_iters="300000"
# Compilation is much faster, but sometimes unstable.
use_compile="false"
use_ema="false"
# Can be increased while dividing B equally (speed/accuracy tradeoff).
lv2_batch_size="1"

# ---------------------------------------------------------------------------
# Model arguments; change (some of) these for new architectures
# ---------------------------------------------------------------------------
model_type="denoise3d"
bimanual="false"
keypose_only="true"
pre_tokenize="true"
workspace_normalizer_buffer="0.04"
backbone="clip"
finetune_backbone="false"
finetune_text_encoder="false"
fps_subsampling_factor="4"
C="120"
num_attn_heads="8"
num_vis_instr_attn_layers="2"
num_history="3"
num_shared_attn_layers="4"
relative_action="false"
rotation_format="quat_xyzw"
denoise_timesteps="5"
denoise_model="rectified_flow"

# ---------------------------------------------------------------------------
# Derived names: the run directory encodes the main hyper-parameters, and the
# checkpoint path points at last.pth inside that run directory.
# ---------------------------------------------------------------------------
run_log_dir="${model_type}-${dataset}-C${C}-B${B}-lr${lr}-${lr_scheduler}-H${num_history}-${denoise_model}"
checkpoint="train_logs/${main_dir}/${run_log_dir}/last.pth"

# Number of processes handed to torchrun; the authors used 4.
ngpus="1"
# Launch main.py through torchrun with --nproc_per_node set to ngpus, and
# forward every setting defined earlier in this file as a "--flag value" pair.
#
# All expansions are double-quoted so that a value containing whitespace or
# glob characters still reaches main.py as exactly one argument.
#
# The master port stays randomised (presumably so concurrent runs on one host
# do not collide), but it is confined to 10000-29999. A bare $RANDOM spans
# 0-32767 and can therefore select port 0 or a privileged port below 1024.
master_port=$((10000 + RANDOM % 20000))

torchrun --nproc_per_node "$ngpus" --master_port "$master_port" \
  main.py \
  --train_data_dir "$train_data_dir" \
  --eval_data_dir "$eval_data_dir" \
  --train_instructions "$train_instructions" \
  --val_instructions "$val_instructions" \
  --dataset "$dataset" \
  --num_workers "$num_workers" \
  --batch_size "$B" \
  --batch_size_val "$B_val" \
  --chunk_size "$chunk_size" \
  --memory_limit "$memory_limit" \
  --exp_log_dir "$main_dir" \
  --run_log_dir "${run_log_dir}" \
  --checkpoint "$checkpoint" \
  --val_freq "$val_freq" \
  --eval_only "$eval_only" \
  --lr "$lr" \
  --backbone_lr "$backbone_lr" \
  --lr_scheduler "$lr_scheduler" \
  --wd "$wd" \
  --train_iters "$train_iters" \
  --use_compile "$use_compile" \
  --use_ema "$use_ema" \
  --lv2_batch_size "$lv2_batch_size" \
  --model_type "$model_type" \
  --bimanual "$bimanual" \
  --keypose_only "$keypose_only" \
  --pre_tokenize "$pre_tokenize" \
  --backbone "$backbone" \
  --finetune_backbone "$finetune_backbone" \
  --finetune_text_encoder "$finetune_text_encoder" \
  --fps_subsampling_factor "$fps_subsampling_factor" \
  --embedding_dim "$C" \
  --num_attn_heads "$num_attn_heads" \
  --num_vis_instr_attn_layers "$num_vis_instr_attn_layers" \
  --num_history "$num_history" \
  --num_shared_attn_layers "$num_shared_attn_layers" \
  --workspace_normalizer_buffer "$workspace_normalizer_buffer" \
  --relative_action "$relative_action" \
  --rotation_format "$rotation_format" \
  --denoise_timesteps "$denoise_timesteps" \
  --denoise_model "$denoise_model"