| | set -x |
| |
|
| | lpips_lambda=0.8 |
| |
|
| | image_size=128 |
| | |
| | image_size_encoder=224 |
| |
|
| | patch_size=14 |
| |
|
| | batch_size=4 |
| | |
| | |
| | |
| | num_samples=50 |
| | |
| | |
| |
|
| | dataset_name=chair |
| |
|
| | eval_data_dir=/mnt/lustre/yslan/3D_Dataset/get3d/${dataset_name}_test |
| | DATASET_FLAGS=" |
| | --data_dir /mnt/cache/yslan/get3d/lmdb_debug/${dataset_name} \ |
| | --eval_data_dir $eval_data_dir \ |
| | " |
| |
|
| | lr=2e-5 |
| | kl_lambda=0 |
| | vit_lr=1e-5 |
| |
|
| | encoder_lr=$vit_lr |
| | vit_decoder_lr=$vit_lr |
| | conv_lr=0.0005 |
| | triplane_decoder_lr=$conv_lr |
| | super_resolution_lr=$conv_lr |
| |
|
| | scale_clip_encoding=18.4 |
| | triplane_scaling_divider=1 |
| | unconditional_guidance_scale=1.0 |
| |
|
| | CKPT_FLAGS=" |
| | --resume_checkpoint checkpoints/shapenet/chair/model_joint_denoise_rec_model2030000.pt \ |
| | " |
| |
|
| |
|
| | |
| |
|
| | LR_FLAGS="--encoder_lr $encoder_lr \ |
| | --vit_decoder_lr $vit_decoder_lr \ |
| | --triplane_decoder_lr $triplane_decoder_lr \ |
| | --super_resolution_lr $super_resolution_lr \ |
| | --lr $lr" |
| |
|
| |
|
| | TRAIN_FLAGS="--iterations 10001 --anneal_lr False \ |
| | --batch_size $batch_size --save_interval 10000 \ |
| | --image_size_encoder $image_size_encoder \ |
| | --image_size $image_size \ |
| | --dino_version v2 \ |
| | --sr_training False \ |
| | --cls_token False \ |
| | --weight_decay 0.05 \ |
| | --image_size $image_size \ |
| | --kl_lambda ${kl_lambda} \ |
| | --no_dim_up_mlp True \ |
| | --uvit_skip_encoder True \ |
| | --fg_mse True \ |
| | --vae_p 2 \ |
| | --bg_lamdba 0.01 \ |
| | " |
| |
|
| |
|
| | DDPM_MODEL_FLAGS=" |
| | --learn_sigma False \ |
| | --num_heads 8 \ |
| | --num_res_blocks 2 \ |
| | --num_channels 320 \ |
| | --attention_resolutions "4,2,1" \ |
| | --use_spatial_transformer True \ |
| | --transformer_depth 1 \ |
| | --context_dim 768 \ |
| | " |
| |
|
| |
|
| | DIFFUSION_FLAGS="--diffusion_steps 1000 --noise_schedule linear \ |
| | --use_kl False \ |
| | --use_amp False \ |
| | --triplane_scaling_divider ${triplane_scaling_divider} \ |
| | --trainer_name vpsde_crossattn \ |
| | --mixed_prediction True \ |
| | --denoise_in_channels 12 \ |
| | --denoise_out_channels 12 \ |
| | --diffusion_input_size 32 \ |
| | --p_rendering_loss False \ |
| | --pred_type v \ |
| | --predict_v True \ |
| | " |
| |
|
| | DDIM_FLAGS=" |
| | --timestep_respacing ddim250 \ |
| | --use_ddim True \ |
| | --unconditional_guidance_scale ${unconditional_guidance_scale} \ |
| | " |
| |
|
| | |
| | CONTROL_FLAGS=" |
| | --train_vae False \ |
| | --create_controlnet False \ |
| | --control_key img_sr \ |
| | " |
| |
|
| |
|
| | prompt="a gaming chair" |
| |
|
| | logdir="./logs/LSGM/inference/t23d/${dataset_name}/cfg=${unconditional_guidance_scale}/gaming_chair/" |
| |
|
| | SR_TRAIN_FLAGS_v1_2XC=" |
| | --decoder_in_chans 32 \ |
| | --out_chans 96 \ |
| | --alpha_lambda 1 \ |
| | --logdir $logdir \ |
| | --arch_encoder vits \ |
| | --arch_decoder vitb \ |
| | --vit_decoder_wd 0.001 \ |
| | --encoder_weight_decay 0.001 \ |
| | --color_criterion mse \ |
| | --decoder_output_dim 32 \ |
| | --ae_classname vit.vit_triplane.RodinSR_256_fusionv5_ConvQuant_liteSR_dinoInit3DAttn \ |
| | " |
| |
|
| | SR_TRAIN_FLAGS=${SR_TRAIN_FLAGS_v1_2XC} |
| |
|
| | NUM_GPUS=1 |
| |
|
| | rm -rf "$logdir"/runs |
| | mkdir -p "$logdir"/ |
| | cp "$0" "$logdir"/ |
| |
|
| | export OMP_NUM_THREADS=12 |
| | export NCCL_ASYNC_ERROR_HANDLING=1 |
| | export OMP_NUM_THREADS=12 |
| | export CUDA_VISIBLE_DEVICES=1 |
| |
|
| | torchrun --nproc_per_node=$NUM_GPUS \ |
| | --master_port=0 \ |
| | --rdzv_backend=c10d \ |
| | --rdzv-endpoint=localhost:23325 \ |
| | --nnodes 1 \ |
| | scripts/vit_triplane_diffusion_sample.py \ |
| | --num_workers 4 \ |
| | --depth_lambda 0 \ |
| | ${TRAIN_FLAGS} \ |
| | ${SR_TRAIN_FLAGS} \ |
| | ${DIFFUSION_FLAGS} \ |
| | ${CONTROL_FLAGS} \ |
| | ${DDPM_MODEL_FLAGS} \ |
| | ${DATASET_FLAGS} \ |
| | ${CKPT_FLAGS} \ |
| | ${LR_FLAGS} \ |
| | ${DDIM_FLAGS} \ |
| | --lpips_lambda $lpips_lambda \ |
| | --overfitting False \ |
| | --load_pretrain_encoder True \ |
| | --iterations 5000001 \ |
| | --save_interval 10000 \ |
| | --eval_interval 2500 \ |
| | --decomposed True \ |
| | --logdir $logdir \ |
| | --cfg shapenet_tuneray_aug_resolution_64_64_nearestSR \ |
| | --ray_start 0.6 \ |
| | --ray_end 1.8 \ |
| | --patch_size ${patch_size} \ |
| | --eval_batch_size 50 \ |
| | --prompt "$prompt" \ |
| | --interval 5 \ |
| | --save_img True \ |
| | --num_samples ${num_samples} \ |
| | --export_mesh True \ |
| | --normalize_clip_encoding True \ |
| | --scale_clip_encoding ${scale_clip_encoding} \ |
| |
|