anthony.gosselin committed on
Commit ·
b9cb209
1
Parent(s): e409eb0
train_script
Browse files- train_scripts.sh +45 -0
train_scripts.sh
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Launches video-diffusion training (tools/train_video_diffusion.py) through
# `accelerate launch`, after creating the run's output directory and saving a
# copy of this script into it so the exact settings of the run are kept
# alongside its results.
#
# Usage: run from the repository root (the trainer path below is relative).
#
# Optional cleanup, disabled by default: force-kills the python processes
# listed by nvidia-smi. Uncomment only if you really want that.
# nvidia-smi | grep 'python' | awk '{ print $5 }' | xargs -n1 kill -9

# NOTE(review): timestamp is computed but not referenced anywhere below.
timestamp=$(date +%y%m%d_%H%M%S)
DATASET="nuscenes" #"kitti/vkitti/bdd100k/nuscenes/..."
DATASET_PATH="/network/scratch/a/anthony.gosselin"
NAME="${DATASET}_svd_7hz_3"
OUT_DIR="/network/scratch/a/anthony.gosselin/Results/ctrlv/${NAME}"
# Quoted so a path containing whitespace or glob characters stays one argument.
mkdir -p -- "$OUT_DIR"

PROJECT_NAME='ctrl-v'

# Keep a snapshot of this script next to the run outputs.
SCRIPT_PATH=$0
SAVE_SCRIPT_PATH="${OUT_DIR}/train_scripts.sh"
cp -- "$SCRIPT_PATH" "$SAVE_SCRIPT_PATH"
echo "Saved script to ${SAVE_SCRIPT_PATH}"

# CUDA_LAUNCH_BLOCKING=1 is set in the environment of the launched process only.
# All variable expansions are quoted so each flag receives exactly one value.
CUDA_LAUNCH_BLOCKING=1 accelerate launch tools/train_video_diffusion.py \
  --run_name "$NAME" \
  --data_root "$DATASET_PATH" \
  --project_name "$PROJECT_NAME" \
  --pretrained_model_name_or_path stabilityai/stable-video-diffusion-img2vid-xt \
  --output_dir "$OUT_DIR" \
  --variant fp16 \
  --dataset_name "$DATASET" \
  --train_batch_size 1 \
  --learning_rate 1e-5 \
  --checkpoints_total_limit 1 \
  --checkpointing_steps 300 \
  --gradient_accumulation_steps 5 \
  --validation_steps 300 \
  --enable_gradient_checkpointing \
  --lr_scheduler constant \
  --report_to wandb \
  --seed 1234 \
  --mixed_precision no \
  --clip_length 25 \
  --min_guidance_scale 1.0 \
  --max_guidance_scale 3.0 \
  --noise_aug_strength 0.01 \
  --bbox_dropout_prob 0.1 \
  --num_demo_samples 15 \
  --backprop_temporal_blocks_start_iter -1 \
  --num_train_epochs 20 \
  --resume_from_checkpoint latest \
  --wandb_entity chris-pal
|