anthony.gosselin committed on
Commit
b9cb209
·
1 Parent(s): e409eb0

train_script

Browse files
Files changed (1) hide show
  1. train_scripts.sh +45 -0
train_scripts.sh ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
#
# train_scripts.sh - launch a Stable Video Diffusion training run through
# `accelerate launch tools/train_video_diffusion.py`.
#
# Usage: run from the directory that contains `tools/` (the trainer path below
# is relative):
#   bash train_scripts.sh
#
# What it does:
#   1. Builds the run name and output directory from DATASET.
#   2. Creates the output directory and stores a copy of this script in it.
#   3. Starts training with the hyper-parameters listed at the bottom.

# Abort on the first failing command and on any unset variable, so that a
# failed directory creation cannot silently fall through to the training run.
# (`pipefail` is not needed: the script contains no pipelines.)
set -eu

# Optional manual cleanup, left disabled: SIGKILLs the PIDs taken from column 5
# of the `nvidia-smi` lines that mention 'python'.
# nvidia-smi | grep 'python' | awk '{ print $5 }' | xargs -n1 kill -9

# NOTE(review): `timestamp` is not referenced anywhere below; kept as in the
# original in case it is meant to be appended to NAME.
timestamp=$(date +%y%m%d_%H%M%S)
DATASET="nuscenes" #"kitti/vkitti/bdd100k/nuscenes/..."
DATASET_PATH="/network/scratch/a/anthony.gosselin"
NAME="${DATASET}_svd_7hz_3"
OUT_DIR="/network/scratch/a/anthony.gosselin/Results/ctrlv/${NAME}"
mkdir -p -- "$OUT_DIR"

PROJECT_NAME='ctrl-v'

# Keep a copy of the exact launch script next to the run outputs. This is
# best-effort: a failed copy is reported on stderr but does not block training,
# and the success message is printed only when the copy actually happened.
SCRIPT_PATH=$0
SAVE_SCRIPT_PATH="${OUT_DIR}/train_scripts.sh"
if cp -- "$SCRIPT_PATH" "$SAVE_SCRIPT_PATH"; then
  echo "Saved script to ${SAVE_SCRIPT_PATH}"
else
  echo "Warning: could not save script to ${SAVE_SCRIPT_PATH}" >&2
fi

# NOTE(review): CUDA_LAUNCH_BLOCKING=1 makes CUDA kernel launches synchronous;
# it is normally a debugging aid and slows training down - confirm it is
# intended for regular runs.
# Every expansion is quoted so a value containing spaces or glob characters is
# passed to the trainer as a single argument.
CUDA_LAUNCH_BLOCKING=1 accelerate launch tools/train_video_diffusion.py \
  --run_name "$NAME" \
  --data_root "$DATASET_PATH" \
  --project_name "$PROJECT_NAME" \
  --pretrained_model_name_or_path stabilityai/stable-video-diffusion-img2vid-xt \
  --output_dir "$OUT_DIR" \
  --variant fp16 \
  --dataset_name "$DATASET" \
  --train_batch_size 1 \
  --learning_rate 1e-5 \
  --checkpoints_total_limit 1 \
  --checkpointing_steps 300 \
  --gradient_accumulation_steps 5 \
  --validation_steps 300 \
  --enable_gradient_checkpointing \
  --lr_scheduler constant \
  --report_to wandb \
  --seed 1234 \
  --mixed_precision no \
  --clip_length 25 \
  --min_guidance_scale 1.0 \
  --max_guidance_scale 3.0 \
  --noise_aug_strength 0.01 \
  --bbox_dropout_prob 0.1 \
  --num_demo_samples 15 \
  --backprop_temporal_blocks_start_iter -1 \
  --num_train_epochs 20 \
  --resume_from_checkpoint latest \
  --wandb_entity chris-pal