Init

Files changed (9) hide show

checkpoint-33400/optimizer.bin +3 -0
checkpoint-33400/random_states_0.pkl +3 -0
checkpoint-33400/scaler.pt +3 -0
checkpoint-33400/scheduler.bin +3 -0
checkpoint-33400/unet/config.json +38 -0
checkpoint-33400/unet/diffusion_pytorch_model.bin +3 -0
train_scripts.sh +51 -0
unet/config.json +38 -0
unet/diffusion_pytorch_model.safetensors +3 -0

checkpoint-33400/optimizer.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cea5b252d6739c4a0ebc2110ad460797b3f0020e48a4d8585fadeb568266f10f
+size 12198228894

checkpoint-33400/random_states_0.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:033caefb0ceb526b1e246a589a060a8550d522d3910eca5a7de325ca2dc7f2ef
+size 14344

checkpoint-33400/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c9e92d7ca7e11f8c8280a110c0e95f7fc8368104e07cf4940880e3ea39426da
+size 988

checkpoint-33400/scheduler.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b2b3dfff9c1ad4d9153cf1a31ee06b9a882cf0259a3aa7a98c6220f5eeb88c15
+size 1000

checkpoint-33400/unet/config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "_class_name": "UNetSpatioTemporalConditionModel",
+  "_diffusers_version": "0.27.2",
+  "_name_or_path": "/network/scratch/a/anthony.gosselin/Results/ctrlv/nuscenes_box_predict_2/checkpoint-22400",
+  "addition_time_embed_dim": 256,
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "cross_attention_dim": 1024,
+  "down_block_types": [
+    "CrossAttnDownBlockSpatioTemporal",
+    "CrossAttnDownBlockSpatioTemporal",
+    "CrossAttnDownBlockSpatioTemporal",
+    "DownBlockSpatioTemporal"
+  ],
+  "in_channels": 8,
+  "layers_per_block": 2,
+  "num_attention_heads": [
+    5,
+    10,
+    20,
+    20
+  ],
+  "num_frames": 25,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 768,
+  "sample_size": 96,
+  "transformer_layers_per_block": 1,
+  "up_block_types": [
+    "UpBlockSpatioTemporal",
+    "CrossAttnUpBlockSpatioTemporal",
+    "CrossAttnUpBlockSpatioTemporal",
+    "CrossAttnUpBlockSpatioTemporal"
+  ]
+}

checkpoint-33400/unet/diffusion_pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5ba9a0bba0f415a5d0c135e3e48922ce1b89a605eb4edcde9903bdd942ac5d0
+size 6099139590

train_scripts.sh ADDED Viewed

	@@ -0,0 +1,51 @@

+#!/usr/bin/env bash
+# Training launcher: runs tools/train_video_diffusion.py through accelerate for the
+# box-prediction run configured below, after saving a copy of this script into
+# the output directory.
+# The entry point is a relative path, so start this from the directory that contains tools/.
+#
+# Optional cleanup of stray GPU python processes before launching:
+# nvidia-smi | grep 'python' | awk '{ print $5 }' | xargs -n1 kill -9
+timestamp=$(date +%y%m%d_%H%M%S)  # only referenced by the timestamped NAME variant below
+DATASET="nuscenes"  # other options: "kitti/vkitti/bdd100k/..."
+DATASET_PATH="/network/scratch/a/anthony.gosselin"
+NAME="${DATASET}_box_predict_2"  # timestamped variant: "${DATASET}_box_predict_${timestamp}"
+OUT_DIR="/network/scratch/a/anthony.gosselin/Results/ctrlv/${NAME}"
+# Stop early when the output directory cannot be created: nothing could be saved there.
+mkdir -p -- "$OUT_DIR" || exit 1
+PROJECT_NAME='ctrl_v'
+SCRIPT_PATH=$0
+SAVE_SCRIPT_PATH="${OUT_DIR}/train_scripts.sh"
+# Snapshot this script next to the run outputs. The copy is skipped when the script is
+# already the saved file, because cp refuses to copy a file onto itself.
+[ "$SCRIPT_PATH" -ef "$SAVE_SCRIPT_PATH" ] || cp -- "$SCRIPT_PATH" "$SAVE_SCRIPT_PATH" || exit 1
+echo "Saved script to ${SAVE_SCRIPT_PATH}"
+# CUDA_LAUNCH_BLOCKING=1 makes CUDA kernel launches synchronous, which is a debugging aid.
+# NOTE for review: confirm it is still wanted for long runs, since it reduces throughput.
+CUDA_LAUNCH_BLOCKING=1 accelerate launch tools/train_video_diffusion.py \
+    --run_name "$NAME" \
+    --data_root "$DATASET_PATH" \
+    --project_name "$PROJECT_NAME" \
+    --pretrained_model_name_or_path stabilityai/stable-video-diffusion-img2vid-xt \
+    --output_dir "$OUT_DIR" \
+    --variant fp16 \
+    --dataset_name "$DATASET" \
+    --train_batch_size 1 \
+    --learning_rate 5e-6 \
+    --checkpoints_total_limit 2 \
+    --checkpointing_steps 200 \
+    --gradient_accumulation_steps 5 \
+    --validation_steps 100 \
+    --enable_gradient_checkpointing \
+    --lr_scheduler constant \
+    --report_to wandb \
+    --seed 1234 \
+    --mixed_precision fp16 \
+    --clip_length 25 \
+    --min_guidance_scale 3 \
+    --max_guidance_scale 7 \
+    --noise_aug_strength 0.01 \
+    --bbox_dropout_prob 0.1 \
+    --conditioning_dropout_prob 0.0 \
+    --num_demo_samples 10 \
+    --backprop_temporal_blocks_start_iter -1 \
+    --num_train_epochs 2 \
+    --predict_bbox \
+    --num_inference_steps 30 \
+    --resume_from_checkpoint latest \
+    --num_cond_bbox_frames 3 \
+    --wandb_entity chris-pal \
+    --fps 7
+    # --if_last_frame_trajectory

unet/config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "_class_name": "UNetSpatioTemporalConditionModel",
+  "_diffusers_version": "0.27.2",
+  "_name_or_path": "/network/scratch/a/anthony.gosselin/Results/ctrlv/nuscenes_box_predict_2/checkpoint-21000",
+  "addition_time_embed_dim": 256,
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "cross_attention_dim": 1024,
+  "down_block_types": [
+    "CrossAttnDownBlockSpatioTemporal",
+    "CrossAttnDownBlockSpatioTemporal",
+    "CrossAttnDownBlockSpatioTemporal",
+    "DownBlockSpatioTemporal"
+  ],
+  "in_channels": 8,
+  "layers_per_block": 2,
+  "num_attention_heads": [
+    5,
+    10,
+    20,
+    20
+  ],
+  "num_frames": 25,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": 768,
+  "sample_size": 96,
+  "transformer_layers_per_block": 1,
+  "up_block_types": [
+    "UpBlockSpatioTemporal",
+    "CrossAttnUpBlockSpatioTemporal",
+    "CrossAttnUpBlockSpatioTemporal",
+    "CrossAttnUpBlockSpatioTemporal"
+  ]
+}

unet/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a013992e7f9ac1259b96f764dc753d18f51624a5b193b9895d5916d324d3ce68
+size 6098682464