anthony.gosselin committed on
Commit ·
2006222
1
Parent(s): 5dabcda
Init
Browse files
- checkpoint-33400/optimizer.bin +3 -0
- checkpoint-33400/random_states_0.pkl +3 -0
- checkpoint-33400/scaler.pt +3 -0
- checkpoint-33400/scheduler.bin +3 -0
- checkpoint-33400/unet/config.json +38 -0
- checkpoint-33400/unet/diffusion_pytorch_model.bin +3 -0
- train_scripts.sh +51 -0
- unet/config.json +38 -0
- unet/diffusion_pytorch_model.safetensors +3 -0
checkpoint-33400/optimizer.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cea5b252d6739c4a0ebc2110ad460797b3f0020e48a4d8585fadeb568266f10f
|
| 3 |
+
size 12198228894
|
checkpoint-33400/random_states_0.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:033caefb0ceb526b1e246a589a060a8550d522d3910eca5a7de325ca2dc7f2ef
|
| 3 |
+
size 14344
|
checkpoint-33400/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c9e92d7ca7e11f8c8280a110c0e95f7fc8368104e07cf4940880e3ea39426da
|
| 3 |
+
size 988
|
checkpoint-33400/scheduler.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2b3dfff9c1ad4d9153cf1a31ee06b9a882cf0259a3aa7a98c6220f5eeb88c15
|
| 3 |
+
size 1000
|
checkpoint-33400/unet/config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "UNetSpatioTemporalConditionModel",
|
| 3 |
+
"_diffusers_version": "0.27.2",
|
| 4 |
+
"_name_or_path": "/network/scratch/a/anthony.gosselin/Results/ctrlv/nuscenes_box_predict_2/checkpoint-22400",
|
| 5 |
+
"addition_time_embed_dim": 256,
|
| 6 |
+
"block_out_channels": [
|
| 7 |
+
320,
|
| 8 |
+
640,
|
| 9 |
+
1280,
|
| 10 |
+
1280
|
| 11 |
+
],
|
| 12 |
+
"cross_attention_dim": 1024,
|
| 13 |
+
"down_block_types": [
|
| 14 |
+
"CrossAttnDownBlockSpatioTemporal",
|
| 15 |
+
"CrossAttnDownBlockSpatioTemporal",
|
| 16 |
+
"CrossAttnDownBlockSpatioTemporal",
|
| 17 |
+
"DownBlockSpatioTemporal"
|
| 18 |
+
],
|
| 19 |
+
"in_channels": 8,
|
| 20 |
+
"layers_per_block": 2,
|
| 21 |
+
"num_attention_heads": [
|
| 22 |
+
5,
|
| 23 |
+
10,
|
| 24 |
+
20,
|
| 25 |
+
20
|
| 26 |
+
],
|
| 27 |
+
"num_frames": 25,
|
| 28 |
+
"out_channels": 4,
|
| 29 |
+
"projection_class_embeddings_input_dim": 768,
|
| 30 |
+
"sample_size": 96,
|
| 31 |
+
"transformer_layers_per_block": 1,
|
| 32 |
+
"up_block_types": [
|
| 33 |
+
"UpBlockSpatioTemporal",
|
| 34 |
+
"CrossAttnUpBlockSpatioTemporal",
|
| 35 |
+
"CrossAttnUpBlockSpatioTemporal",
|
| 36 |
+
"CrossAttnUpBlockSpatioTemporal"
|
| 37 |
+
]
|
| 38 |
+
}
|
checkpoint-33400/unet/diffusion_pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5ba9a0bba0f415a5d0c135e3e48922ce1b89a605eb4edcde9903bdd942ac5d0
|
| 3 |
+
size 6099139590
|
train_scripts.sh
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# nvidia-smi | grep 'python' | awk '{ print $5 }' | xargs -n1 kill -9
|
| 2 |
+
|
| 3 |
+
timestamp=$(date +%y%m%d_%H%M%S)
|
| 4 |
+
DATASET="nuscenes" #"kitti/vkitti/bdd100k/..."
|
| 5 |
+
DATASET_PATH="/network/scratch/a/anthony.gosselin"
|
| 6 |
+
NAME="${DATASET}_box_predict_2" #"${DATASET}_box_predict_${timestamp}"
|
| 7 |
+
OUT_DIR="/network/scratch/a/anthony.gosselin/Results/ctrlv/${NAME}"
|
| 8 |
+
# Create the run's output directory (no-op if it already exists). The expansion
# is quoted so a path containing spaces or glob characters stays one argument.
mkdir -p -- "$OUT_DIR"
|
| 9 |
+
|
| 10 |
+
PROJECT_NAME='ctrl_v'
|
| 11 |
+
|
| 12 |
+
SCRIPT_PATH=$0
|
| 13 |
+
SAVE_SCRIPT_PATH="${OUT_DIR}/train_scripts.sh"
|
| 14 |
+
# Copy this launcher script to SAVE_SCRIPT_PATH. Both paths are quoted to
# prevent word-splitting/globbing, and '--' stops a leading '-' in $0 from
# being parsed as a cp option.
cp -- "$SCRIPT_PATH" "$SAVE_SCRIPT_PATH"
|
| 15 |
+
echo "Saved script to ${SAVE_SCRIPT_PATH}"
|
| 16 |
+
|
| 17 |
+
CUDA_LAUNCH_BLOCKING=1 accelerate launch tools/train_video_diffusion.py \
|
| 18 |
+
--run_name $NAME \
|
| 19 |
+
--data_root $DATASET_PATH \
|
| 20 |
+
--project_name $PROJECT_NAME \
|
| 21 |
+
--pretrained_model_name_or_path stabilityai/stable-video-diffusion-img2vid-xt \
|
| 22 |
+
--output_dir $OUT_DIR \
|
| 23 |
+
--variant fp16 \
|
| 24 |
+
--dataset_name $DATASET \
|
| 25 |
+
--train_batch_size 1 \
|
| 26 |
+
--learning_rate 5e-6 \
|
| 27 |
+
--checkpoints_total_limit 2 \
|
| 28 |
+
--checkpointing_steps 200 \
|
| 29 |
+
--gradient_accumulation_steps 5 \
|
| 30 |
+
--validation_steps 100 \
|
| 31 |
+
--enable_gradient_checkpointing \
|
| 32 |
+
--lr_scheduler constant \
|
| 33 |
+
--report_to wandb \
|
| 34 |
+
--seed 1234 \
|
| 35 |
+
--mixed_precision fp16 \
|
| 36 |
+
--clip_length 25 \
|
| 37 |
+
--min_guidance_scale 3 \
|
| 38 |
+
--max_guidance_scale 7 \
|
| 39 |
+
--noise_aug_strength 0.01 \
|
| 40 |
+
--bbox_dropout_prob 0.1 \
|
| 41 |
+
--conditioning_dropout_prob 0.0 \
|
| 42 |
+
--num_demo_samples 10 \
|
| 43 |
+
--backprop_temporal_blocks_start_iter -1 \
|
| 44 |
+
--num_train_epochs 2 \
|
| 45 |
+
--predict_bbox \
|
| 46 |
+
--num_inference_steps 30 \
|
| 47 |
+
--resume_from_checkpoint latest \
|
| 48 |
+
--num_cond_bbox_frames 3 \
|
| 49 |
+
--wandb_entity chris-pal \
|
| 50 |
+
--fps 7
|
| 51 |
+
# --if_last_frame_trajectory
|
unet/config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "UNetSpatioTemporalConditionModel",
|
| 3 |
+
"_diffusers_version": "0.27.2",
|
| 4 |
+
"_name_or_path": "/network/scratch/a/anthony.gosselin/Results/ctrlv/nuscenes_box_predict_2/checkpoint-21000",
|
| 5 |
+
"addition_time_embed_dim": 256,
|
| 6 |
+
"block_out_channels": [
|
| 7 |
+
320,
|
| 8 |
+
640,
|
| 9 |
+
1280,
|
| 10 |
+
1280
|
| 11 |
+
],
|
| 12 |
+
"cross_attention_dim": 1024,
|
| 13 |
+
"down_block_types": [
|
| 14 |
+
"CrossAttnDownBlockSpatioTemporal",
|
| 15 |
+
"CrossAttnDownBlockSpatioTemporal",
|
| 16 |
+
"CrossAttnDownBlockSpatioTemporal",
|
| 17 |
+
"DownBlockSpatioTemporal"
|
| 18 |
+
],
|
| 19 |
+
"in_channels": 8,
|
| 20 |
+
"layers_per_block": 2,
|
| 21 |
+
"num_attention_heads": [
|
| 22 |
+
5,
|
| 23 |
+
10,
|
| 24 |
+
20,
|
| 25 |
+
20
|
| 26 |
+
],
|
| 27 |
+
"num_frames": 25,
|
| 28 |
+
"out_channels": 4,
|
| 29 |
+
"projection_class_embeddings_input_dim": 768,
|
| 30 |
+
"sample_size": 96,
|
| 31 |
+
"transformer_layers_per_block": 1,
|
| 32 |
+
"up_block_types": [
|
| 33 |
+
"UpBlockSpatioTemporal",
|
| 34 |
+
"CrossAttnUpBlockSpatioTemporal",
|
| 35 |
+
"CrossAttnUpBlockSpatioTemporal",
|
| 36 |
+
"CrossAttnUpBlockSpatioTemporal"
|
| 37 |
+
]
|
| 38 |
+
}
|
unet/diffusion_pytorch_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a013992e7f9ac1259b96f764dc753d18f51624a5b193b9895d5916d324d3ce68
|
| 3 |
+
size 6098682464
|