#!/usr/bin/env bash
#
# Launches scripts/fantasytalking/train.py through `accelerate launch`
# with bf16 mixed precision.
#
# All model, dataset, config, and output paths below are relative, so they
# are resolved against the current working directory at invocation time.
#
# Exported variables:
#   MODEL_NAME         - passed as --pretrained_model_name_or_path
#   DATASET_NAME       - passed as --train_data_dir
#   DATASET_META_NAME  - passed as --train_data_meta

export MODEL_NAME="models/Diffusion_Transformer/Wan2.1-I2V-14B-720P"
export DATASET_NAME="datasets/internal_datasets/"
export DATASET_META_NAME="datasets/internal_datasets/metadata_control.json"

# NCCL_IB_DISABLE=1 and NCCL_P2P_DISABLE=1 are used for multi-node runs
# without RDMA. Uncomment the two exports below in that case.
# export NCCL_IB_DISABLE=1
# export NCCL_P2P_DISABLE=1

# NCCL_DEBUG=INFO is set only in the environment of the launched command.
# The launch is the final statement, so its exit status becomes the status
# of this script.
NCCL_DEBUG=INFO accelerate launch --mixed_precision="bf16" scripts/fantasytalking/train.py \
  --config_path="config/wan2.1/wan_civitai.yaml" \
  --pretrained_model_name_or_path="$MODEL_NAME" \
  --train_data_dir="$DATASET_NAME" \
  --train_data_meta="$DATASET_META_NAME" \
  --video_sample_size=512 \
  --token_sample_size=512 \
  --video_sample_stride=1 \
  --video_sample_n_frames=81 \
  --train_batch_size=1 \
  --gradient_accumulation_steps=1 \
  --dataloader_num_workers=8 \
  --num_train_epochs=100 \
  --checkpointing_steps=50 \
  --learning_rate=2e-05 \
  --lr_scheduler="constant_with_warmup" \
  --lr_warmup_steps=100 \
  --seed=42 \
  --output_dir="output_dir" \
  --gradient_checkpointing \
  --mixed_precision="bf16" \
  --adam_weight_decay=3e-2 \
  --adam_epsilon=1e-10 \
  --vae_mini_batch=1 \
  --max_grad_norm=0.05 \
  --random_hw_adapt \
  --training_with_video_token_length \
  --enable_bucket \
  --uniform_sampling \
  --low_vram \
  --transformer_path="models/FantasyTalking/fantasytalking_model.ckpt" \
  --trainable_modules "processor." "proj_model."