43 / Meissonic /train /extract.sh
BryanW's picture
Upload folder using huggingface_hub
3d1c0e1 verified
# Feature-extraction job: starts train/extract_features.py through
# `accelerate launch` as 8 processes on GPU ids 0-7, reading the OpenVid1M
# CSV listed below. Every setting lives in a variable so it can be edited in
# one place; the command at the bottom only references those variables.

# --- accelerate launcher settings ---
gpu_ids='0,1,2,3,4,5,6,7'
num_processes=8            # one process per id listed in gpu_ids
main_process_port=25011

# --- dataset locations ---
dataset_root=/mnt/VideoGen/dataset/OpenVid1M
csv_path="${dataset_root}/video_reorg/OpenVid1M_reorganized.csv"
# NOTE(review): the "17_128_128" suffix appears to mirror
# num_frames/video_height/video_width below -- keep them in sync if so.
output_dir="${dataset_root}/extracted_features_17_128_128"

# --- model identifiers passed to the extraction script ---
text_encoder_architecture=umt5-xxl
video_tokenizer_model_id=Cosmos-0.1-Tokenizer-DV4x8x8

# --- clip geometry and loader settings ---
num_frames=17
video_height=128
video_width=128
batch_size=64
num_workers=8

accelerate launch \
  --multi_gpu \
  --gpu_ids "${gpu_ids}" \
  --main_process_port "${main_process_port}" \
  --num_processes "${num_processes}" \
  train/extract_features.py \
  --csv_path "${csv_path}" \
  --output_dir "${output_dir}" \
  --text_encoder_architecture "${text_encoder_architecture}" \
  --video_tokenizer_model_id "${video_tokenizer_model_id}" \
  --num_frames "${num_frames}" \
  --video_height "${video_height}" \
  --video_width "${video_width}" \
  --batch_size "${batch_size}" \
  --num_workers "${num_workers}" \
  --extract_text
# Currently disabled flag; to use it, append it to the command above (add a
# trailing backslash after --extract_text). Presumably it enables the video
# feature pass -- verify against train/extract_features.py.
# --extract_video
# python train/extract_empty_embeds.py \
# --text_encoder_architecture umt5-base \
# --output_path /path/to/empty_embeds.pt \
# --dtype float16
# python train/train_mei_video.py \
# --use_precomputed_features \
# --features_dir /path/to/extracted_features \
# --text_encoder_architecture umt5-base \
# --video_tokenizer_model_id Cosmos-1.0-Tokenizer-DV8x16x16 \
# --num_frames 16 \
# --video_height 480 \
# --video_width 848 \
# --train_batch_size 8 \
# --learning_rate 3e-4 \
# --max_train_steps 10000 \
# --output_dir ./output \
# --mixed_precision bf16
# python train/check_codebook_range.py \
# --csv_path /mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv \
# --video_tokenizer_model_id Cosmos-0.1-Tokenizer-DV4x8x8 \
# --num_frames 16 \
# --video_height 480 \
# --video_width 848 \
# --check_interval 10 \
# --max_samples 1000 # optional: limit the number of samples checked