#!/usr/bin/env bash
#
# Launcher for train/extract_features.py under `accelerate launch`.
#
# The settings below can be overridden from the environment; when nothing is
# overridden, the command line is identical to the original hard-coded one.
#
#   GPU_IDS            comma-separated GPU ids passed to --gpu_ids
#   NUM_PROCESSES      value for --num_processes (default: number of ids in GPU_IDS)
#   MAIN_PROCESS_PORT  value for --main_process_port
#   CSV_PATH           value for --csv_path
#   OUTPUT_DIR         value for --output_dir
#
# NOTE(review): the script path is relative (train/extract_features.py), so this
# presumably has to be run from the repository root -- confirm.
set -euo pipefail

GPU_IDS="${GPU_IDS:-0,1,2,3,4,5,6,7}"
MAIN_PROCESS_PORT="${MAIN_PROCESS_PORT:-25011}"
CSV_PATH="${CSV_PATH:-/mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv}"
# NOTE(review): the directory name ends in 17_128_128, which matches the
# --num_frames / --video_height / --video_width values below; keep them in
# sync if either side is changed.
OUTPUT_DIR="${OUTPUT_DIR:-/mnt/VideoGen/dataset/OpenVid1M/extracted_features_17_128_128}"

# Default to one process per listed GPU so the two options cannot drift apart
# (8 ids -> 8 processes, as in the original command).
IFS=',' read -r -a gpu_list <<< "$GPU_IDS"
NUM_PROCESSES="${NUM_PROCESSES:-${#gpu_list[@]}}"

# Arguments are kept in arrays rather than a backslash-continued command:
# a flag can then be commented in or out without breaking the continuation.
launch_args=(
  --multi_gpu
  --gpu_ids "$GPU_IDS"
  --main_process_port "$MAIN_PROCESS_PORT"
  --num_processes "$NUM_PROCESSES"
)

extract_args=(
  --csv_path "$CSV_PATH"
  --output_dir "$OUTPUT_DIR"
  --text_encoder_architecture umt5-xxl
  --video_tokenizer_model_id Cosmos-0.1-Tokenizer-DV4x8x8
  --num_frames 17
  --video_height 128
  --video_width 128
  --batch_size 64
  --num_workers 8
  --extract_text
  # --extract_video   # disabled in the original; uncomment to pass this flag
)

accelerate launch "${launch_args[@]}" \
  train/extract_features.py "${extract_args[@]}"

# ---------------------------------------------------------------------------
# Disabled alternative commands, kept for reference. Uncomment one to use it.
# ---------------------------------------------------------------------------

# python train/extract_empty_embeds.py \
#   --text_encoder_architecture umt5-base \
#   --output_path /path/to/empty_embeds.pt \
#   --dtype float16

# python train/train_mei_video.py \
#   --use_precomputed_features \
#   --features_dir /path/to/extracted_features \
#   --text_encoder_architecture umt5-base \
#   --video_tokenizer_model_id Cosmos-1.0-Tokenizer-DV8x16x16 \
#   --num_frames 16 \
#   --video_height 480 \
#   --video_width 848 \
#   --train_batch_size 8 \
#   --learning_rate 3e-4 \
#   --max_train_steps 10000 \
#   --output_dir ./output \
#   --mixed_precision bf16

# python train/check_codebook_range.py \
#   --csv_path /mnt/VideoGen/dataset/OpenVid1M/video_reorg/OpenVid1M_reorganized.csv \
#   --video_tokenizer_model_id Cosmos-0.1-Tokenizer-DV4x8x8 \
#   --num_frames 16 \
#   --video_height 480 \
#   --video_width 848 \
#   --check_interval 10 \
#   --max_samples 1000  # optional: limit the number of samples checked