egrpo / scripts /preprocess /preprocess_flux_rlpt_embeddings.sh
studyOverflow's picture
Add files using upload-large-folder tool
45d12c1 verified
# Launches fastvideo/data_preprocess/preprocess_flux_embedding_rlpt.py under
# torchrun, one worker process per GPU, on a single node.
#
# The model path, output directory and Python entry point are all relative
# paths, so run this script from the directory that contains `ckpt/`, `data/`
# and `fastvideo/` (presumably the repository root -- confirm).
#
# Edit the settings below to change the run configuration.

# Number of worker processes torchrun starts on this node (2, 4 or 8).
GPU_NUM=8
# Passed through as --model_path.
MODEL_PATH="./ckpt/flux"
# Passed through as --output_dir (updated for the LAION dataset).
OUTPUT_DIR="data/laion_rl_embeddings"
# Passed through as --prompt_dir. NOTE: despite the name, this points at a
# single captions text file, not a directory.
PROMPT_DIR="/data2/dataset/laion-220k/short_captions.txt"

# Log the effective configuration before launching.
echo "[INFO] Processing LAION-220k dataset captions..."
echo "[INFO] Model path: $MODEL_PATH"
echo "[INFO] Output directory: $OUTPUT_DIR"
echo "[INFO] Prompt file: $PROMPT_DIR"

# Every expansion is quoted so that a path containing spaces or glob characters
# reaches the Python script as exactly one argument.
torchrun --nproc_per_node="$GPU_NUM" --master_port 19002 \
  fastvideo/data_preprocess/preprocess_flux_embedding_rlpt.py \
  --model_path "$MODEL_PATH" \
  --output_dir "$OUTPUT_DIR" \
  --prompt_dir "$PROMPT_DIR"