#!/bin/bash
# Provenance: commit 697fddf (yitongl) -- "Add inference code and attention
# settings for sfp4 checkpoint-750".
#
# SLURM job that runs a single `fastvideo generate` invocation with the
# SPARSE_FP4_ATTN attention backend and --VSA-sparsity 0.0, writes the video
# to outputs_sfp4_s0/, and then lists it next to the dense FP4 reference
# outputs so the two runs can be compared.
#
# NOTE: slurm_logs/ must already exist in the directory `sbatch` is run from;
# SLURM does not create missing parent directories for --output/--error.
#SBATCH --job-name=sfp4-s0
#SBATCH --account=nvr_elm_llm
#SBATCH --partition=interactive
#SBATCH --nodes=1
#SBATCH --gres=gpu:1
#SBATCH --cpus-per-task=16
#SBATCH --mem=64G
#SBATCH --time=00:30:00
#SBATCH --output=slurm_logs/sfp4_s0_%j.out
#SBATCH --error=slurm_logs/sfp4_s0_%j.err

# -e: abort on the first failing command; -x: trace commands into the job log.
set -ex

readonly REPO_DIR=/lustre/fsw/portfolios/nvr/projects/nvr_elm_llm/users/yitongl/code/FastVideo
readonly OUT_DIR=outputs_sfp4_s0
readonly DENSE_REF_DIR=outputs_dense_fp4

# Every relative path below (venv, PYTHONPATH entries, output directories)
# is resolved against the repository root.
cd "$REPO_DIR"
source .venv/bin/activate

# Prepend the kernel sources. The previous PYTHONPATH is appended only when it
# is non-empty, so an unset variable does not leave a trailing ':' (an empty
# search-path entry).
export PYTHONPATH="fastvideo-kernel/python:fastvideo-kernel${PYTHONPATH:+:${PYTHONPATH}}"
export FASTVIDEO_ATTENTION_BACKEND=SPARSE_FP4_ATTN

mkdir -p "$OUT_DIR"

# Same prompt, seed, params as dense FP4 run
fastvideo generate \
  --model-path Wan-AI/Wan2.1-T2V-1.3B-Diffusers \
  --sp-size 1 --tp-size 1 --num-gpus 1 \
  --dit-cpu-offload False --vae-cpu-offload False \
  --text-encoder-cpu-offload True --pin-cpu-memory False \
  --height 480 --width 832 --num-frames 81 \
  --num-inference-steps 50 --fps 16 \
  --guidance-scale 6.0 --flow-shift 8.0 \
  --prompt "Will Smith casually eats noodles, his relaxed demeanor contrasting with the energetic background of a bustling street food market. The scene captures a mix of humor and authenticity. Mid-shot framing, vibrant lighting." \
  --seed 1024 \
  --VSA-sparsity 0.0 \
  --output-path "$OUT_DIR/"

echo "=== Done ==="
# Intentionally fatal under `set -e`: a run that produced no .mp4 is a failure.
ls -lh "$OUT_DIR"/*.mp4

echo "--- Dense FP4 reference ---"
# Informational only: a missing or empty reference directory must not turn a
# successful generation into a failed job, so the listing is best-effort.
ls -lh "$DENSE_REF_DIR"/*.mp4 \
  || echo "(no dense FP4 reference videos found in $DENSE_REF_DIR/)" >&2