# syntax=docker/dockerfile:1
# LightDiffusion-Next Dockerfile (Hugging Face ZeroGPU Gradio Space snapshot)
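# Stage 1: build the web frontend with Node 22, so the final image only needs
# the compiled assets in frontend/dist.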
FROM node:22-bookworm-slim AS frontend-builder
WORKDIR /frontend
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci
COPY frontend/ ./
RUN npm run build
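# Stage 2: CUDA 12.8 base image with Python 3.10. The devel variant ships nvcc,
# which the optional source builds below (stable-fast, SageAttention, SpargeAttn)
# require.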
FROM nvidia/cuda:12.8.0-devel-ubuntu22.04
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV CUDA_HOME=/usr/local/cuda
ENV PATH=${CUDA_HOME}/bin:${PATH}
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
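# Compute capabilities to compile CUDA extensions for: 8.0 (A100),
# 8.6 (RTX 30xx), 8.9 (RTX 40xx), 9.0 (H100), 12.0 (RTX 50xx).
# Declared as a build arg so it can be overridden at build time.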
ARG TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0;12.0"
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
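# System dependencies: the Python 3.10 toolchain, build tools for compiling
# CUDA extensions, and the X/GL libraries commonly needed by OpenCV and tkinter.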
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
--mount=type=cache,target=/var/lib/apt,sharing=locked \
apt-get update && apt-get install -y \
python3.10 \
python3.10-dev \
python3.10-venv \
python3-pip \
python3-tk \
git \
wget \
curl \
build-essential \
libgl1-mesa-glx \
libglib2.0-0 \
libsm6 \
libxext6 \
libxrender-dev \
libgomp1 \
software-properties-common \
ninja-build \
&& rm -rf /var/lib/apt/lists/*
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
WORKDIR /app
COPY requirements.txt ./
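# Bootstrap pip, then install uv and use it for the heavy package installs.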
RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install --upgrade pip
RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install uv
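# PyTorch stack from the CUDA 12.8 wheel index. xformers is skipped when the
# arch list targets 12.0 (RTX 50 series), since prebuilt xformers wheels for
# sm_120 are not assumed to exist.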
RUN --mount=type=cache,target=/root/.cache/uv /bin/sh -c 'set -e; \
python3 -m uv pip install --system --index-url https://download.pytorch.org/whl/cu128 \
torch torchvision "triton>=2.1.0"; \
if echo "${TORCH_CUDA_ARCH_LIST}" | grep -q "12\.0"; then \
echo "Detected compute capability 12.0 (RTX 50 series). Skipping xformers install."; \
else \
python3 -m uv pip install --system xformers; \
fi'
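# Pin NumPy below 2.0 before resolving the main requirements, so compiled
# dependencies built against the 1.x ABI are not broken by a 2.x upgrade.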
RUN --mount=type=cache,target=/root/.cache/uv python3 -m uv pip install --system "numpy<2.0.0"
RUN --mount=type=cache,target=/root/.cache/uv python3 -m uv pip install --system -r requirements.txt
ARG INSTALL_STABLE_FAST=0
ENV INSTALL_STABLE_FAST=${INSTALL_STABLE_FAST}
ARG INSTALL_OLLAMA=0
ENV INSTALL_OLLAMA=${INSTALL_OLLAMA}
ARG INSTALL_SAGEATTENTION=0
ENV INSTALL_SAGEATTENTION=${INSTALL_SAGEATTENTION}
ARG INSTALL_SPARGEATTN=0
ENV INSTALL_SPARGEATTN=${INSTALL_SPARGEATTN}
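# All four extras default to off (0). A build enabling stable-fast and Ollama
# might look like this (the image tag is illustrative):
#
#   docker build \
#     --build-arg INSTALL_STABLE_FAST=1 \
#     --build-arg INSTALL_OLLAMA=1 \
#     -t lightdiffusion-next .
#
# stable-fast is compiled from upstream main against TORCH_CUDA_ARCH_LIST and
# cached as a wheel so rebuilds skip the compile.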
RUN --mount=type=cache,target=/root/.cache/pip \
--mount=type=cache,target=/build-cache/stablefast,sharing=locked /bin/sh -c ' \
if [ "${INSTALL_STABLE_FAST}" = "1" ]; then \
echo "Installing stable-fast for CUDA architectures: ${TORCH_CUDA_ARCH_LIST}"; \
export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"; \
export FORCE_CUDA=1; \
mkdir -p /build-cache/stablefast; \
python3 -m pip wheel --no-build-isolation --wheel-dir /build-cache/stablefast \
git+https://github.com/chengzeyi/stable-fast.git@main#egg=stable-fast; \
python3 -m pip install --no-build-isolation --no-index --find-links /build-cache/stablefast stable-fast; \
else \
echo "Skipping stable-fast installation (INSTALL_STABLE_FAST=${INSTALL_STABLE_FAST})"; \
fi'
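# Optionally install Ollama via the official install script and pre-pull the
# qwen3:0.6b model at build time, so the first run does not block on a download.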
RUN --mount=type=cache,target=/build-cache/ollama,sharing=locked /bin/sh -c ' \
if [ "${INSTALL_OLLAMA}" = "1" ]; then \
echo "Installing Ollama and pulling qwen3:0.6b"; \
mkdir -p /build-cache/ollama; \
curl -fsSL https://ollama.com/install.sh -o /build-cache/ollama/install.sh; \
sh /build-cache/ollama/install.sh; \
export OLLAMA_HOME=/build-cache/ollama; \
ollama serve >/tmp/ollama.log 2>&1 & \
OLLAMA_PID=$!; \
attempts=0; \
until curl -fsS http://127.0.0.1:11434/api/version >/dev/null 2>&1; do \
attempts=$((attempts + 1)); \
if [ ${attempts} -gt 20 ]; then \
echo "Ollama failed to start"; \
kill ${OLLAMA_PID} >/dev/null 2>&1 || true; \
exit 1; \
fi; \
sleep 1; \
done; \
ollama pull qwen3:0.6b; \
kill ${OLLAMA_PID} >/dev/null 2>&1 || true; \
wait ${OLLAMA_PID} 2>/dev/null || true; \
else \
echo "Skipping Ollama installation (INSTALL_OLLAMA=${INSTALL_OLLAMA})"; \
fi'
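# Application source, plus the frontend bundle built in stage 1.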
COPY . .
COPY --from=frontend-builder /frontend/dist ./frontend/dist
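# SageAttention: patch via docker/patch_sageattention.py, build a wheel into
# the shared cache, and install it. A vendored SageAttention/ checkout is used
# if present; otherwise upstream is cloned shallowly.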
RUN --mount=type=cache,target=/root/.cache/torch_extensions,sharing=locked \
--mount=type=cache,target=/build-cache/sageattention,sharing=locked /bin/sh -c ' \
if [ "${INSTALL_SAGEATTENTION}" = "1" ]; then \
if [ -d "SageAttention" ]; then \
echo "Found SageAttention - applying patch"; \
cd SageAttention; \
python3 ../docker/patch_sageattention.py; \
python3 -m pip wheel --no-build-isolation --wheel-dir /build-cache/sageattention .; \
python3 -m pip install --no-index /build-cache/sageattention/*.whl; \
cd ..; \
rm -rf SageAttention/build SageAttention/*.egg-info; \
else \
echo "SageAttention directory not found - cloning and applying patch"; \
git clone --depth 1 https://github.com/thu-ml/SageAttention /tmp/SageAttention; \
cd /tmp/SageAttention; \
python3 /app/docker/patch_sageattention.py; \
python3 -m pip wheel --no-build-isolation --wheel-dir /build-cache/sageattention .; \
python3 -m pip install --no-index /build-cache/sageattention/*.whl; \
rm -rf /tmp/SageAttention/build /tmp/SageAttention/*.egg-info; \
rm -rf /tmp/SageAttention; \
fi; \
else \
echo "Skipping SageAttention installation (INSTALL_SAGEATTENTION=${INSTALL_SAGEATTENTION})"; \
fi'
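# SpargeAttn: only built when the arch list includes a compute capability the
# project supports (8.0 through 9.0); sm_120-only builds are skipped.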
RUN --mount=type=cache,target=/root/.cache/torch_extensions,sharing=locked \
--mount=type=cache,target=/build-cache/spargeattn,sharing=locked /bin/sh -c ' \
if [ "${INSTALL_SPARGEATTN}" = "1" ]; then \
if [ -d "SpargeAttn" ]; then \
cd SpargeAttn; \
if echo "${TORCH_CUDA_ARCH_LIST}" | grep -qE "(8\.0|8\.6|8\.7|8\.9|9\.0)"; then \
echo "Building SpargeAttn for supported architectures: ${TORCH_CUDA_ARCH_LIST}"; \
python3 -m pip wheel --no-build-isolation --wheel-dir /build-cache/spargeattn .; \
python3 -m pip install --no-index /build-cache/spargeattn/*.whl; \
rm -rf build *.egg-info; \
else \
echo "Skipping SpargeAttn - architecture ${TORCH_CUDA_ARCH_LIST} not supported (requires 8.0-9.0)"; \
fi; \
cd ..; \
else \
echo "SpargeAttn directory not found - cloning and attempting build if supported"; \
git clone --depth 1 https://github.com/thu-ml/SpargeAttn /tmp/SpargeAttn; \
cd /tmp/SpargeAttn; \
if echo "${TORCH_CUDA_ARCH_LIST}" | grep -qE "(8\.0|8\.6|8\.7|8\.9|9\.0)"; then \
echo "Building cloned SpargeAttn for supported architectures: ${TORCH_CUDA_ARCH_LIST}"; \
python3 -m pip wheel --no-build-isolation --wheel-dir /build-cache/spargeattn .; \
python3 -m pip install --no-index /build-cache/spargeattn/*.whl; \
rm -rf build *.egg-info; \
else \
echo "Skipping cloned SpargeAttn - architecture ${TORCH_CUDA_ARCH_LIST} not supported (requires 8.0-9.0)"; \
fi; \
cd /app; \
rm -rf /tmp/SpargeAttn; \
fi; \
else \
echo "Skipping SpargeAttn installation (INSTALL_SPARGEATTN=${INSTALL_SPARGEATTN})"; \
fi'
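# Pre-create the output folders and model directories the app expects.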
RUN mkdir -p ./output/classic \
./output/Flux \
./output/HiresFix \
./output/Img2Img \
./output/Adetailer \
./include/checkpoints \
./include/clip \
./include/embeddings \
./include/ESRGAN \
./include/loras \
./include/sd1_tokenizer \
./include/text_encoder \
./include/unet \
./include/vae \
./include/vae_approx \
./include/yolos
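# Seed default runtime state (last seed and default prompt).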
RUN echo "42" > ./include/last_seed.txt
RUN echo "A beautiful landscape" > ./include/prompt.txt
EXPOSE 7860
ENV PORT=7860
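# Liveness probe against the server's /health endpoint.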
HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
CMD curl -f http://localhost:${PORT}/health || exit 1
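# Shell-form CMD: when Ollama was baked in, start its server and wait up to
# 20 seconds for it to answer before launching the app; the app is started
# either way.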
CMD if [ "${INSTALL_OLLAMA}" = "1" ]; then \
echo "Starting Ollama server"; \
ollama serve >/tmp/ollama_runtime.log 2>&1 & \
for attempt in $(seq 1 20); do \
if curl -fsS http://127.0.0.1:11434/api/version >/dev/null 2>&1; then \
break; \
fi; \
sleep 1; \
done; \
fi; \
exec python3 server.py --host 0.0.0.0 --port "${PORT}"
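# A typical invocation, assuming the NVIDIA Container Toolkit is installed on
# the host (the tag must match the one used at build time):
#
#   docker run --gpus all -p 7860:7860 lightdiffusion-next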