# Extracted from Hugging Face Spaces: Aatricks/LightDiffusion-Next (Running on Zero).
# File size at extraction: 8,616 bytes; revision b701455.

# Stage 1: build the frontend bundle. A later stage copies /frontend/dist out
# of this stage; nothing else from it reaches the final image.
FROM node:22-bookworm-slim AS frontend-builder

WORKDIR /frontend

# Copy only the manifests first so the dependency layer stays cached until
# package.json or package-lock.json changes.
COPY frontend/package.json frontend/package-lock.json ./

# npm's download cache lives in a BuildKit cache mount: it is reused across
# builds and never written into an image layer.
RUN --mount=type=cache,target=/root/.npm npm ci

# Now bring in the sources and produce the production bundle.
COPY frontend/ ./
RUN npm run build


# Final stage: CUDA 12.8 development image on Ubuntu 22.04.
FROM nvidia/cuda:12.8.0-devel-ubuntu22.04

# Build-time only: stops apt/debconf from prompting during RUN steps without
# leaking the setting into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# CUDA architectures to compile for. The ARG must be declared before any ENV
# of the same name: Docker resolves ${NAME} (and the RUN environment) to an
# existing ENV in preference to an ARG, so a hard-coded ENV here would make
# `--build-arg TORCH_CUDA_ARCH_LIST=...` a silent no-op for every later step.
ARG TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0;12.0"

# Python: unbuffered output, no .pyc files. CUDA_HOME is defined in this
# instruction so the next one can reference it.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    CUDA_HOME=/usr/local/cuda \
    TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"

# Put the CUDA toolchain and libraries ahead of whatever the base image set.
ENV PATH=${CUDA_HOME}/bin:${PATH} \
    LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}

# Install Python 3.10, build tooling and the shared libraries used later.
#
# /var/cache/apt and /var/lib/apt are BuildKit cache mounts, so downloaded
# packages and index files live outside the image and can be reused by later
# builds. For the cache to actually hold anything:
#   * the docker-clean apt hook shipped by Ubuntu-based images (it deletes
#     downloaded .deb files after each install) is removed, and apt is told to
#     keep downloaded packages;
#   * /var/lib/apt/lists is NOT wiped at the end - it sits inside the cache
#     mount, so deleting it only empties the cache and cannot shrink the image.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    rm -f /etc/apt/apt.conf.d/docker-clean \
    && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' \
        > /etc/apt/apt.conf.d/keep-cache \
    && apt-get update \
    && apt-get install -y \
        build-essential \
        curl \
        git \
        libgl1-mesa-glx \
        libglib2.0-0 \
        libgomp1 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        ninja-build \
        python3-pip \
        python3-tk \
        python3.10 \
        python3.10-dev \
        python3.10-venv \
        software-properties-common \
        wget

# Register python3.10 as the `python3` alternative.
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1

WORKDIR /app

# Bootstrap the installers: upgrade pip first, then use it to install uv.
RUN --mount=type=cache,target=/root/.cache/pip \
    python3 -m pip install --upgrade pip \
    && python3 -m pip install uv

# Install torch/torchvision/triton from the PyTorch cu128 wheel index.
# xformers is installed only when TORCH_CUDA_ARCH_LIST does not mention
# compute capability 12.0. `set -e` aborts the step on the first failure.
RUN --mount=type=cache,target=/root/.cache/uv \
    set -e; \
    python3 -m uv pip install --system --index-url https://download.pytorch.org/whl/cu128 \
        torch torchvision "triton>=2.1.0"; \
    if echo "${TORCH_CUDA_ARCH_LIST}" | grep -q "12\.0"; then \
        echo "Detected compute capability 12.0 (RTX 50 series). Skipping xformers install."; \
    else \
        python3 -m uv pip install --system xformers; \
    fi

# Keep numpy on the 1.x series.
RUN --mount=type=cache,target=/root/.cache/uv \
    python3 -m uv pip install --system "numpy<2.0.0"

# requirements.txt is copied only now, immediately before it is used, so that
# editing it does not invalidate the pip/uv/torch layers above.
COPY requirements.txt ./
RUN --mount=type=cache,target=/root/.cache/uv \
    python3 -m uv pip install --system -r requirements.txt

# Build-time switches; each one can be overridden with --build-arg.
# The INSTALL_* flags default to 0 (feature not installed).
ARG TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0;12.0"
ARG INSTALL_STABLE_FAST=0
ARG INSTALL_OLLAMA=0
ARG INSTALL_SAGEATTENTION=0
ARG INSTALL_SPARGEATTN=0

# Mirror every switch into the image environment so that later RUN steps and
# the container start command can read them.
# NOTE(review): Docker resolves ${NAME} to an already-defined ENV before an ARG
# of the same name, so an ENV TORCH_CUDA_ARCH_LIST set earlier in this stage
# takes precedence over the build argument in the expansion below.
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST} \
    INSTALL_STABLE_FAST=${INSTALL_STABLE_FAST} \
    INSTALL_OLLAMA=${INSTALL_OLLAMA} \
    INSTALL_SAGEATTENTION=${INSTALL_SAGEATTENTION} \
    INSTALL_SPARGEATTN=${INSTALL_SPARGEATTN}

# Optionally build stable-fast from source and install it (INSTALL_STABLE_FAST=1).
# The wheel is built into the /build-cache/stablefast cache mount and then
# installed from there with --no-index.
# `set -e` aborts on the first failing command: without it a failed wheel
# build fell through to the install step, which could silently install a
# stale wheel left in the cache mount by an earlier build.
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/build-cache/stablefast,sharing=locked /bin/sh -c ' \
    set -e; \
    if [ "${INSTALL_STABLE_FAST}" = "1" ]; then \
        echo "Installing stable-fast for CUDA architectures: ${TORCH_CUDA_ARCH_LIST}"; \
        export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"; \
        export FORCE_CUDA=1; \
        mkdir -p /build-cache/stablefast; \
        python3 -m pip wheel --no-build-isolation --wheel-dir /build-cache/stablefast \
            git+https://github.com/chengzeyi/stable-fast.git@main#egg=stable-fast; \
        python3 -m pip install --no-build-isolation --no-index --find-links /build-cache/stablefast stable-fast; \
    else \
        echo "Skipping stable-fast installation (INSTALL_STABLE_FAST=${INSTALL_STABLE_FAST})"; \
    fi'

# Optionally install Ollama and pre-pull the qwen3:0.6b model (INSTALL_OLLAMA=1).
# Steps: download and run the upstream installer, start a temporary server,
# poll its /api/version endpoint once per second (giving up after more than 20
# failed attempts), pull the model, then stop the server.
# The exit status of `ollama pull` is captured and propagated after the server
# has been stopped; previously the trailing `|| true` commands masked a failed
# pull and the build succeeded without the model. The server log is printed
# when startup fails so the cause is visible in the build output.
# NOTE(review): the installer is fetched unpinned and unverified - consider
# pinning a version or checking a checksum.
# NOTE(review): OLLAMA_HOME is exported unchanged from the original; confirm
# the installed Ollama honours it. If it did redirect model storage into
# /build-cache/ollama (a cache mount), the pulled model would not be part of
# the image.
RUN --mount=type=cache,target=/build-cache/ollama,sharing=locked /bin/sh -c ' \
    if [ "${INSTALL_OLLAMA}" = "1" ]; then \
        echo "Installing Ollama and pulling qwen3:0.6b"; \
        mkdir -p /build-cache/ollama; \
        curl -fsSL https://ollama.com/install.sh -o /build-cache/ollama/install.sh; \
        sh /build-cache/ollama/install.sh; \
        export OLLAMA_HOME=/build-cache/ollama; \
        ollama serve >/tmp/ollama.log 2>&1 & \
        OLLAMA_PID=$!; \
        attempts=0; \
        until curl -fsS http://127.0.0.1:11434/api/version >/dev/null 2>&1; do \
            attempts=$((attempts + 1)); \
            if [ ${attempts} -gt 20 ]; then \
                echo "Ollama failed to start"; \
                cat /tmp/ollama.log || true; \
                kill ${OLLAMA_PID} >/dev/null 2>&1 || true; \
                exit 1; \
            fi; \
            sleep 1; \
        done; \
        pull_status=0; \
        ollama pull qwen3:0.6b || pull_status=$?; \
        kill ${OLLAMA_PID} >/dev/null 2>&1 || true; \
        wait ${OLLAMA_PID} 2>/dev/null || true; \
        if [ "${pull_status}" -ne 0 ]; then \
            echo "ollama pull qwen3:0.6b failed (exit status ${pull_status})"; \
            exit "${pull_status}"; \
        fi; \
    else \
        echo "Skipping Ollama installation (INSTALL_OLLAMA=${INSTALL_OLLAMA})"; \
    fi'

# Copy the whole build context into the application directory.
COPY . /app/
# Add the frontend bundle produced by the frontend-builder stage.
COPY --from=frontend-builder /frontend/dist /app/frontend/dist

# Optionally build and install SageAttention (INSTALL_SAGEATTENTION=1).
# Source selection: a SageAttention/ directory copied from the build context is
# used when present; otherwise the upstream repository is shallow-cloned into
# /tmp. Either way docker/patch_sageattention.py is run inside the source tree
# before the wheel is built.
# Changes compared with the former two-branch version:
#   * `set -e` - a failing clone, patch or build now fails the image build
#     instead of being ignored;
#   * wheels left in the cache mount by earlier builds are removed before
#     building, because the install step globs *.whl and would otherwise also
#     pick up stale versions;
#   * the patch/build/install sequence is written once for both sources.
RUN --mount=type=cache,target=/root/.cache/torch_extensions,sharing=locked \
    --mount=type=cache,target=/build-cache/sageattention,sharing=locked /bin/sh -c ' \
    set -e; \
    if [ "${INSTALL_SAGEATTENTION}" = "1" ]; then \
        if [ -d "SageAttention" ]; then \
            echo "Found SageAttention - applying patch"; \
            src_dir=/app/SageAttention; \
        else \
            echo "SageAttention directory not found - cloning and applying patch"; \
            git clone --depth 1 https://github.com/thu-ml/SageAttention /tmp/SageAttention; \
            src_dir=/tmp/SageAttention; \
        fi; \
        cd "${src_dir}"; \
        python3 /app/docker/patch_sageattention.py; \
        rm -f /build-cache/sageattention/*.whl; \
        python3 -m pip wheel --no-build-isolation --wheel-dir /build-cache/sageattention .; \
        python3 -m pip install --no-index /build-cache/sageattention/*.whl; \
        cd /app; \
        if [ "${src_dir}" = "/tmp/SageAttention" ]; then \
            rm -rf /tmp/SageAttention; \
        else \
            rm -rf SageAttention/build SageAttention/*.egg-info; \
        fi; \
    else \
        echo "Skipping SageAttention installation (INSTALL_SAGEATTENTION=${INSTALL_SAGEATTENTION})"; \
    fi'

# Optionally build and install SpargeAttn (INSTALL_SPARGEATTN=1).
# Source selection: a SpargeAttn/ directory copied from the build context is
# used when present; otherwise the upstream repository is shallow-cloned into
# /tmp and removed afterwards. The build only runs when TORCH_CUDA_ARCH_LIST
# mentions at least one of 8.0, 8.6, 8.7, 8.9 or 9.0.
# NOTE(review): the check passes as soon as ANY listed architecture is in that
# set, so a list that also contains e.g. 12.0 is still built - confirm this is
# the intended meaning of "requires 8.0-9.0".
# Changes compared with the former two-branch version:
#   * `set -e` - a failing clone or build now fails the image build instead of
#     being ignored;
#   * wheels left in the cache mount by earlier builds are removed before
#     building, because the install step globs *.whl and would otherwise also
#     pick up stale versions;
#   * the architecture check and build sequence are written once; ${label}
#     keeps the log messages identical for both sources.
RUN --mount=type=cache,target=/root/.cache/torch_extensions,sharing=locked \
    --mount=type=cache,target=/build-cache/spargeattn,sharing=locked /bin/sh -c ' \
    set -e; \
    if [ "${INSTALL_SPARGEATTN}" = "1" ]; then \
        if [ -d "SpargeAttn" ]; then \
            src_dir=/app/SpargeAttn; \
            label="SpargeAttn"; \
        else \
            echo "SpargeAttn directory not found - cloning and attempting build if supported"; \
            git clone --depth 1 https://github.com/thu-ml/SpargeAttn /tmp/SpargeAttn; \
            src_dir=/tmp/SpargeAttn; \
            label="cloned SpargeAttn"; \
        fi; \
        cd "${src_dir}"; \
        if echo "${TORCH_CUDA_ARCH_LIST}" | grep -qE "(8\.0|8\.6|8\.7|8\.9|9\.0)"; then \
            echo "Building ${label} for supported architectures: ${TORCH_CUDA_ARCH_LIST}"; \
            rm -f /build-cache/spargeattn/*.whl; \
            python3 -m pip wheel --no-build-isolation --wheel-dir /build-cache/spargeattn .; \
            python3 -m pip install --no-index /build-cache/spargeattn/*.whl; \
            rm -rf build *.egg-info; \
        else \
            echo "Skipping ${label} - architecture ${TORCH_CUDA_ARCH_LIST} not supported (requires 8.0-9.0)"; \
        fi; \
        cd /app; \
        if [ "${src_dir}" = "/tmp/SpargeAttn" ]; then \
            rm -rf /tmp/SpargeAttn; \
        fi; \
    else \
        echo "Skipping SpargeAttn installation (INSTALL_SPARGEATTN=${INSTALL_SPARGEATTN})"; \
    fi'

# Create the output and include directory trees under the working directory,
# then write the default seed and prompt files.
RUN mkdir -p \
        ./output/classic \
        ./output/Flux \
        ./output/HiresFix \
        ./output/Img2Img \
        ./output/Adetailer \
    && mkdir -p \
        ./include/checkpoints \
        ./include/clip \
        ./include/embeddings \
        ./include/ESRGAN \
        ./include/loras \
        ./include/sd1_tokenizer \
        ./include/text_encoder \
        ./include/unet \
        ./include/vae \
        ./include/vae_approx \
        ./include/yolos \
    && printf '%s\n' "42" > ./include/last_seed.txt \
    && printf '%s\n' "A beautiful landscape" > ./include/prompt.txt

# Default port for the server; read by the health check and the start command.
ENV PORT=7860

# Document the default port for operators and tooling.
EXPOSE 7860

# Probe the /health endpoint: first check window starts after 60 s, then every
# 30 s with a 30 s timeout; three consecutive failures mark the container
# unhealthy.
HEALTHCHECK --start-period=60s --interval=30s --timeout=30s --retries=3 \
    CMD curl --fail "http://localhost:${PORT}/health" || exit 1

# Container start command (shell form, so ${...} is expanded at run time).
# When the image was built with INSTALL_OLLAMA=1, start the Ollama server in
# the background and poll its /api/version endpoint up to 20 times, one second
# apart, before continuing. Startup proceeds even if Ollama never answers.
# `exec` replaces the shell with the Python server process.
CMD if [ "${INSTALL_OLLAMA}" = "1" ]; then \
        echo "Starting Ollama server"; \
        ollama serve >/tmp/ollama_runtime.log 2>&1 & \
        tries=0; \
        while [ "${tries}" -lt 20 ]; do \
            curl -fsS http://127.0.0.1:11434/api/version >/dev/null 2>&1 && break; \
            tries=$((tries + 1)); \
            sleep 1; \
        done; \
    fi; \
    exec python3 server.py --host 0.0.0.0 --port "${PORT}"