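# syntax=docker/dockerfile:1
# The BuildKit dockerfile frontend above is required for the RUN --mount cache mounts used below.

# Stage 1: build the frontend bundle with Node (copied into the runtime image below).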
FROM node:22-bookworm-slim AS frontend-builder
WORKDIR /frontend
COPY frontend/package.json frontend/package-lock.json ./
RUN npm ci
COPY frontend/ ./
RUN npm run build
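# Stage 2: CUDA 12.8 devel image (nvcc is needed to compile the optional attention kernels).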
FROM nvidia/cuda:12.8.0-devel-ubuntu22.04
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1
ENV CUDA_HOME=/usr/local/cuda
ENV PATH=${CUDA_HOME}/bin:${PATH}
ENV LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
ARG TORCH_CUDA_ARCH_LIST="8.0;8.6;8.9;9.0;12.0"
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
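# System packages: Python 3.10, compilers, and the shared libraries OpenCV/Tk need at runtime.
# The apt cache mounts keep downloaded packages across rebuilds.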
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    apt-get update && apt-get install -y \
    python3.10 \
    python3.10-dev \
    python3.10-venv \
    python3-pip \
    python3-tk \
    git \
    wget \
    curl \
    build-essential \
    libgl1-mesa-glx \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    libgomp1 \
    software-properties-common \
    ninja-build \
    && rm -rf /var/lib/apt/lists/*
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
WORKDIR /app
COPY requirements.txt ./
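# Bootstrap pip, then uv, which is used below as a faster installer.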
RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install --upgrade pip
RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install uv
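# PyTorch stack from the cu128 wheel index. xformers is skipped when compute capability 12.0
# (RTX 50 series) is targeted, where prebuilt wheels are assumed to be unavailable.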
RUN --mount=type=cache,target=/root/.cache/uv /bin/sh -c 'set -e; \
    python3 -m uv pip install --system --index-url https://download.pytorch.org/whl/cu128 \
        torch torchvision "triton>=2.1.0"; \
    if echo "${TORCH_CUDA_ARCH_LIST}" | grep -q "12\.0"; then \
        echo "Detected compute capability 12.0 (RTX 50 series). Skipping xformers install."; \
    else \
        python3 -m uv pip install --system xformers; \
    fi'
RUN --mount=type=cache,target=/root/.cache/uv python3 -m uv pip install --system "numpy<2.0.0"
RUN --mount=type=cache,target=/root/.cache/uv python3 -m uv pip install --system -r requirements.txt
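# Optional components, toggled per build (0 = skip, 1 = install). Example invocation
# (illustrative values):
#   docker build --build-arg INSTALL_SAGEATTENTION=1 \
#                --build-arg TORCH_CUDA_ARCH_LIST="8.9" -t app .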
ARG INSTALL_STABLE_FAST=0
ENV INSTALL_STABLE_FAST=${INSTALL_STABLE_FAST}
ARG INSTALL_OLLAMA=0
ENV INSTALL_OLLAMA=${INSTALL_OLLAMA}
ARG INSTALL_SAGEATTENTION=0
ENV INSTALL_SAGEATTENTION=${INSTALL_SAGEATTENTION}
ARG INSTALL_SPARGEATTN=0
ENV INSTALL_SPARGEATTN=${INSTALL_SPARGEATTN}
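# stable-fast: compiled from source against the selected CUDA architectures; built wheels
# are kept in a cache mount so rebuilds can reinstall without recompiling.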
RUN --mount=type=cache,target=/root/.cache/pip \
    --mount=type=cache,target=/build-cache/stablefast,sharing=locked /bin/sh -c ' \
    if [ "${INSTALL_STABLE_FAST}" = "1" ]; then \
        echo "Installing stable-fast for CUDA architectures: ${TORCH_CUDA_ARCH_LIST}"; \
        export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST}"; \
        export FORCE_CUDA=1; \
        mkdir -p /build-cache/stablefast; \
        python3 -m pip wheel --no-deps --no-build-isolation --wheel-dir /build-cache/stablefast \
            git+https://github.com/chengzeyi/stable-fast.git@main#egg=stable-fast; \
        python3 -m pip install --no-build-isolation --no-index --find-links /build-cache/stablefast stable-fast; \
    else \
        echo "Skipping stable-fast installation (INSTALL_STABLE_FAST=${INSTALL_STABLE_FAST})"; \
    fi'
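# Ollama: install at build time and pre-pull qwen3:0.6b by briefly running the server,
# so the model should already be present in the image when the container starts.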
RUN --mount=type=cache,target=/build-cache/ollama,sharing=locked /bin/sh -c ' \
    if [ "${INSTALL_OLLAMA}" = "1" ]; then \
        echo "Installing Ollama and pulling qwen3:0.6b"; \
        mkdir -p /build-cache/ollama; \
        curl -fsSL https://ollama.com/install.sh -o /build-cache/ollama/install.sh; \
        sh /build-cache/ollama/install.sh; \
        ollama serve >/tmp/ollama.log 2>&1 & \
        OLLAMA_PID=$!; \
        attempts=0; \
        until curl -fsS http://127.0.0.1:11434/api/version >/dev/null 2>&1; do \
            attempts=$((attempts + 1)); \
            if [ ${attempts} -gt 20 ]; then \
                echo "Ollama failed to start"; \
                kill ${OLLAMA_PID} >/dev/null 2>&1 || true; \
                exit 1; \
            fi; \
            sleep 1; \
        done; \
        ollama pull qwen3:0.6b; \
        kill ${OLLAMA_PID} >/dev/null 2>&1 || true; \
        wait ${OLLAMA_PID} 2>/dev/null || true; \
    else \
        echo "Skipping Ollama installation (INSTALL_OLLAMA=${INSTALL_OLLAMA})"; \
    fi'
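# Application source plus the prebuilt frontend bundle from stage 1.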
COPY . .
COPY --from=frontend-builder /frontend/dist ./frontend/dist
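# SageAttention: built from a vendored checkout if present, otherwise cloned from upstream;
# docker/patch_sageattention.py is applied before compiling.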
RUN --mount=type=cache,target=/root/.cache/torch_extensions,sharing=locked \
    --mount=type=cache,target=/build-cache/sageattention,sharing=locked /bin/sh -c ' \
    if [ "${INSTALL_SAGEATTENTION}" = "1" ]; then \
        if [ -d "SageAttention" ]; then \
            echo "Found SageAttention - applying patch"; \
            cd SageAttention; \
            python3 ../docker/patch_sageattention.py; \
            python3 -m pip wheel --no-deps --no-build-isolation --wheel-dir /build-cache/sageattention .; \
            python3 -m pip install --no-index /build-cache/sageattention/*.whl; \
            cd ..; \
            rm -rf SageAttention/build SageAttention/*.egg-info; \
        else \
            echo "SageAttention directory not found - cloning and applying patch"; \
            git clone --depth 1 https://github.com/thu-ml/SageAttention /tmp/SageAttention; \
            cd /tmp/SageAttention; \
            python3 /app/docker/patch_sageattention.py; \
            python3 -m pip wheel --no-deps --no-build-isolation --wheel-dir /build-cache/sageattention .; \
            python3 -m pip install --no-index /build-cache/sageattention/*.whl; \
            cd /app; \
            rm -rf /tmp/SageAttention; \
        fi; \
    else \
        echo "Skipping SageAttention installation (INSTALL_SAGEATTENTION=${INSTALL_SAGEATTENTION})"; \
    fi'
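# SpargeAttn: same vendored-or-cloned scheme; its kernels target compute capabilities
# 8.0-9.0 only, so the build is skipped for other TORCH_CUDA_ARCH_LIST values.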
RUN --mount=type=cache,target=/root/.cache/torch_extensions,sharing=locked \
    --mount=type=cache,target=/build-cache/spargeattn,sharing=locked /bin/sh -c ' \
    if [ "${INSTALL_SPARGEATTN}" = "1" ]; then \
        if [ -d "SpargeAttn" ]; then \
            cd SpargeAttn; \
            if echo "${TORCH_CUDA_ARCH_LIST}" | grep -qE "(8\.0|8\.6|8\.7|8\.9|9\.0)"; then \
                echo "Building SpargeAttn for supported architectures: ${TORCH_CUDA_ARCH_LIST}"; \
                python3 -m pip wheel --no-deps --no-build-isolation --wheel-dir /build-cache/spargeattn .; \
                python3 -m pip install --no-index /build-cache/spargeattn/*.whl; \
                rm -rf build *.egg-info; \
            else \
                echo "Skipping SpargeAttn - architecture ${TORCH_CUDA_ARCH_LIST} not supported (requires 8.0-9.0)"; \
            fi; \
            cd ..; \
        else \
            echo "SpargeAttn directory not found - cloning and attempting build if supported"; \
            git clone --depth 1 https://github.com/thu-ml/SpargeAttn /tmp/SpargeAttn; \
            cd /tmp/SpargeAttn; \
            if echo "${TORCH_CUDA_ARCH_LIST}" | grep -qE "(8\.0|8\.6|8\.7|8\.9|9\.0)"; then \
                echo "Building cloned SpargeAttn for supported architectures: ${TORCH_CUDA_ARCH_LIST}"; \
                python3 -m pip wheel --no-deps --no-build-isolation --wheel-dir /build-cache/spargeattn .; \
                python3 -m pip install --no-index /build-cache/spargeattn/*.whl; \
            else \
                echo "Skipping cloned SpargeAttn - architecture ${TORCH_CUDA_ARCH_LIST} not supported (requires 8.0-9.0)"; \
            fi; \
            cd /app; \
            rm -rf /tmp/SpargeAttn; \
        fi; \
    else \
        echo "Skipping SpargeAttn installation (INSTALL_SPARGEATTN=${INSTALL_SPARGEATTN})"; \
    fi'
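# Pre-create the output and model directories the app expects.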
RUN mkdir -p ./output/classic \
    ./output/Flux \
    ./output/HiresFix \
    ./output/Img2Img \
    ./output/Adetailer \
    ./include/checkpoints \
    ./include/clip \
    ./include/embeddings \
    ./include/ESRGAN \
    ./include/loras \
    ./include/sd1_tokenizer \
    ./include/text_encoder \
    ./include/unet \
    ./include/vae \
    ./include/vae_approx \
    ./include/yolos
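# Seed minimal default state files.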
| RUN echo "42" > ./include/last_seed.txt | |
| RUN echo "A beautiful landscape" > ./include/prompt.txt | |
EXPOSE 7860
ENV PORT=7860
HEALTHCHECK --interval=30s --timeout=30s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:${PORT}/health || exit 1
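# Start Ollama in the background (when installed), wait up to 20s for it to answer,
# then hand PID 1 to the Python server.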
CMD if [ "${INSTALL_OLLAMA}" = "1" ]; then \
        echo "Starting Ollama server"; \
        ollama serve >/tmp/ollama_runtime.log 2>&1 & \
        for attempt in $(seq 1 20); do \
            if curl -fsS http://127.0.0.1:11434/api/version >/dev/null 2>&1; then \
                break; \
            fi; \
            sleep 1; \
        done; \
    fi; \
    exec python3 server.py --host 0.0.0.0 --port "${PORT}"