# ============================================================
# Fish Speech API Server — HuggingFace Spaces Dockerfile
# Target: GPU Space (T4 or better) on HuggingFace
# Port:   7860 (required by HF Spaces)
# ============================================================
FROM nvidia/cuda:12.6.1-devel-ubuntu22.04

# HF Spaces runs containers as UID 1000, so the runtime user is created
# with that UID/GID further down.
ARG USERNAME=user
ARG USER_UID=1000
ARG USER_GID=1000

# Build-time only: keeps apt from prompting during the RUN steps below
# without leaking the setting into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Optional git ref (branch, tag or commit SHA) of the server repository to
# build. Left empty, the repository's default branch is used as cloned.
# Example: docker build --build-arg APP_REF=<commit-sha> .
ARG APP_REF=

# Runtime configuration:
#   MODEL_DIR - where the model will be downloaded at startup
#   APP_PORT  - HF Spaces requires the app to bind to 7860
#   APP_BIND / APP_WORKERS - listen address and worker count
#     (presumably consumed by the entrypoint script — confirm there)
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    MODEL_DIR=/app/models/fish-speech-1.5 \
    APP_PORT=7860 \
    APP_BIND=0.0.0.0 \
    APP_WORKERS=1

WORKDIR /app

# ── System dependencies ──────────────────────────────────────
# update + install + cleanup share one layer so the apt package lists
# never end up stored in the image.
# NOTE(review): --no-install-recommends is deliberately not added here.
# python3-pip's recommended packages presumably supply the compiler and
# Python headers that source builds from requirements.txt may need —
# confirm before trimming.
RUN apt-get update --quiet && \
    apt-get install --fix-broken --yes --quiet \
        bash \
        cmake \
        curl \
        git \
        portaudio19-dev \
        python3 \
        python3-pip \
        time && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# ── Python dependencies ──────────────────────────────────────
# Clone the repo so we get the submodule (fish_speech) too.
# When APP_REF is set, check that ref out and re-sync the submodules to
# the revisions it records; this is the knob for reproducible builds.
RUN git clone --recurse-submodules \
        https://github.com/EvilFreelancer/docker-fish-speech-server.git \
        /app && \
    if [ -n "${APP_REF}" ]; then \
        git -C /app checkout "${APP_REF}" && \
        git -C /app submodule update --init --recursive; \
    fi

RUN pip install --no-cache-dir -r /app/requirements.txt && \
    pip install --no-cache-dir "huggingface_hub[cli]"

# ── Non-root user (required by HF Spaces) ────────────────────
# Create the runtime user with a home directory, pre-create the model
# directory, and hand /app over to that user so it can write there after
# the USER switch.
RUN groupadd --gid ${USER_GID} ${USERNAME} && \
    useradd --uid ${USER_UID} --gid ${USER_GID} -m ${USERNAME} && \
    mkdir -p /app/models && \
    chown -R ${USER_UID}:${USER_GID} /app

# Everything from here on (build steps and the running container) is
# executed as the unprivileged user.
USER ${USERNAME}

# ── Entrypoint: download model on first run, then start API ──
# HF Spaces GPU Spaces provide CUDA at runtime via the host driver.
# The model download is deferred to startup so the Docker image
# itself stays small and HF caching can be reused across restarts.
# Install the startup script owned by the runtime user and already
# executable. --chmod on COPY requires BuildKit; it replaces a separate
# `RUN chmod +x` step, which would add an extra layer holding a second
# copy of the file just to flip its mode bits.
COPY --chown=${USER_UID}:${USER_GID} --chmod=0755 entrypoint_hf.sh /app/entrypoint_hf.sh

# Documentation only (does not publish the port): the port the API server
# is expected to listen on.
EXPOSE 7860

# Exec (JSON-array) form: the script is started directly rather than
# through `/bin/sh -c`, so it runs as PID 1 and receives stop signals.
ENTRYPOINT ["/app/entrypoint_hf.sh"]