OrbitMC committed on
Commit
5edd9d0
·
verified ·
1 Parent(s): 7ef6c85

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +60 -49
Dockerfile CHANGED
@@ -1,63 +1,74 @@
1
- FROM python:3.11-slim
2
-
3
- # HuggingFace Spaces runs as root by default on free tier
4
- # but expects port 7860
5
- ENV PORT=7860
6
- ENV PYTHONUNBUFFERED=1
7
- ENV PYTHONDONTWRITEBYTECODE=1
8
- ENV HF_HOME=/app/cache
9
- ENV TRANSFORMERS_CACHE=/app/cache
10
- ENV SENTENCE_TRANSFORMERS_HOME=/app/cache
 
 
 
 
 
 
 
 
 
 
11
 
12
- WORKDIR /app
13
 
14
- # ── System deps needed to install pre-built llama-cpp-python wheel ──
15
  RUN apt-get update && apt-get install -y --no-install-recommends \
16
- curl wget ca-certificates \
 
 
 
 
 
17
  && rm -rf /var/lib/apt/lists/*
18
 
19
- # ── Python deps ──
20
- # Install everything EXCEPT llama-cpp-python first (fast)
21
- COPY requirements.txt .
22
- RUN pip install --no-cache-dir -r requirements.txt
 
 
 
23
 
24
- # ── llama-cpp-python: pre-built CPU wheel (no compile, seconds not minutes) ──
25
- RUN pip install --no-cache-dir llama-cpp-python \
26
- --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
27
 
28
- # ── KittenTTS ──
29
- RUN pip install --no-cache-dir \
30
- https://github.com/KittenML/KittenTTS/releases/download/0.8/kittentts-0.8.0-py3-none-any.whl
 
 
 
 
31
 
32
- # ── Pre-download the GGUF model at BUILD time so startup is instant ──
33
- # This bakes the model into the image layer — no download on first run
34
- RUN python - <<'EOF'
35
- from huggingface_hub import hf_hub_download
36
- import os
37
- path = hf_hub_download(
38
- repo_id="unsloth/Qwen3.5-0.8B-GGUF",
39
- filename="Qwen3.5-0.8B-UD-Q2_K_XL.gguf",
40
- cache_dir="/app/cache"
41
- )
42
- print(f"Model cached at: {path}")
43
- EOF
44
 
45
- # ── Pre-download embeddings model at BUILD time ──
46
- RUN python - <<'EOF'
47
- from sentence_transformers import SentenceTransformer
48
- SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2",
49
- cache_folder="/app/cache")
50
- print("Embeddings model cached.")
51
- EOF
52
 
53
- # ── Copy app source ──
54
- COPY app.py .
55
- COPY static/ static/
 
 
 
 
 
56
 
57
- # ── Create runtime dirs ──
58
- RUN mkdir -p /app/database/vector_store \
59
- /app/database/learning_data \
60
- /app/database/chats_data
61
 
62
  EXPOSE 7860
63
 
 
1
+ # ─────────────────────────────────────────────────────────────────────────────
2
+ # Kitten Chat – HuggingFace Docker Space (free CPU)
3
+ # LLM : gemma-3-270m-it-F16.gguf via llama-cpp-python (CPU-only)
4
+ # TTS : Kokoro ONNX + "kiki" voice (af_kore)
5
+ # UI : Flask (no Gradio)
6
+ #
7
+ # Space requirements:
8
+ # • Place gemma-3-270m-it-F16.gguf in the repo root (or set MODEL_PATH env)
9
+ # • kokoro model files are downloaded automatically at build time
10
+ #
11
+ # HuggingFace Space config (add to README.md front-matter):
12
+ # ---
13
+ # title: Kitten Chat
14
+ # emoji: 🐱
15
+ # colorFrom: purple
16
+ # colorTo: pink
17
+ # sdk: docker
18
+ # app_port: 7860
19
+ # ---
20
+ # ─────────────────────────────────────────────────────────────────────────────
21
 
22
+ FROM python:3.11-slim
23
 
24
+ # ── System deps ───────────────────────────────────────────────────────────────
25
  RUN apt-get update && apt-get install -y --no-install-recommends \
26
+ build-essential \
27
+ cmake \
28
+ espeak-ng \
29
+ libsndfile1 \
30
+ wget \
31
+ ca-certificates \
32
  && rm -rf /var/lib/apt/lists/*
33
 
34
+ # ── Python deps ───────────────────────────────────────────────────────────────
35
+ # llama-cpp-python – CPU-only wheel (no CUDA, keeps image small)
36
+ ENV CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_CUDA=OFF"
37
+ ENV FORCE_CMAKE=1
38
+
39
+ COPY requirements.txt /tmp/requirements.txt
40
+ RUN pip install --no-cache-dir -r /tmp/requirements.txt
41
 
42
+ # ── App ───────────────────────────────────────────────────────────────────────
43
+ WORKDIR /app
 
44
 
45
+ # Download Kokoro ONNX model + voices at build time
46
+ # Using the int8 quantized model – much smaller & faster on CPU
47
+ RUN mkdir -p /app/models && \
48
+ wget -q -O /app/models/kokoro-v1.0.int8.onnx \
49
+ "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.int8.onnx" && \
50
+ wget -q -O /app/models/voices-v1.0.bin \
51
+ "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin"
52
 
53
+ # Copy user-supplied GGUF model (must exist in build context)
54
+ # If you prefer to download at runtime, remove this line and set MODEL_PATH
55
+ COPY gemma-3-270m-it-F16.gguf /app/models/gemma-3-270m-it-F16.gguf
 
 
 
 
 
 
 
 
 
56
 
57
+ # Copy application
58
+ COPY app.py /app/app.py
 
 
 
 
 
59
 
60
+ # ── Environment ───────────────────────────────────────────────────────────────
61
+ ENV MODEL_PATH=/app/models/gemma-3-270m-it-F16.gguf
62
+ ENV ONNX_MODEL=/app/models/kokoro-v1.0.int8.onnx
63
+ ENV VOICES_BIN=/app/models/voices-v1.0.bin
64
+ ENV TTS_VOICE=af_kore
65
+ ENV PORT=7860
66
+ # Cap OpenMP at 4 worker threads (hard-coded for the free CPU tier)
67
+ ENV OMP_NUM_THREADS=4
68
 
69
+ # HuggingFace Spaces runs as non-root user 1000
70
+ RUN useradd -m -u 1000 hfuser && chown -R hfuser:hfuser /app
71
+ USER 1000
 
72
 
73
  EXPOSE 7860
74