OrbitMC committed on
Commit
5edd9d0
·
verified ·
1 Parent(s): 7ef6c85

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +60 -49
Dockerfile CHANGED
@@ -1,63 +1,74 @@
1
- FROM python:3.11-slim
2
-
3
- # HuggingFace Spaces runs as root by default on free tier
4
- # but expects port 7860
5
- ENV PORT=7860
6
- ENV PYTHONUNBUFFERED=1
7
- ENV PYTHONDONTWRITEBYTECODE=1
8
- ENV HF_HOME=/app/cache
9
- ENV TRANSFORMERS_CACHE=/app/cache
10
- ENV SENTENCE_TRANSFORMERS_HOME=/app/cache
 
 
 
 
 
 
 
 
 
 
11
 
12
- WORKDIR /app
13
 
14
- # ── System deps needed to install pre-built llama-cpp-python wheel ──
15
  RUN apt-get update && apt-get install -y --no-install-recommends \
16
- curl wget ca-certificates \
 
 
 
 
 
17
  && rm -rf /var/lib/apt/lists/*
18
 
19
- # ── Python deps ──
20
- # Install everything EXCEPT llama-cpp-python first (fast)
21
- COPY requirements.txt .
22
- RUN pip install --no-cache-dir -r requirements.txt
 
 
 
23
 
24
- # ── llama-cpp-python: pre-built CPU wheel (no compile, seconds not minutes) ──
25
- RUN pip install --no-cache-dir llama-cpp-python \
26
- --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
27
 
28
- # ── KittenTTS ──
29
- RUN pip install --no-cache-dir \
30
- https://github.com/KittenML/KittenTTS/releases/download/0.8/kittentts-0.8.0-py3-none-any.whl
 
 
 
 
31
 
32
- # ── Pre-download the GGUF model at BUILD time so startup is instant ──
33
- # This bakes the model into the image layer — no download on first run
34
- RUN python - <<'EOF'
35
- from huggingface_hub import hf_hub_download
36
- import os
37
- path = hf_hub_download(
38
- repo_id="unsloth/Qwen3.5-0.8B-GGUF",
39
- filename="Qwen3.5-0.8B-UD-Q2_K_XL.gguf",
40
- cache_dir="/app/cache"
41
- )
42
- print(f"Model cached at: {path}")
43
- EOF
44
 
45
- # ── Pre-download embeddings model at BUILD time ──
46
- RUN python - <<'EOF'
47
- from sentence_transformers import SentenceTransformer
48
- SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2",
49
- cache_folder="/app/cache")
50
- print("Embeddings model cached.")
51
- EOF
52
 
53
- # ── Copy app source ──
54
- COPY app.py .
55
- COPY static/ static/
 
 
 
 
 
56
 
57
- # ── Create runtime dirs ──
58
- RUN mkdir -p /app/database/vector_store \
59
- /app/database/learning_data \
60
- /app/database/chats_data
61
 
62
  EXPOSE 7860
63
 
 
1
+ # ─────────────────────────────────────────────────────────────────────────────
2
+ # Kitten Chat – HuggingFace Docker Space (free CPU)
3
+ # LLM : gemma-3-270m-it-F16.gguf via llama-cpp-python (CPU-only)
4
+ # TTS : Kokoro ONNX + "kiki" voice (af_kore)
5
+ # UI : Flask (no Gradio)
6
+ #
7
+ # Space requirements:
8
+ # • Place gemma-3-270m-it-F16.gguf in the repo root (or set MODEL_PATH env)
9
+ # • kokoro model files are downloaded automatically at build time
10
+ #
11
+ # HuggingFace Space config (add to README.md front-matter):
12
+ # ---
13
+ # title: Kitten Chat
14
+ # emoji: 🐱
15
+ # colorFrom: purple
16
+ # colorTo: pink
17
+ # sdk: docker
18
+ # app_port: 7860
19
+ # ---
20
+ # ─────────────────────────────────────────────────────────────────────────────
21
 
22
+ FROM python:3.11-slim
23
 
24
+ # ── System deps ───────────────────────────────────────────────────────────────
25
  RUN apt-get update && apt-get install -y --no-install-recommends \
26
+ build-essential \
27
+ cmake \
28
+ espeak-ng \
29
+ libsndfile1 \
30
+ wget \
31
+ ca-certificates \
32
  && rm -rf /var/lib/apt/lists/*
33
 
34
+ # ── Python deps ───────────────────────────────────────────────────────────────
35
+ # llama-cpp-python – CPU-only wheel (no CUDA, keeps image small)
36
+ ENV CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_CUDA=OFF"
37
+ ENV FORCE_CMAKE=1
38
+
39
+ COPY requirements.txt /tmp/requirements.txt
40
+ RUN pip install --no-cache-dir -r /tmp/requirements.txt
41
 
42
+ # ── App ───────────────────────────────────────────────────────────────────────
43
+ WORKDIR /app
 
44
 
45
+ # Download Kokoro ONNX model + voices at build time
46
+ # Using the int8 quantized model – much smaller & faster on CPU
47
+ RUN mkdir -p /app/models && \
48
+ wget -q -O /app/models/kokoro-v1.0.int8.onnx \
49
+ "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.int8.onnx" && \
50
+ wget -q -O /app/models/voices-v1.0.bin \
51
+ "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin"
52
 
53
+ # Copy user-supplied GGUF model (must exist in build context)
54
+ # If you prefer to download at runtime, remove this line and set MODEL_PATH
55
+ COPY gemma-3-270m-it-F16.gguf /app/models/gemma-3-270m-it-F16.gguf
 
 
 
 
 
 
 
 
 
56
 
57
+ # Copy application
58
+ COPY app.py /app/app.py
 
 
 
 
 
59
 
60
+ # ── Environment ───────────────────────────────────────────────────────────────
61
+ ENV MODEL_PATH=/app/models/gemma-3-270m-it-F16.gguf
62
+ ENV ONNX_MODEL=/app/models/kokoro-v1.0.int8.onnx
63
+ ENV VOICES_BIN=/app/models/voices-v1.0.bin
64
+ ENV TTS_VOICE=af_kore
65
+ ENV PORT=7860
66
+ # Cap OpenMP at 4 worker threads (hard-coded for the free CPU tier)
67
+ ENV OMP_NUM_THREADS=4
68
 
69
+ # HuggingFace Spaces runs as non-root user 1000
70
+ RUN useradd -m -u 1000 hfuser && chown -R hfuser:hfuser /app
71
+ USER 1000
 
72
 
73
  EXPOSE 7860
74