OrbitMC committed on
Commit
5b51ec0
·
verified Β·
1 Parent(s): 646f5d4

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +49 -27
Dockerfile CHANGED
@@ -1,41 +1,63 @@
1
- FROM python:3.10-slim
2
 
3
- # Install system dependencies
4
- RUN apt-get update && apt-get install -y \
5
- libsndfile1 build-essential git curl && \
6
- rm -rf /var/lib/apt/lists/*
 
 
 
 
7
 
8
- RUN useradd -m -u 1000 jarvis
9
- USER jarvis
10
- ENV HOME=/home/jarvis \
11
- PATH=/home/jarvis/.local/bin:$PATH \
12
- HF_HOME=/home/jarvis/.cache/huggingface
13
 
14
- WORKDIR $HOME/app
 
 
 
15
 
16
- # Force binary-only installs and use the specific wheel index for llama-cpp
17
- # This prevents it from even trying to compile
18
- ENV CMAKE_ARGS="-DLLAMA_METAL=off"
19
- ENV FORCE_CMAKE=1
20
 
21
- RUN pip install --no-cache-dir --upgrade pip
 
 
22
 
 
23
  RUN pip install --no-cache-dir \
24
- flask pydantic numpy transformers accelerate \
25
- sentence-transformers faiss-cpu soundfile \
26
- langchain-huggingface langchain-community \
27
- langchain-text-splitters huggingface_hub
28
 
29
- # THIS IS THE KEY FIX: Install the specific CPU wheel directly without building
30
- RUN pip install --no-cache-dir llama-cpp-python \
31
- --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu
 
 
 
 
 
 
 
 
 
32
 
33
- RUN pip install --no-cache-dir https://github.com/KittenML/KittenTTS/releases/download/0.8/kittentts-0.8.0-py3-none-any.whl
 
 
 
 
 
 
34
 
35
- COPY --chown=jarvis:jarvis . $HOME/app
 
 
36
 
37
- # Pre-download models
38
- RUN python app.py --setup
 
 
39
 
40
  EXPOSE 7860
41
 
 
1
# syntax=docker/dockerfile:1
FROM python:3.11-slim

# HuggingFace Spaces runs as root by default on the free tier,
# but expects the app to listen on port 7860 — so no USER switch here.
# Runtime env vars grouped in one instruction for readability.
# NOTE: TRANSFORMERS_CACHE is deprecated in favor of HF_HOME; kept so
# older transformers versions that still read it resolve the same path.
ENV PORT=7860 \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    HF_HOME=/app/cache \
    TRANSFORMERS_CACHE=/app/cache \
    SENTENCE_TRANSFORMERS_HOME=/app/cache

WORKDIR /app

# ── System deps needed to install the pre-built llama-cpp-python wheel ──
# Packages sorted alphabetically; list cleaned in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        wget \
    && rm -rf /var/lib/apt/lists/*

# ── Python deps ──
# Install everything EXCEPT llama-cpp-python first (fast), and copy only
# requirements.txt so this layer stays cached until the deps themselves change.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# ── llama-cpp-python: pre-built CPU wheel (no compile, seconds not minutes) ──
RUN pip install --no-cache-dir llama-cpp-python \
        --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu

# ── KittenTTS ──
RUN pip install --no-cache-dir \
        https://github.com/KittenML/KittenTTS/releases/download/0.8/kittentts-0.8.0-py3-none-any.whl

# ── Pre-download the GGUF model at BUILD time so startup is instant ──
# This bakes the model into the image layer — no download on first run.
# (Dropped an unused `import os` from the original heredoc.)
RUN python - <<'EOF'
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="unsloth/Qwen3.5-0.8B-GGUF",
    filename="Qwen3.5-0.8B-UD-Q2_K_XL.gguf",
    cache_dir="/app/cache",
)
print(f"Model cached at: {path}")
EOF

# ── Pre-download embeddings model at BUILD time ──
RUN python - <<'EOF'
from sentence_transformers import SentenceTransformer

SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2",
                    cache_folder="/app/cache")
print("Embeddings model cached.")
EOF

# ── Copy app source LAST so code edits don't invalidate the heavy layers ──
COPY app.py .
COPY static/ static/

# ── Create runtime dirs the app writes to ──
RUN mkdir -p /app/database/vector_store \
             /app/database/learning_data \
             /app/database/chats_data

EXPOSE 7860
63