therandomuser03 committed on
Commit
4044503
·
1 Parent(s): befb434

fix: remove Ollama from Dockerfile, use Groq API instead

Browse files
Files changed (1) hide show
  1. Dockerfile +14 -34
Dockerfile CHANGED
@@ -1,12 +1,11 @@
1
  # ─────────────────────────────────────────────────────────────────────────────
2
- # PsyPredict — Backend Dockerfile for Hugging Face Spaces (CPU / Docker SDK)
3
  #
4
  # Architecture:
5
- # - Ollama binary installed inside the container (serves Phi-3.5 on port 11434)
6
  # - FastAPI app served by Uvicorn on port 7860 (HF Spaces standard port)
7
- # - start.sh orchestrates: Ollama → model pull → Uvicorn
8
- # - ML assets (Keras face model + CSV) are downloaded at BUILD time via gdown
9
- # - DistilBERT + Crisis classifier are downloaded at BUILD time from HF Hub
10
  # - HF_HUB_OFFLINE=1 at runtime so the container starts offline-capable
11
  # ─────────────────────────────────────────────────────────────────────────────
12
 
@@ -16,62 +15,43 @@ WORKDIR /app
16
 
17
  # ── 1. System dependencies ────────────────────────────────────────────────────
18
  # libgl1 + libglib2.0-0: OpenCV headless needs these
19
- # curl + ca-certificates: needed to download Ollama install script
20
  RUN apt-get update && apt-get install -y --no-install-recommends \
21
  libgl1 \
22
  libglib2.0-0 \
23
  curl \
24
  ca-certificates \
25
- zstd \
26
  && rm -rf /var/lib/apt/lists/*
27
 
28
- # ── 2. Install Ollama binary ──────────────────────────────────────────────────
29
- # Uses the official install script — places `ollama` binary in /usr/local/bin
30
- RUN curl -fsSL https://ollama.com/install.sh | sh
31
-
32
- # ── 3. PyTorch CPU-only (separate layer — ~800MB, caches very well) ───────────
33
  RUN pip install --no-cache-dir \
34
  torch --index-url https://download.pytorch.org/whl/cpu
35
 
36
- # ── 4. Install remaining Python dependencies ──────────────────────────────────
37
- # Note: torch is already installed above; pip will skip it when it hits
38
- # the torch line in requirements.txt (version constraint already satisfied).
39
  COPY requirements.txt .
40
  RUN pip install --no-cache-dir -r requirements.txt
41
 
42
- # ── 5. Copy application source code ──────────────────────────────────────────
43
  COPY . .
44
 
45
- # ── 6. Download ML assets at BUILD time ──────────────────────────────────────
46
  # Downloads:
47
  # - app/ml_assets/emotion_model_trained.h5 (Keras CNN face model, ~4MB, Google Drive)
48
  # - app/ml_assets/MEDICATION.csv (remedy database, Google Drive)
49
  # - app/ml_assets/distilbert_model/ (DistilBERT emotion classifier, ~260MB, HF Hub)
50
  # - app/ml_assets/crisis_model/ (MiniLM zero-shot classifier, ~130MB, HF Hub)
51
- #
52
- # Skips files that already exist in the build context (e.g. haarcascade XML).
53
- # HF_HUB_OFFLINE must be 0 here so transformers can reach HuggingFace.
54
  ENV HF_HUB_OFFLINE=0
55
  RUN python download_models.py
56
 
57
- # ── 7. Runtime environment ────────────────────────────────────────────────────
58
  ENV PYTHONPATH=/app
59
- # Ollama runs locally inside the container
60
- ENV OLLAMA_BASE_URL=http://localhost:11434
61
- ENV OLLAMA_MODEL=phi3.5:3.8b-mini-instruct-q4_0
62
- ENV OLLAMA_TIMEOUT_S=300
63
- ENV OLLAMA_RETRIES=2
64
- # All HF models were baked in at build time — go offline for faster startup
65
  ENV HF_HUB_OFFLINE=1
66
  ENV LOG_LEVEL=INFO
67
  ENV RATE_LIMIT=30/minute
68
 
69
- # ── 8. Expose HF Spaces standard port ────────────────────────────────────────
70
  EXPOSE 7860
71
 
72
- # ── 9. Startup script ─────────────────────────────────────────────────────────
73
- # start.sh: starts Ollama daemon → pulls Phi-3.5 model → launches Uvicorn
74
- COPY start.sh /start.sh
75
- RUN chmod +x /start.sh
76
-
77
- CMD ["/start.sh"]
 
1
  # ─────────────────────────────────────────────────────────────────────────────
2
+ # PsyPredict — Backend Dockerfile for Hugging Face Spaces
3
  #
4
  # Architecture:
5
+ # - LLM inference via Groq API (no Ollama needed)
6
  # - FastAPI app served by Uvicorn on port 7860 (HF Spaces standard port)
7
+ # - ML assets (Keras face model + CSV) downloaded at BUILD time via gdown
8
+ # - DistilBERT + Crisis classifier downloaded at BUILD time from HF Hub
 
9
  # - HF_HUB_OFFLINE=1 at runtime so the container starts offline-capable
10
  # ─────────────────────────────────────────────────────────────────────────────
11
 
 
15
 
16
  # ── 1. System dependencies ────────────────────────────────────────────────────
17
  # libgl1 + libglib2.0-0: OpenCV headless needs these
 
18
  RUN apt-get update && apt-get install -y --no-install-recommends \
19
  libgl1 \
20
  libglib2.0-0 \
21
  curl \
22
  ca-certificates \
 
23
  && rm -rf /var/lib/apt/lists/*
24
 
25
+ # ── 2. PyTorch CPU-only (separate layer — ~800MB, caches very well) ───────────
 
 
 
 
26
  RUN pip install --no-cache-dir \
27
  torch --index-url https://download.pytorch.org/whl/cpu
28
 
29
+ # ── 3. Install remaining Python dependencies ──────────────────────────────────
 
 
30
  COPY requirements.txt .
31
  RUN pip install --no-cache-dir -r requirements.txt
32
 
33
+ # ── 4. Copy application source code ──────────────────────────────────────────
34
  COPY . .
35
 
36
+ # ── 5. Download ML assets at BUILD time ──────────────────────────────────────
37
  # Downloads:
38
  # - app/ml_assets/emotion_model_trained.h5 (Keras CNN face model, ~4MB, Google Drive)
39
  # - app/ml_assets/MEDICATION.csv (remedy database, Google Drive)
40
  # - app/ml_assets/distilbert_model/ (DistilBERT emotion classifier, ~260MB, HF Hub)
41
  # - app/ml_assets/crisis_model/ (MiniLM zero-shot classifier, ~130MB, HF Hub)
 
 
 
42
  ENV HF_HUB_OFFLINE=0
43
  RUN python download_models.py
44
 
45
+ # ── 6. Runtime environment ────────────────────────────────────────────────────
46
  ENV PYTHONPATH=/app
47
+ ENV OLLAMA_TIMEOUT_S=30
48
+ ENV OLLAMA_RETRIES=3
 
 
 
 
49
  ENV HF_HUB_OFFLINE=1
50
  ENV LOG_LEVEL=INFO
51
  ENV RATE_LIMIT=30/minute
52
 
53
+ # ── 7. Expose HF Spaces standard port ────────────────────────────────────────
54
  EXPOSE 7860
55
 
56
+ # ── 8. Launch FastAPI directly (no Ollama needed) ────────────────────────────
57
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--log-level", "info"]