Spaces:

therandomuser03
/

psypredict-backend

Sleeping

App Files Files Community

therandomuser03 committed on Mar 14

Commit

4044503

1 Parent(s): befb434

fix: remove Ollama from Dockerfile, use Groq API instead

Browse files

Files changed (1) hide show

Dockerfile +14 -34

Dockerfile CHANGED Viewed

@@ -1,12 +1,11 @@
 # ─────────────────────────────────────────────────────────────────────────────
-# PsyPredict — Backend Dockerfile for Hugging Face Spaces (CPU / Docker SDK)
 #
 # Architecture:
-#   - Ollama binary installed inside the container (serves Phi-3.5 on port 11434)
 #   - FastAPI app served by Uvicorn on port 7860 (HF Spaces standard port)
-#   - start.sh orchestrates: Ollama → model pull → Uvicorn
-#   - ML assets (Keras face model + CSV) are downloaded at BUILD time via gdown
-#   - DistilBERT + Crisis classifier are downloaded at BUILD time from HF Hub
 #   - HF_HUB_OFFLINE=1 at runtime so the container starts offline-capable
 # ─────────────────────────────────────────────────────────────────────────────
@@ -16,62 +15,43 @@ WORKDIR /app
 # ── 1. System dependencies ────────────────────────────────────────────────────
 # libgl1 + libglib2.0-0: OpenCV headless needs these
-# curl + ca-certificates: needed to download Ollama install script
 RUN apt-get update && apt-get install -y --no-install-recommends \
     libgl1 \
     libglib2.0-0 \
     curl \
     ca-certificates \
-    zstd \
     && rm -rf /var/lib/apt/lists/*
-# ── 2. Install Ollama binary ──────────────────────────────────────────────────
-# Uses the official install script — places `ollama` binary in /usr/local/bin
-RUN curl -fsSL https://ollama.com/install.sh | sh
-# ── 3. PyTorch CPU-only (separate layer — ~800MB, caches very well) ───────────
 RUN pip install --no-cache-dir \
     torch --index-url https://download.pytorch.org/whl/cpu
-# ── 4. Install remaining Python dependencies ──────────────────────────────────
-# Note: torch is already installed above; pip will skip it when it hits
-# the torch line in requirements.txt (version constraint already satisfied).
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-# ── 5. Copy application source code ──────────────────────────────────────────
 COPY . .
-# ── 6. Download ML assets at BUILD time ──────────────────────────────────────
 # Downloads:
 #   - app/ml_assets/emotion_model_trained.h5  (Keras CNN face model, ~4MB, Google Drive)
 #   - app/ml_assets/MEDICATION.csv            (remedy database, Google Drive)
 #   - app/ml_assets/distilbert_model/         (DistilBERT emotion classifier, ~260MB, HF Hub)
 #   - app/ml_assets/crisis_model/             (MiniLM zero-shot classifier, ~130MB, HF Hub)
-#
-# Skips files that already exist in the build context (e.g. haarcascade XML).
-# HF_HUB_OFFLINE must be 0 here so transformers can reach HuggingFace.
 ENV HF_HUB_OFFLINE=0
 RUN python download_models.py
-# ── 7. Runtime environment ────────────────────────────────────────────────────
 ENV PYTHONPATH=/app
-# Ollama runs locally inside the container
-ENV OLLAMA_BASE_URL=http://localhost:11434
-ENV OLLAMA_MODEL=phi3.5:3.8b-mini-instruct-q4_0
-ENV OLLAMA_TIMEOUT_S=300
-ENV OLLAMA_RETRIES=2
-# All HF models were baked in at build time — go offline for faster startup
 ENV HF_HUB_OFFLINE=1
 ENV LOG_LEVEL=INFO
 ENV RATE_LIMIT=30/minute
-# ── 8. Expose HF Spaces standard port ────────────────────────────────────────
 EXPOSE 7860
-# ── 9. Startup script ─────────────────────────────────────────────────────────
-# start.sh: starts Ollama daemon → pulls Phi-3.5 model → launches Uvicorn
-COPY start.sh /start.sh
-RUN chmod +x /start.sh
-CMD ["/start.sh"]

 # ─────────────────────────────────────────────────────────────────────────────
+# PsyPredict — Backend Dockerfile for Hugging Face Spaces
 #
 # Architecture:
+#   - LLM inference via Groq API (no Ollama needed)
 #   - FastAPI app served by Uvicorn on port 7860 (HF Spaces standard port)
+#   - ML assets (Keras face model + CSV) downloaded at BUILD time via gdown
+#   - DistilBERT + Crisis classifier downloaded at BUILD time from HF Hub
 #   - HF_HUB_OFFLINE=1 at runtime so the container starts offline-capable
 # ─────────────────────────────────────────────────────────────────────────────
 # ── 1. System dependencies ────────────────────────────────────────────────────
 # libgl1 + libglib2.0-0: OpenCV headless needs these
 RUN apt-get update && apt-get install -y --no-install-recommends \
     libgl1 \
     libglib2.0-0 \
     curl \
     ca-certificates \
     && rm -rf /var/lib/apt/lists/*
+# ── 2. PyTorch CPU-only (separate layer — ~800MB, caches very well) ───────────
 RUN pip install --no-cache-dir \
     torch --index-url https://download.pytorch.org/whl/cpu
+# ── 3. Install remaining Python dependencies ──────────────────────────────────
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
+# ── 4. Copy application source code ──────────────────────────────────────────
 COPY . .
+# ── 5. Download ML assets at BUILD time ──────────────────────────────────────
 # Downloads:
 #   - app/ml_assets/emotion_model_trained.h5  (Keras CNN face model, ~4MB, Google Drive)
 #   - app/ml_assets/MEDICATION.csv            (remedy database, Google Drive)
 #   - app/ml_assets/distilbert_model/         (DistilBERT emotion classifier, ~260MB, HF Hub)
 #   - app/ml_assets/crisis_model/             (MiniLM zero-shot classifier, ~130MB, HF Hub)
 ENV HF_HUB_OFFLINE=0
 RUN python download_models.py
+# ── 6. Runtime environment ────────────────────────────────────────────────────
 ENV PYTHONPATH=/app
+ENV OLLAMA_TIMEOUT_S=30
+ENV OLLAMA_RETRIES=3
 ENV HF_HUB_OFFLINE=1
 ENV LOG_LEVEL=INFO
 ENV RATE_LIMIT=30/minute
+# ── 7. Expose HF Spaces standard port ────────────────────────────────────────
 EXPOSE 7860
+# ── 8. Launch FastAPI directly (no Ollama needed) ────────────────────────────
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--log-level", "info"]