khushalcodiste commited on
Commit
aec0cd6
·
1 Parent(s): e1be1d0

feat: huh

Browse files
Files changed (2) hide show
  1. Dockerfile +23 -30
  2. docker-compose.yml +3 -11
Dockerfile CHANGED
@@ -1,37 +1,30 @@
1
- # Base image
2
- FROM python:3.10-slim
3
 
4
- # Install system dependencies (including curl for healthcheck)
5
- RUN apt-get update && apt-get install -y --no-install-recommends \
6
- curl \
7
- git \
8
- && rm -rf /var/lib/apt/lists/*
9
 
10
- # Create user (HF requirement)
11
- RUN useradd -m -u 1000 user
 
12
 
13
- # Set working directory
14
- WORKDIR /home/user/app
15
 
16
- # Copy requirements first (for caching)
17
- COPY --chown=user requirements.txt .
 
 
 
18
 
19
- # Install dependencies
20
- RUN pip install --no-cache-dir --upgrade pip && \
21
- pip install --no-cache-dir -r requirements.txt
22
-
23
- # Copy app
24
- COPY --chown=user . .
25
- RUN pip install git+https://github.com/huggingface/transformers.git
26
- # Download model during build (before switching to user)
27
- # This bakes the model into the image for faster startup
28
- RUN python download_model.py
29
-
30
- # Switch to user
31
- USER user
32
-
33
- # Expose port (default 7860 for HuggingFace Spaces, but configurable)
34
  EXPOSE 7860
35
 
36
- # Run FastAPI with APP_PORT environment variable (default 7860)
37
- CMD ["sh", "-c", "uvicorn main:app --host 0.0.0.0 --port ${APP_PORT:-7860}"]
 
 
 
 
 
 
 
 
 
 
1
# syntax=docker/dockerfile:1
FROM ghcr.io/ggml-org/llama.cpp:full

WORKDIR /app

# Python is only needed for the build-time model download below.
# apt-get (not apt: unstable CLI, hadolint DL3027); --no-install-recommends
# and list cleanup in the same layer keep the image small (DL3009/DL3015).
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*

# Isolated venv so pip does not conflict with the system Python (PEP 668).
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# --no-cache-dir: pip's wheel cache would otherwise be baked into the layer (DL3042).
RUN pip install --no-cache-dir -U pip huggingface_hub

# Download the GGUF model during build (Q5_K_XL quantization - good balance of
# speed/quality) so the model is baked into the image for fast startup.
# NOTE(review): repo id "unsloth/gemma-4-E4B-it-GGUF" — the published E4B
# checkpoint appears under the gemma-3n family; confirm this repo exists,
# otherwise the build fails here.
RUN python3 -c 'from huggingface_hub import hf_hub_download; \
repo="unsloth/gemma-4-E4B-it-GGUF"; \
hf_hub_download(repo_id=repo, filename="gemma-4-E4B-it-UD-Q5_K_XL.gguf", local_dir="/app"); \
hf_hub_download(repo_id=repo, filename="mmproj-BF16.gguf", local_dir="/app")'

# Run as a non-root user (HuggingFace Spaces requirement, uid 1000; the model
# files stay root-owned but the server only needs read access).
RUN useradd -m -u 1000 user
USER user

# Documentation only (does not publish the port) — 7860 is the HF Spaces port.
EXPOSE 7860

# The :full image's entrypoint is a dispatcher script; "--server" selects
# llama-server and the remaining args are passed through to it.
# Exec-form CMD so the args can be overridden at `docker run`.
# NOTE(review): quantized V cache (--cache-type-v) typically requires flash
# attention (-fa) in llama.cpp — confirm the server starts with this combo.
CMD ["--server", \
     "-m", "/app/gemma-4-E4B-it-UD-Q5_K_XL.gguf", \
     "--mmproj", "/app/mmproj-BF16.gguf", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "-t", "2", \
     "--cache-type-k", "q8_0", \
     "--cache-type-v", "iq4_nl", \
     "-c", "128000", \
     "-n", "38912"]
docker-compose.yml CHANGED
@@ -5,20 +5,12 @@ services:
5
  build: .
6
  container_name: gemma4-api
7
  ports:
8
- - "${APP_PORT:-7860}:${APP_PORT:-7860}"
9
  environment:
10
- - MODEL_NAME=${MODEL_NAME:-onnx-community/gemma-4-E2B-it-ONNX}
11
- - APP_PORT=${APP_PORT:-7860}
12
- - LOG_LEVEL=${LOG_LEVEL:-INFO}
13
- - HF_HOME=/home/user/.cache/huggingface
14
  healthcheck:
15
- test: ["CMD", "curl", "-f", "http://localhost:${APP_PORT:-7860}/"]
16
  interval: 30s
17
  timeout: 10s
18
  retries: 3
19
  start_period: 60s
20
- volumes:
21
- - model_cache:/home/user/.cache/huggingface
22
-
23
- volumes:
24
- model_cache:
 
5
  build: .
6
  container_name: gemma4-api
7
  ports:
8
+ - "7860:7860"
9
  environment:
10
+ - LOG_LEVEL=INFO
 
 
 
11
  healthcheck:
12
+ test: ["CMD", "curl", "-f", "http://localhost:7860/health"]
13
  interval: 30s
14
  timeout: 10s
15
  retries: 3
16
  start_period: 60s