khushalcodiste commited on
Commit
aec0cd6
·
1 Parent(s): e1be1d0

feat: huh

Browse files
Files changed (2) hide show
  1. Dockerfile +23 -30
  2. docker-compose.yml +3 -11
Dockerfile CHANGED
@@ -1,37 +1,30 @@
1
- # Base image
2
- FROM python:3.10-slim
3
 
4
- # Install system dependencies (including curl for healthcheck)
5
- RUN apt-get update && apt-get install -y --no-install-recommends \
6
- curl \
7
- git \
8
- && rm -rf /var/lib/apt/lists/*
9
 
10
- # Create user (HF requirement)
11
- RUN useradd -m -u 1000 user
 
12
 
13
- # Set working directory
14
- WORKDIR /home/user/app
15
 
16
- # Copy requirements first (for caching)
17
- COPY --chown=user requirements.txt .
 
 
 
18
 
19
- # Install dependencies
20
- RUN pip install --no-cache-dir --upgrade pip && \
21
- pip install --no-cache-dir -r requirements.txt
22
-
23
- # Copy app
24
- COPY --chown=user . .
25
- RUN pip install git+https://github.com/huggingface/transformers.git
26
- # Download model during build (before switching to user)
27
- # This bakes the model into the image for faster startup
28
- RUN python download_model.py
29
-
30
- # Switch to user
31
- USER user
32
-
33
- # Expose port (default 7860 for HuggingFace Spaces, but configurable)
34
  EXPOSE 7860
35
 
36
- # Run FastAPI with APP_PORT environment variable (default 7860)
37
- CMD ["sh", "-c", "uvicorn main:app --host 0.0.0.0 --port ${APP_PORT:-7860}"]
 
 
 
 
 
 
 
 
 
 
1
# syntax=docker/dockerfile:1
FROM ghcr.io/ggml-org/llama.cpp:full

WORKDIR /app

# Python is only needed for the build-time model download below.
# apt-get (not apt: unstable CLI, hadolint DL3027); --no-install-recommends
# and list cleanup in the same layer keep the image small (DL3009/DL3015).
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*

# Isolated venv so pip does not conflict with the system Python (PEP 668).
RUN python3 -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# --no-cache-dir: pip's wheel cache would otherwise be baked into the layer (DL3042).
RUN pip install --no-cache-dir -U pip huggingface_hub

# Download the GGUF model during build (Q5_K_XL quantization - good balance of
# speed/quality) so the model is baked into the image for fast startup.
# NOTE(review): repo id "unsloth/gemma-4-E4B-it-GGUF" — the published E4B
# checkpoint appears under the gemma-3n family; confirm this repo exists,
# otherwise the build fails here.
RUN python3 -c 'from huggingface_hub import hf_hub_download; \
repo="unsloth/gemma-4-E4B-it-GGUF"; \
hf_hub_download(repo_id=repo, filename="gemma-4-E4B-it-UD-Q5_K_XL.gguf", local_dir="/app"); \
hf_hub_download(repo_id=repo, filename="mmproj-BF16.gguf", local_dir="/app")'

# Run as a non-root user (HuggingFace Spaces requirement, uid 1000; the model
# files stay root-owned but the server only needs read access).
RUN useradd -m -u 1000 user
USER user

# Documentation only (does not publish the port) — 7860 is the HF Spaces port.
EXPOSE 7860

# The :full image's entrypoint is a dispatcher script; "--server" selects
# llama-server and the remaining args are passed through to it.
# Exec-form CMD so the args can be overridden at `docker run`.
# NOTE(review): quantized V cache (--cache-type-v) typically requires flash
# attention (-fa) in llama.cpp — confirm the server starts with this combo.
CMD ["--server", \
     "-m", "/app/gemma-4-E4B-it-UD-Q5_K_XL.gguf", \
     "--mmproj", "/app/mmproj-BF16.gguf", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "-t", "2", \
     "--cache-type-k", "q8_0", \
     "--cache-type-v", "iq4_nl", \
     "-c", "128000", \
     "-n", "38912"]
docker-compose.yml CHANGED
@@ -5,20 +5,12 @@ services:
5
  build: .
6
  container_name: gemma4-api
7
  ports:
8
- - "${APP_PORT:-7860}:${APP_PORT:-7860}"
9
  environment:
10
- - MODEL_NAME=${MODEL_NAME:-onnx-community/gemma-4-E2B-it-ONNX}
11
- - APP_PORT=${APP_PORT:-7860}
12
- - LOG_LEVEL=${LOG_LEVEL:-INFO}
13
- - HF_HOME=/home/user/.cache/huggingface
14
  healthcheck:
15
- test: ["CMD", "curl", "-f", "http://localhost:${APP_PORT:-7860}/"]
16
  interval: 30s
17
  timeout: 10s
18
  retries: 3
19
  start_period: 60s
20
- volumes:
21
- - model_cache:/home/user/.cache/huggingface
22
-
23
- volumes:
24
- model_cache:
 
5
  build: .
6
  container_name: gemma4-api
7
  ports:
8
+ - "7860:7860"
9
  environment:
10
+ - LOG_LEVEL=INFO
 
 
 
11
  healthcheck:
12
+ test: ["CMD", "curl", "-f", "http://localhost:7860/health"]
13
  interval: 30s
14
  timeout: 10s
15
  retries: 3
16
  start_period: 60s