naimulislam committed on
Commit
41a728e
·
verified ·
1 Parent(s): 0426a93

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +27 -22
Dockerfile CHANGED
@@ -1,34 +1,39 @@
1
  FROM python:3.11-slim
2
 
3
- WORKDIR /app
4
-
5
  RUN apt-get update && apt-get install -y \
6
  build-essential \
7
  cmake \
8
- git \
9
  curl \
 
 
10
  && rm -rf /var/lib/apt/lists/*
11
 
 
 
 
 
 
 
 
12
  COPY requirements.txt .
13
- RUN pip install --no-cache-dir -r requirements.txt
14
-
15
- # Download Qwen3-0.6B GGUF model
16
- RUN python -c "from huggingface_hub import hf_hub_download; \
17
- import os; \
18
- os.makedirs('/app/models', exist_ok=True); \
19
- hf_hub_download( \
20
- repo_id='unsloth/Qwen3-0.6B-GGUF', \
21
- filename='Qwen3-0.6B-Q4_K_M.gguf', \
22
- local_dir='/app/models', \
23
- local_dir_use_symlinks=False, \
24
- resume_download=True \
25
- ); \
26
- size = os.path.getsize('/app/models/Qwen3-0.6B-Q4_K_M.gguf'); \
27
- print(f'Model size: {size/1024/1024:.1f} MB'); \
28
- assert size > 300000000, f'Model download incomplete: {size} bytes'"
29
-
30
- COPY . .
31
 
 
 
 
 
 
32
  EXPOSE 7860
33
 
34
- CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
 
 
 
 
 
 
# syntax=docker/dockerfile:1
FROM python:3.11-slim

# Build dependencies for llama-cpp-python (build-essential + cmake compile the
# native extension); wget fetches the model below. --no-install-recommends
# keeps the layer small; the apt list cleanup happens in the same layer so the
# cache never persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
        wget \
    && rm -rf /var/lib/apt/lists/*

# App home. WORKDIR creates the directory if it does not exist.
WORKDIR /app

# Copy requirements first so the pip layer stays cached until requirements change.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Download the GGUF model (Q4_K_M quantization - good balance of quality and
# speed). mkdir + download + size check share one layer. The size check
# restores the previous Dockerfile's ">300 MB" download-integrity assertion:
# a truncated-but-HTTP-200 response would otherwise bake a corrupt model
# into the image.
RUN mkdir -p /app/models \
    && wget -q "https://huggingface.co/unsloth/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q4_K_M.gguf" \
        -O /app/models/Qwen3-0.6B-Q4_K_M.gguf \
    && test "$(stat -c%s /app/models/Qwen3-0.6B-Q4_K_M.gguf)" -gt 300000000

# Copy only the application files the server needs (avoids cache busts and
# leaking stray build-context files into the image).
COPY server.py .
COPY templates/ templates/

# Drop root: create a fixed-UID service account and run as it. Files copied
# above are root-owned but world-readable by default, so no chown -R is
# needed (which would duplicate the ~400 MB model into a new layer).
# NOTE(review): assumes server.py does not write under /app at runtime - confirm.
RUN useradd --system --uid 10001 --no-create-home --home /app app
USER app

# HuggingFace Spaces routes traffic to port 7860 (EXPOSE is documentation only).
EXPOSE 7860

# PYTHONUNBUFFERED=1 flushes logs immediately so Spaces shows output live.
# MODEL_PATH points at the downloaded weights; presumably read by server.py -
# verify against the application code.
ENV PYTHONUNBUFFERED=1 \
    MODEL_PATH=/app/models/Qwen3-0.6B-Q4_K_M.gguf

# Exec-form CMD: the server runs as PID 1 and receives SIGTERM on stop.
CMD ["python", "server.py"]