# syntax=docker/dockerfile:1
FROM python:3.10-slim

WORKDIR /app

# Build dependencies for llama-cpp-python (compiled from source) plus OpenBLAS
# headers for accelerated CPU inference. --no-install-recommends keeps the
# layer small; the apt list cleanup happens in the same layer so it never
# persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        libopenblas-dev \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest first so the (expensive) pip/compile layer
# is cached until requirements.txt itself changes.
COPY requirements.txt .

# Build llama-cpp-python against OpenBLAS. CMAKE_ARGS is a build-time-only
# knob, so it is passed inline on this RUN instead of being baked into the
# runtime environment with ENV.
RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" \
    pip install --no-cache-dir -r requirements.txt

# Download models - 7B (quality) and 1.5B (speed).
# curl --fail (-f) makes an HTTP 404/5xx abort the build instead of silently
# saving an HTML error page as the "model" file.
RUN mkdir -p /app/models && \
    echo "Downloading Qwen2.5-Coder-7B (quality model)..." && \
    curl -fL -o /app/models/qwen2.5-coder-7b-instruct-q4_k_m.gguf \
    "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-q4_k_m.gguf" && \
    echo "Downloading Qwen2.5-Coder-1.5B (fast model)..." && \
    curl -fL -o /app/models/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf \
    "https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf"

# Copy application code and static files (after deps/models so app edits
# don't invalidate the heavy layers above).
COPY app.py .
COPY static/ ./static/

# Runtime tuning knobs (presumably read by app.py — verify against the app),
# grouped into a single ENV instruction.
ENV N_CTX=4096 \
    N_THREADS=4 \
    N_BATCH=512 \
    USE_MLOCK=true \
    USE_MMAP=true

# Run as an unprivileged user (uid 1000 is the Hugging Face Spaces convention).
# NOTE(review): with USE_MLOCK=true, mlock may be capped by RLIMIT_MEMLOCK for
# a non-root user — llama.cpp degrades with a warning; confirm in deployment.
RUN useradd -m -u 1000 user && chown -R user:user /app
USER user

# Documentation only — the port app.py serves on (7860 = HF Spaces default).
EXPOSE 7860

# Exec form: python is PID 1 and receives SIGTERM directly on `docker stop`.
CMD ["python", "app.py"]