# syntax=docker/dockerfile:1
FROM python:3.10-slim

# 1. Build toolchain (REQUIRED for llama.cpp compilation):
#    llama-cpp-python compiles llama.cpp from source at pip-install time,
#    so build-essential and cmake must be present.
#    --no-install-recommends keeps the layer minimal (hadolint DL3015).
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# 2. AI engine + model downloader ([server] extra provides the OpenAI-compatible
#    HTTP server; huggingface_hub supplies the huggingface-cli used below).
RUN pip install --no-cache-dir huggingface_hub "llama-cpp-python[server]>=0.2.82"

# 3. Non-root runtime user (uid 1000 is the Hugging Face Spaces convention).
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
WORKDIR $HOME/app

# 4. Bake the quantized model (Qwen 2.5 Coder 7B, Q4_K_M) into the image at
#    build time; --local-dir-use-symlinks False stores a real file rather than
#    a symlink into the hub cache, so the model survives as a normal layer.
RUN huggingface-cli download Qwen/Qwen2.5-Coder-7B-Instruct-GGUF \
    qwen2.5-coder-7b-instruct-q4_k_m.gguf \
    --local-dir . \
    --local-dir-use-symlinks False

# 5. Documented service port (EXPOSE does not publish; 7860 is the HF Spaces default).
EXPOSE 7860

# 6. Let orchestrators detect a wedged server. curl is installed above; the
#    generous start-period covers model load time for a 7B GGUF.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD curl -fsS http://localhost:7860/v1/models || exit 1

# 7. Run command (lightweight pro config — optimized threading/context kept).
#    Exec (JSON-array) form so the server is PID 1 and receives SIGTERM
#    from `docker stop`.
CMD ["python3", "-m", "llama_cpp.server", \
     "--model", "qwen2.5-coder-7b-instruct-q4_k_m.gguf", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--n_threads", "2", \
     "--n_ctx", "16384", \
     "--n_batch", "512", \
     "--chat_format", "chatml"]