# syntax=docker/dockerfile:1

# Use the pre-built GPU base image from Docker Hub.
# Build the base image locally with:
#   docker buildx build --platform linux/amd64 -f Dockerfile.base.gpu \
#     -t techdaskalos/cybersecchatbot:gpu . --push
# NOTE(review): the original build comment referenced tag "latest-gpu" while
# FROM pulls ":gpu" — confirm which tag the base image is actually published
# under. For reproducible builds, consider pinning by digest (image@sha256:…).
FROM techdaskalos/cybersecchatbot:gpu

# Runtime configuration (already set in the base image; overridden here).
# Grouped into a single ENV instruction — one logical config set, one layer.
ENV PYTHONUNBUFFERED=1 \
    MODEL_REPO=daskalos-apps/phi4-cybersec-Q4_K_M \
    MODEL_FILENAME=phi4-mini-instruct-Q4_K_M.gguf \
    USE_RAG=false \
    CACHE_ENABLED=true

# GPU configuration — number of model layers offloaded to the GPU
# (35 = all layers for this model, per the original author's comment).
ENV N_GPU_LAYERS=35

# Concurrent request handling — 10 model instances for 10 concurrent users.
ENV MODEL_POOL_SIZE=10

# Point the Hugging Face cache at /data for persistence and write permissions.
ENV HF_HOME=/data/huggingface

# Ensure all required directories exist and are writable.
# (/data and /data/huggingface are created implicitly by the deeper paths.)
# NOTE(review): chmod -R 777 is normally an anti-pattern; it is kept here
# because the /data layout suggests the container runs under an arbitrary,
# non-root UID at deploy time (e.g. Hugging Face Spaces). If the runtime UID
# is known, prefer a dedicated user + chown over world-writable permissions.
RUN mkdir -p /data/huggingface/hub /data/huggingface/transformers /app/models /app/knowledge_db && \
    chmod -R 777 /data /app/models /app/knowledge_db

# Application code and the test interface (needed for the /test endpoint),
# copied in one layer, last, so code changes do not invalidate the
# directory/permission layer above.
COPY test_interface.html main.py /app/

# Documentation only — the app listens on 8000; publish with -p at run time.
EXPOSE 8000

# Exec (JSON-array) form so the Python process is PID 1 and receives SIGTERM.
CMD ["python", "main.py"]