# syntax=docker/dockerfile:1
# Thin deployment layer on top of the pre-built GPU base image from Docker Hub.
# Build the base image locally with:
#   docker buildx build --platform linux/amd64 -f Dockerfile.base.gpu -t techdaskalos/cybersecchatbot:latest-gpu . --push
#
# NOTE(review): ":gpu" is a mutable tag — for reproducible builds, pin by digest
# (FROM techdaskalos/cybersecchatbot:gpu@sha256:...) once the intended digest is known.
FROM techdaskalos/cybersecchatbot:gpu

# Runtime configuration (already set in the base image, but can be overridden here
# or at `docker run -e ...` time). Grouped into logical ENV instructions.
#
# Model selection: which GGUF quantization to pull from the Hugging Face repo.
ENV PYTHONUNBUFFERED=1 \
    MODEL_REPO=daskalos-apps/phi4-cybersec-Q4_K_M \
    MODEL_FILENAME=phi4-mini-instruct-Q4_K_M.gguf \
    USE_RAG=false \
    CACHE_ENABLED=true

# GPU configuration — offload all layers to GPU.
# NOTE(review): 35 is a model-specific layer count; confirm it matches the layer
# count of phi4-mini (a value larger than the model's layer count is also fine
# for llama.cpp-style loaders, which clamp it).
ENV N_GPU_LAYERS=35

# Concurrent request handling — 10 model instances for 10 concurrent users.
ENV MODEL_POOL_SIZE=10

# Hugging Face cache lives under /data for persistence and write permissions
# (e.g. a mounted persistent volume on the hosting platform).
ENV HF_HOME=/data/huggingface

# Ensure all required directories exist and are writable.
# NOTE(review): chmod -R 777 is intentionally permissive so the container works
# when the platform runs it as an arbitrary non-root UID (e.g. HF Spaces / OpenShift).
# If the runtime UID is known, prefer a dedicated user + chown for least privilege.
RUN mkdir -p /data /app/models /app/knowledge_db /data/huggingface/hub /data/huggingface/transformers && \
    chmod -R 777 /data /app/models /app/knowledge_db

# Application files. WORKDIR ensures /app exists and is the process CWD so
# main.py's relative paths (e.g. test_interface.html for the /test endpoint)
# resolve correctly.
WORKDIR /app
COPY test_interface.html /app/
COPY main.py /app/

# Documentation only — the app must still be published with -p/--publish.
EXPOSE 8000

# Exec form: python runs as PID 1 and receives SIGTERM from `docker stop`.
# NOTE(review): no USER directive — the image runs as whatever user the base
# image leaves active (likely root); confirm the base image or the platform
# drops privileges before exposing this publicly.
CMD ["python", "main.py"]