# ============================================================
# OncoAgent — Production Dockerfile
# Hardware: AMD Instinct MI300X / ROCm 7.2
# Serves: vLLM (Qwen3.5-9B + Qwen3.6-27B) + Gradio UI
# ============================================================

# Base image: vLLM optimized for ROCm.
# BASE_IMAGE defaults to the floating `latest` tag so existing builds behave
# exactly as before. For reproducible production builds, pin a tag or digest:
#   docker build --build-arg BASE_IMAGE=rocm/vllm@sha256:<digest> .
ARG BASE_IMAGE=rocm/vllm:latest
FROM ${BASE_IMAGE}

# System environment
# DEBIAN_FRONTEND only matters while packages are being installed, so it is a
# build-time ARG (still visible to the RUN steps below) instead of an ENV that
# would persist into every running container.
ARG DEBIAN_FRONTEND=noninteractive
# PYTHONUNBUFFERED=1 makes Python write stdout/stderr without buffering.
# GRADIO_SERVER_NAME / GRADIO_SERVER_PORT are the bind address and port Gradio
# picks up from the environment; 7860 matches the EXPOSE at the end of the file.
ENV PYTHONUNBUFFERED=1 \
    GRADIO_SERVER_NAME="0.0.0.0" \
    GRADIO_SERVER_PORT=7860

# ROCm / PyTorch environment
# Both variables name the same GPU target: gfx942 written as a dotted version
# is 9.4.2. This matches the MI300X hardware named in the file header.
ENV HSA_OVERRIDE_GFX_VERSION=9.4.2 \
    PYTORCH_ROCM_ARCH="gfx942"

# OncoAgent model configuration
# These are defaults baked into the image; none of them is referenced again in
# this Dockerfile, so they are presumably read by the application code and the
# deploy scripts at runtime (verify against their consumers).
#   TIER1_MODEL_ID / TIER2_MODEL_ID / BASE_MODEL_ID  model identifiers
#                                                    (BASE equals TIER1 here)
#   VLLM_API_BASE / VLLM_API_KEY   local endpoint on port 8000 with the
#                                  placeholder key "EMPTY" (not a secret)
#   USE_LOCAL_ADAPTERS, TENSOR_PARALLEL_SIZE  feature / sizing switches
# NOTE(review): DEVICE="cuda" on a ROCm image looks intentional (PyTorch ROCm
# builds address GPUs through the "cuda" device type) - confirm with the app.
ENV TIER1_MODEL_ID="Qwen/Qwen3.5-9B" \
    TIER2_MODEL_ID="Qwen/Qwen3.6-27B" \
    BASE_MODEL_ID="Qwen/Qwen3.5-9B" \
    VLLM_API_BASE="http://localhost:8000/v1" \
    VLLM_API_KEY="EMPTY" \
    USE_LOCAL_ADAPTERS="false" \
    DEVICE="cuda" \
    TENSOR_PARALLEL_SIZE=1

# Application home; relative paths in later instructions resolve against it.
WORKDIR /app

# OS packages (kept alphabetical for clean diffs). The apt package index is
# deleted in the same layer so it never ends up in the image.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        build-essential \
        git \
        supervisor \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies
# The manifest is copied on its own, ahead of the application source, so this
# layer is rebuilt only when requirements.txt itself changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement /app/requirements.txt

# Application code: the whole build context is copied into /app (the WORKDIR).
COPY . ./

# Mark the vLLM launcher script as executable.
RUN chmod +x /app/deploy/start_vllm.sh

# Supervisor config to run vLLM + Gradio simultaneously.
# The heredoc is quoted ('EOF'), so the text is written verbatim with no shell
# expansion. Layout of the generated file:
#   [supervisord]     runs in the foreground (nodaemon) as the container's
#                     main process and logs to /var/log/supervisord.log
#   [program:vllm]    priority 10; launched through deploy/start_vllm.sh tier1
#   [program:gradio]  priority 20; startsecs=30 means it must stay up for 30 s
#                     before supervisor considers the start successful
# Priority only orders process start; it does not wait for vLLM to be ready.
# stopasgroup/killasgroup on vllm: the program is started via a bash wrapper,
# so without them a stop or restart would signal only the wrapper shell and
# could leave the real server running, still holding port 8000 and the GPU.
RUN cat > /etc/supervisor/conf.d/oncoagent.conf <<'EOF'
[supervisord]
nodaemon=true
logfile=/var/log/supervisord.log

[program:vllm]
command=bash /app/deploy/start_vllm.sh tier1
directory=/app
autostart=true
autorestart=true
stopasgroup=true
killasgroup=true
stdout_logfile=/var/log/vllm.log
stderr_logfile=/var/log/vllm_err.log
priority=10

[program:gradio]
command=python /app/ui/app.py
directory=/app
autostart=true
autorestart=true
stdout_logfile=/var/log/gradio.log
stderr_logfile=/var/log/gradio_err.log
priority=20
startsecs=30
EOF

# Documented container ports (EXPOSE does not publish them by itself):
# Gradio UI
EXPOSE 7860/tcp
# vLLM OpenAI-compatible API
EXPOSE 8000/tcp

# supervisord is the container's main process (exec form, no shell wrapper);
# it starts and supervises both services from the config generated above.
CMD ["supervisord", "-c", "/etc/supervisor/conf.d/oncoagent.conf"]