| |
| |
| |
| |
| |
|
|
| |
| # Base image. The default keeps the original `rocm/vllm:latest`, but `latest` is a |
| # mutable tag, so reproducible builds should pin one explicitly: |
| #   docker build --build-arg BASE_IMAGE=rocm/vllm:<tag> . |
| ARG BASE_IMAGE=rocm/vllm:latest |
| FROM ${BASE_IMAGE} |
|
|
| |
| # Build-time only: keeps apt-get non-interactive during `docker build` without |
| # persisting the setting into the environment of the running container. |
| ARG DEBIAN_FRONTEND=noninteractive |
| # Disable Python output buffering so log lines are written as they are produced. |
| ENV PYTHONUNBUFFERED=1 |
| # Gradio bind address and port; 7860 is also declared in the EXPOSE instruction. |
| ENV GRADIO_SERVER_NAME="0.0.0.0" |
| ENV GRADIO_SERVER_PORT=7860 |
|
|
| |
| # ROCm targeting: both variables select the gfx942 (9.4.2) GPU architecture. |
| ENV HSA_OVERRIDE_GFX_VERSION=9.4.2 \ |
|     PYTORCH_ROCM_ARCH="gfx942" |
|
|
| |
| # Application defaults, overridable with `docker run -e NAME=value`. |
| # How the application consumes each variable is not visible in this file. |
| # NOTE(review): DEVICE="cuda" on a ROCm image is presumably intentional (PyTorch |
| # ROCm builds expose GPUs through the "cuda" device type) - confirm. |
| ENV TIER1_MODEL_ID="Qwen/Qwen3.5-9B" \ |
|     TIER2_MODEL_ID="Qwen/Qwen3.6-27B" \ |
|     BASE_MODEL_ID="Qwen/Qwen3.5-9B" \ |
|     VLLM_API_BASE="http://localhost:8000/v1" \ |
|     VLLM_API_KEY="EMPTY" \ |
|     USE_LOCAL_ADAPTERS="false" \ |
|     DEVICE="cuda" \ |
|     TENSOR_PARALLEL_SIZE=1 |
|
|
| # All later relative paths (COPY destinations, chmod target) resolve against /app. |
| WORKDIR /app |
|
| # System packages. supervisor provides the `supervisord` binary used by CMD; |
| # git and build-essential are presumably needed by pip builds - confirm. |
| # The apt package index is removed in the same layer to keep the image small. |
| RUN apt-get update \ |
|  && apt-get install --yes --no-install-recommends \ |
|       build-essential \ |
|       git \ |
|       supervisor \ |
|  && rm -rf /var/lib/apt/lists/* |
|
|
| |
| # Install Python dependencies before copying the rest of the source so this |
| # layer is rebuilt only when requirements.txt changes. |
| COPY requirements.txt ./ |
| RUN pip install --no-cache-dir --requirement /app/requirements.txt |
|
|
| |
| # Copy the remaining build context into the working directory (/app). |
| COPY . . |
|
| # Mark the vLLM launcher as executable. supervisord runs it through `bash`, |
| # so this mainly matters when the script is invoked directly. |
| RUN chmod +x /app/deploy/start_vllm.sh |
|
|
| |
| # Write the supervisord configuration that runs both services in one container. |
| # The heredoc form of RUN requires BuildKit (Dockerfile syntax 1.4 or newer). |
| # |
| #   vllm   - started through a bash wrapper script. supervisord's stop signal |
| #            reaches only the wrapper shell, so stopasgroup/killasgroup send it |
| #            to the whole process group and the server itself is stopped too. |
| #   gradio - launched after vllm (higher priority number); startsecs=30 means it |
| #            must stay up for 30 seconds before it counts as started. |
| # |
| # nodaemon=true keeps supervisord in the foreground as the container's main process. |
| RUN cat > /etc/supervisor/conf.d/oncoagent.conf <<'EOF' |
| [supervisord] |
| nodaemon=true |
| logfile=/var/log/supervisord.log |
|
|
| [program:vllm] |
| command=bash /app/deploy/start_vllm.sh tier1 |
| directory=/app |
| autostart=true |
| autorestart=true |
| stopasgroup=true |
| killasgroup=true |
| stdout_logfile=/var/log/vllm.log |
| stderr_logfile=/var/log/vllm_err.log |
| priority=10 |
|
|
| [program:gradio] |
| command=python /app/ui/app.py |
| directory=/app |
| autostart=true |
| autorestart=true |
| stdout_logfile=/var/log/gradio.log |
| stderr_logfile=/var/log/gradio_err.log |
| priority=20 |
| startsecs=30 |
| EOF |
|
|
| |
| # Gradio UI port (matches GRADIO_SERVER_PORT). |
| EXPOSE 7860 |
| # vLLM API port (matches VLLM_API_BASE). |
| EXPOSE 8000 |
|
|
| # Run supervisord with the configuration generated during the build; it starts |
| # and supervises both the vllm and gradio programs. |
| CMD ["supervisord", "-c", "/etc/supervisor/conf.d/oncoagent.conf"] |
|
|