File size: 1,486 Bytes
c57d186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40

# --- Stage 1: compile the React frontend into static assets ---
FROM node:20-alpine AS frontend
WORKDIR /app/frontend

# Bring in only the npm manifests first so the dependency layer below stays
# cached until package*.json changes.
COPY frontend/package*.json /app/frontend/
RUN npm ci

# Add the rest of the frontend sources and run the project's build script.
COPY frontend/ /app/frontend/
RUN npm run build

# --- Stage 2: GPU-enabled Python backend ---
# Requires NVIDIA Container Toolkit on host and runtime flag: --gpus all
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04 AS backend

# Build-time only: keeps apt non-interactive during the build without leaking
# DEBIAN_FRONTEND into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Python/pip behaviour: skip .pyc files, unbuffered stdout/stderr, and no pip
# download cache stored in image layers.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# Install Python and system deps; apt lists are removed in the same layer so
# they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        git \
        python3 \
        python3-pip \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install CUDA-enabled PyTorch (cu121) from the PyTorch wheel index
RUN python3 -m pip install --upgrade pip \
    && python3 -m pip install \
        --index-url https://download.pytorch.org/whl/cu121 \
        torch==2.4.1+cu121

# Install remaining Python deps; requirements.txt is copied on its own so this
# layer is only rebuilt when the requirements change, not on every code edit.
COPY backend/requirements.txt /app/backend/requirements.txt
RUN python3 -m pip install -r /app/backend/requirements.txt

# Copy backend code
COPY backend/ /app/backend/

# Copy frontend build to a static dir served by FastAPI.
# COPY creates /app/static itself, so no separate mkdir layer is needed.
COPY --from=frontend /app/frontend/dist/ /app/static/

# Default runtime configuration (presumably read by the backend app — verify
# against backend/app); override with `docker run -e NAME=value`.
ENV STATIC_DIR=/app/static \
    MODEL_ID=FractalAIResearch/Fathom-R1-14B \
    PIPELINE_TASK=text-generation \
    QUANTIZE=auto

EXPOSE 8000

# NOTE(review): no USER is set, so the server runs as root. Switching to a
# non-root user would move the home/cache location, so confirm where the model
# cache is expected to live before changing it.
CMD ["uvicorn", "app.main:app", "--app-dir", "/app/backend", "--host", "0.0.0.0", "--port", "8000"]