# syntax=docker/dockerfile:1
# CPU-only inference image: FastAPI + uvicorn serving a HF transformers model.
FROM python:3.10-slim

WORKDIR /app

ENV PYTHONUNBUFFERED=1 \
    HF_HUB_DISABLE_SYMLINKS_WARNING=1

# git: VCS-based pip installs; build-essential: compile sdist wheels.
# update+install combined in one layer; list cache removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        git \
    && rm -rf /var/lib/apt/lists/*

# Packages sorted alphabetically for diffability. torch/transformers/hub are
# pinned (model-compat critical); the rest are left unpinned as before to
# avoid changing resolution behavior.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir \
        accelerate \
        einops \
        fastapi \
        "huggingface-hub>=0.23.2" \
        pillow \
        requests \
        timm \
        "torch==2.3.0" \
        torchvision \
        "transformers==4.44.2" \
        "uvicorn[standard]"

# Create a stub flash_attn module so the model's `import flash_attn`
# succeeds in this CPU-only build (no CUDA, real flash-attn not installable).
# Any actual call raises ImportError with a clear message.
RUN mkdir -p /usr/local/lib/python3.10/site-packages/flash_attn && \
    printf "def _not_available(*args, **kwargs):\n raise ImportError('flash_attn is not available in this CPU-only build')\n\n__all__ = ['_not_available']\n" > /usr/local/lib/python3.10/site-packages/flash_attn/__init__.py

# Run as a dedicated non-root user; /app must be writable so the HF hub
# cache (defaults under $HOME) and any runtime artifacts can be written.
RUN useradd --system --uid 10001 --home /app appuser && \
    chown appuser:appuser /app

COPY --chown=appuser:appuser app.py /app/app.py

USER appuser

# Documentation only — the port still has to be published at `docker run`.
EXPOSE 7860

# Exec form: uvicorn is PID 1 and receives SIGTERM from `docker stop`.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]