# syntax=docker/dockerfile:1
# CPU-only inference image: FastAPI + uvicorn serving a HF transformers model.
FROM python:3.10-slim

WORKDIR /app

ENV PYTHONUNBUFFERED=1 \
    HF_HUB_DISABLE_SYMLINKS_WARNING=1

# git: VCS-based pip installs; build-essential: compile sdist wheels.
# update+install combined in one layer; list cache removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        git \
    && rm -rf /var/lib/apt/lists/*

# Packages sorted alphabetically for diffability. torch/transformers/hub are
# pinned (model-compat critical); the rest are left unpinned as before to
# avoid changing resolution behavior.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir \
        accelerate \
        einops \
        fastapi \
        "huggingface-hub>=0.23.2" \
        pillow \
        requests \
        timm \
        "torch==2.3.0" \
        torchvision \
        "transformers==4.44.2" \
        "uvicorn[standard]"

# Create a stub flash_attn module so the model's `import flash_attn`
# succeeds in this CPU-only build (no CUDA, real flash-attn not installable).
# Any actual call raises ImportError with a clear message.
RUN mkdir -p /usr/local/lib/python3.10/site-packages/flash_attn && \
    printf "def _not_available(*args, **kwargs):\n raise ImportError('flash_attn is not available in this CPU-only build')\n\n__all__ = ['_not_available']\n" > /usr/local/lib/python3.10/site-packages/flash_attn/__init__.py

# Run as a dedicated non-root user; /app must be writable so the HF hub
# cache (defaults under $HOME) and any runtime artifacts can be written.
RUN useradd --system --uid 10001 --home /app appuser && \
    chown appuser:appuser /app

COPY --chown=appuser:appuser app.py /app/app.py

USER appuser

# Documentation only — the port still has to be published at `docker run`.
EXPOSE 7860

# Exec form: uvicorn is PID 1 and receives SIGTERM from `docker stop`.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]