# Image for the application started with `python3 app.py`, documented to
# listen on port 7860. Built on the CUDA 12.4.1 + cuDNN runtime base for
# Ubuntu 22.04.
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04

# Build-time only: keeps apt/debconf from prompting during `RUN` steps.
# Declared as ARG (not ENV) so it is not baked into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Disable Python's stdout/stderr buffering so log lines appear immediately
# in `docker logs`.
ENV PYTHONUNBUFFERED=1

# OS-level dependencies. `git` is required by the `git+https` pip install
# below. The apt package index is removed in the same layer so it does not
# add to the image size.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        python3 \
        python3-dev \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Gemma4 (model_type="gemma4") is only available in the Transformers git dev
# branch. Installing from PyPI WILL fail at runtime with:
#   "The checkpoint you are trying to load has model type `gemma4` but
#    Transformers does not recognize this architecture."
# Keep this install line pointed at git+https until gemma4 lands in a release.
#
# NOTE(review): the git install tracks the default branch and is therefore not
# reproducible between builds. Once a known-good commit is identified, pin it
# by appending `@<commit-sha>` to the URL.
#
# `python3 -m pip` is used so the packages are installed for the same
# interpreter that CMD launches.
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir \
        "torch>=2.4.0" \
        "git+https://github.com/huggingface/transformers.git" \
        "accelerate>=1.0.0" \
        "huggingface_hub" \
        "sentencepiece" "protobuf" \
        "gradio>=5.0" \
        "fastapi" "uvicorn" "httpx" "requests" \
        "Pillow" "PyMuPDF" "openai"

# Unprivileged account for running the application. A fixed numeric UID/GID
# lets orchestrators verify the container is non-root. The home directory is
# created so libraries that write under $HOME have a writable location, and
# /app is created up front and owned by the account so the application can
# create files in its working directory.
# NOTE(review): any existing volume mounts that target /root (e.g. a model
# cache) must be re-pointed at /home/app.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --create-home --home-dir /home/app app \
    && mkdir -p /app \
    && chown app:app /app

WORKDIR /app

# Application source is copied after dependency installation so that source
# edits do not invalidate the dependency layers above.
COPY --chown=app:app . /app

# Drop root privileges for everything that runs in the container.
USER app

# Documentation only; publish the port with `-p 7860:7860` at run time.
EXPOSE 7860

CMD ["python3", "app.py"]