# Image for a FastAPI service (app.py) served by uvicorn on port 7860,
# built on the CUDA 11.8 / cuDNN 8 development base with PyTorch cu118 wheels.
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04

# Build-time only: stops apt from prompting during the build. Declared as ARG
# (not ENV) so it is not carried into the running container's environment.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment.
# - HF_HOME / TRANSFORMERS_CACHE / TORCH_HOME all point at the shared cache
#   directory created below.
# - HF_TOKEN is an empty placeholder; supply the real value at run time
#   (e.g. `docker run -e HF_TOKEN=...`). Never bake a token into the image.
ENV HF_HOME=/tmp/hf_cache \
    TRANSFORMERS_CACHE=/tmp/hf_cache \
    TORCH_HOME=/tmp/hf_cache \
    TRITON_DISABLE="1" \
    BNB_DISABLE_TRITON="1" \
    USE_TORCH="1" \
    BITSANDBYTES_NOWELCOME="1" \
    HF_TOKEN=""

# Install system dependencies including Python and pip.
# update + install + list cleanup happen in one layer so the apt index is never
# stale and never stored in the image. ca-certificates is listed explicitly
# because --no-install-recommends would otherwise not guarantee it for
# curl/wget/git over HTTPS.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        python3 \
        python3-dev \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Make `python` and `pip` resolve to the Python 3 tools.
RUN ln -sf /usr/bin/python3 /usr/bin/python \
    && ln -sf /usr/bin/pip3 /usr/bin/pip

# Set working directory (created automatically if missing).
WORKDIR /app

# Create the shared cache directory.
# NOTE(review): it is world-writable, presumably so the cache works when the
# container is started under an arbitrary non-root UID - confirm, and tighten
# to a dedicated user/group if the runtime UID is known.
RUN mkdir -p /tmp/hf_cache && chmod -R 777 /tmp/hf_cache

# Upgrade the packaging toolchain first so every later install uses it.
RUN python -m pip install --no-cache-dir --upgrade pip setuptools wheel

# PyTorch built against CUDA 11.8, taken from the PyTorch wheel index.
# Kept in its own layer: it is by far the largest install and changes rarely.
RUN python -m pip install --no-cache-dir \
        torch \
        torchvision \
        torchaudio \
        --index-url https://download.pytorch.org/whl/cu118

# Application Python dependencies from PyPI.
# NOTE(review): versions are unpinned, so rebuilds are not reproducible;
# consider pinning (or a requirements.txt) once a known-good set is chosen.
RUN python -m pip install --no-cache-dir \
        accelerate \
        bitsandbytes \
        fastapi \
        huggingface_hub \
        protobuf \
        python-multipart \
        scipy \
        sentencepiece \
        transformers \
        uvicorn

# Expose API port (documentation only; publish it with `-p` at run time).
EXPOSE 7860

# Copy the API script last so source edits do not invalidate dependency layers.
COPY app.py /app/

# NOTE(review): no USER directive is set, so the process runs as root unless
# the runtime overrides the UID - consider adding a dedicated non-root user.

# Run FastAPI server (exec form so uvicorn is PID 1 and receives SIGTERM).
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]