# Spaces: Sleeping  (page-status text captured with this file; not a Dockerfile instruction)
# GPU inference image: CUDA 11.8 + cuDNN 8 on Ubuntu 22.04, serving a FastAPI
# app (app.py) with uvicorn on port 7860.
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04

# Build-time only: keeps apt non-interactive during the build without leaving
# DEBIAN_FRONTEND set in the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime configuration.
# - HF_HOME / TRANSFORMERS_CACHE / TORCH_HOME all point at one cache directory
#   under /tmp (created and made writable for the runtime user below).
# - The remaining flags are passed through unchanged from the original image;
#   their exact effect depends on the installed library versions.
ENV HF_HOME=/tmp/hf_cache \
    TRANSFORMERS_CACHE=/tmp/hf_cache \
    TORCH_HOME=/tmp/hf_cache \
    TRITON_DISABLE="1" \
    BNB_DISABLE_TRITON="1" \
    USE_TORCH="1" \
    BITSANDBYTES_NOWELCOME="1"

# System dependencies. update + install + list cleanup share one layer so the
# apt index is never stale and never shipped in the image.
# ca-certificates is listed explicitly because --no-install-recommends would
# otherwise not guarantee it for curl/wget/git over HTTPS.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        python3 \
        python3-pip \
        python3-venv \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Application directory (created by WORKDIR if missing).
WORKDIR /app

# Dedicated unprivileged user that owns the writable paths, replacing the
# original world-writable (chmod 777) directories.
# NOTE(review): UID 1000 is chosen because this image looks targeted at
# Hugging Face Docker Spaces (port 7860, HF_* caches), which run containers as
# UID 1000 -- confirm if the image is deployed under a different runtime UID.
# /.triton is retained from the original image; presumably it is the Triton
# cache location when HOME resolves to "/" -- verify whether it is still needed.
RUN useradd --create-home --uid 1000 --shell /usr/sbin/nologin appuser \
    && mkdir -p /tmp/hf_cache /.triton \
    && chown -R appuser:appuser /tmp/hf_cache /.triton /app

# Python packages are installed system-wide (no virtual environment), as root,
# before switching user. `python3 -m pip` guarantees the interpreter installed
# above is the one being targeted. Each group stays in its own layer so the
# large CUDA-enabled torch wheels are cached independently of the lighter
# packages.
# TODO(review): versions are unpinned, so rebuilds are not reproducible; pin
# them once the set of versions known to work with app.py is established.
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel
RUN python3 -m pip install --no-cache-dir \
        torch torchvision torchaudio \
        --index-url https://download.pytorch.org/whl/cu118
RUN python3 -m pip install --no-cache-dir \
        accelerate \
        fastapi \
        huggingface_hub \
        protobuf \
        transformers \
        uvicorn
RUN python3 -m pip install --no-cache-dir bitsandbytes scipy

# Port the API listens on (documentation only; publishing happens at run time).
EXPOSE 7860

# Application code goes last so editing app.py does not invalidate the
# dependency layers above.
COPY --chown=appuser:appuser app.py /app/

# Drop root for runtime.
USER appuser
ENV HOME=/home/appuser

# Exec form: uvicorn runs as PID 1 and receives SIGTERM directly.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]