# syntax=docker/dockerfile:1
# fnmodel / Dockerfile (commits aeb56, 310eb95)
# Switch to vLLM for high-performance, stable inference
FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04

# Build-time only: suppress interactive apt prompts. ARG (not ENV) so the
# setting does not leak into the runtime environment of the final image.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment: unbuffered Python logs and CUDA toolchain paths.
ENV PYTHONUNBUFFERED=1
ENV CUDA_HOME=/usr/local/cuda
ENV PATH="${CUDA_HOME}/bin:${PATH}"
ENV LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"

# Install system dependencies in one layer; skip recommended extras and
# remove the apt lists in the same layer so they never bloat the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        git \
        python3.10 \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip without keeping its wheel cache in the layer.
RUN pip3 install --no-cache-dir --upgrade pip

# Create user with UID 1000 (Hugging Face Spaces default).
RUN useradd -m -u 1000 user

WORKDIR /app

# Copy the dependency manifest alone first so the (expensive) install layer
# stays cached when only application source changes.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy application files already owned by the runtime user. This replaces a
# follow-up `RUN chown -R` + blanket `chmod -R 755`, which would duplicate
# every file into an extra layer.
COPY --chown=user:user . .

# Pre-create the model cache directory so it is writable by the non-root
# user at runtime (HF_HOME below points here).
RUN mkdir -p /app/cache && chown user:user /app/cache

# Documentation only (EXPOSE does not publish): app UI and API ports.
# Both are >1024, so the non-root user can bind them.
EXPOSE 7860
EXPOSE 8000

# Hugging Face cache location. TRANSFORMERS_CACHE is kept for older
# transformers versions; HF_HOME is the current variable.
ENV HF_HOME=/app/cache
ENV TRANSFORMERS_CACHE=/app/cache

# Drop root before launching the application.
USER user

# Exec-form CMD: python3 runs as PID 1 and receives SIGTERM from
# `docker stop` directly.
CMD ["python3", "app.py"]