# syntax=docker/dockerfile:1
# fnmodel / Dockerfile (commits aeb56, 310eb95)
# Switch to vLLM for high-performance, stable inference
FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04

# Build-time only: suppress interactive apt prompts. ARG (not ENV) so the
# setting does not leak into the runtime environment of the final image.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment: unbuffered Python logs and CUDA toolchain paths.
ENV PYTHONUNBUFFERED=1
ENV CUDA_HOME=/usr/local/cuda
ENV PATH="${CUDA_HOME}/bin:${PATH}"
ENV LD_LIBRARY_PATH="${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}"

# Install system dependencies in one layer; skip recommended extras and
# remove the apt lists in the same layer so they never bloat the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        git \
        python3.10 \
        python3-pip \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip without keeping its wheel cache in the layer.
RUN pip3 install --no-cache-dir --upgrade pip

# Create user with UID 1000 (Hugging Face Spaces default).
RUN useradd -m -u 1000 user

WORKDIR /app

# Copy the dependency manifest alone first so the (expensive) install layer
# stays cached when only application source changes.
COPY requirements.txt .
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy application files already owned by the runtime user. This replaces a
# follow-up `RUN chown -R` + blanket `chmod -R 755`, which would duplicate
# every file into an extra layer.
COPY --chown=user:user . .

# Pre-create the model cache directory so it is writable by the non-root
# user at runtime (HF_HOME below points here).
RUN mkdir -p /app/cache && chown user:user /app/cache

# Documentation only (EXPOSE does not publish): app UI and API ports.
# Both are >1024, so the non-root user can bind them.
EXPOSE 7860
EXPOSE 8000

# Hugging Face cache location. TRANSFORMERS_CACHE is kept for older
# transformers versions; HF_HOME is the current variable.
ENV HF_HOME=/app/cache
ENV TRANSFORMERS_CACHE=/app/cache

# Drop root before launching the application.
USER user

# Exec-form CMD: python3 runs as PID 1 and receives SIGTERM from
# `docker stop` directly.
CMD ["python3", "app.py"]