# Initial commit 2ec0d39 (rajkumarrawal)
# Base image: Python 3.11 on Debian slim (recommended for Spaces).
# The Debian suite is pinned explicitly because the unsuffixed "3.11-slim" tag
# follows whichever Debian release the image maintainers currently ship, and
# the apt package names installed below differ between Debian releases.
FROM python:3.11-slim-bookworm
# Runtime environment for Python and pip:
#   PYTHONUNBUFFERED              - flush stdout/stderr immediately so logs show up live
#   PYTHONDONTWRITEBYTECODE       - do not write .pyc files
#   PIP_NO_CACHE_DIR              - disable pip's download/wheel cache (smaller layers)
#   PIP_DISABLE_PIP_VERSION_CHECK - skip pip's "new version available" check
#   CUDA_VISIBLE_DEVICES=""       - hide all GPUs, i.e. force CPU-only execution
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    CUDA_VISIBLE_DEVICES=""
# Build-time only: keeps apt/debconf from prompting during RUN steps.
# Declared as ARG (not ENV) so it is visible to RUN instructions but is not
# baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive
# Install system libraries and build headers (imaging, X11, OpenMP, Tcl/Tk).
# - "libgl1" replaces "libgl1-mesa-glx": the latter is only a transitional
#   package and is no longer available in newer Debian releases, which makes
#   the build fail there; libgl1 ships the same libGL.so.1.
# - Packages are listed alphabetically, one per line, for readable diffs.
# - The apt lists are removed in the same layer so they never reach the image.
# NOTE(review): python3-tk targets Debian's own python3, not the interpreter
# under /usr/local provided by the base image — confirm it is still needed.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        libfreetype6-dev \
        libfribidi-dev \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        libharfbuzz-dev \
        libjpeg62-turbo-dev \
        liblcms2-dev \
        libsm6 \
        libwebp-dev \
        libx11-dev \
        libxcb1-dev \
        libxext6 \
        libxrender-dev \
        python3-tk \
        tcl8.6-dev \
        tk8.6-dev \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*
# Create the unprivileged account the application runs as.
# - Fixed UID/GID 1000: Hugging Face Spaces starts containers as user ID 1000,
#   so files owned by this account stay writable at runtime; a stable numeric
#   ID also helps runtimes that verify non-root execution.
# - --create-home: gives the account a real home directory, so libraries that
#   write per-user caches or config (e.g. under ~/.cache) do not fail.
RUN groupadd --gid 1000 appuser && \
    useradd --uid 1000 --gid appuser --create-home appuser
# All following relative paths (COPY destinations, RUN, CMD) resolve inside /app.
WORKDIR /app
# Bring in only the dependency manifest at this point: the dependency-install
# layer that follows can then be reused from cache until requirements.txt changes.
COPY requirements.txt ./
# Install Python dependencies.
# 1. Upgrade the packaging toolchain (pip, setuptools, wheel).
# 2. Install the application's requirements — a failure here MUST fail the build.
# 3. Best-effort install of optional monitoring packages (nvidia-ml-py3, psutil).
#
# The optional step is wrapped in { ...; } so that its fallback applies to that
# step only. Written as "a && b && c || true", the "|| true" covers the whole
# chain and would silently hide a failed requirements install.
#
# "pip cache purge" is intentionally not run: this file disables the pip cache
# (PIP_NO_CACHE_DIR=1), and pip's cache sub-commands exit with an error when
# the cache is disabled, which would abort the chain before the optional step.
# --no-cache-dir is passed explicitly so nothing is cached in the layer anyway.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
    pip install --no-cache-dir -r requirements.txt && \
    { pip install --no-cache-dir nvidia-ml-py3 psutil || \
      echo "WARNING: optional packages nvidia-ml-py3/psutil could not be installed" >&2; }
# Copy the application code, setting owner and mode during the copy itself.
# A separate "RUN chown -R ... && chmod -R ..." rewrites every file into a new
# layer and roughly doubles the space the application takes in the image;
# COPY --chown/--chmod yields the same ownership (appuser:appuser) and mode
# (755) for the copied content without that duplication.
# Note: --chmod on COPY requires the BuildKit builder.
COPY --chown=appuser:appuser --chmod=755 . .
# /app itself was created by WORKDIR and is owned by root; hand the directory
# (only the directory, not its contents again) to appuser so the application
# can create files in it at runtime.
RUN chown appuser:appuser /app
# From this point on, build steps and the container process run as appuser.
USER appuser
# Pre-create the scratch directories for logs, cache and models under /tmp
# (as appuser, so they are owned by that account) and set their mode to 755.
RUN set -e; \
    for scratch_dir in /tmp/logs /tmp/cache /tmp/models; do \
        mkdir -p "$scratch_dir"; \
        chmod 755 "$scratch_dir"; \
    done
# Default application settings, exported as container environment variables.
# NOTE(review): presumably read by app.py at startup — confirm which of these
# the application actually consumes.
ENV HOST=0.0.0.0 \
    PORT=7860 \
    APP_ENV=production \
    LOG_LEVEL=INFO \
    MAX_CONCURRENT_REQUESTS=5 \
    CACHE_TTL=3600 \
    MODEL_CACHE_SIZE=1000 \
    METRICS_RETENTION_DAYS=7
# Liveness probe: request /health on port 7860 with curl every 30s (10s
# timeout). The first 60s after start are a grace period, and 3 consecutive
# failures mark the container as unhealthy.
HEALTHCHECK \
    --start-period=60s \
    --interval=30s \
    --timeout=10s \
    --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1
# Document the port the service listens on (EXPOSE does not publish it).
EXPOSE 7860/tcp
# Default process: run app.py with the image's Python interpreter. Exec form
# is used, so no shell sits between the container runtime and the process.
CMD ["python", "app.py"]
# Multi-stage build optimization notes:
# This Dockerfile uses a single-stage build for simplicity
# For production deployments with larger models, consider:
# 1. Pre-building model cache in build stage
# 2. Using multi-stage builds to separate build and runtime dependencies
# 3. Adding model quantization for faster loading
# 4. Implementing layer caching strategies for updates
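# CUDA support can be enabled by switching to a CUDA base image, e.g.:
# FROM nvidia/cuda:11.8.0-devel-ubuntu20.04 AS cuda-base
# Then install Python and a CUDA-enabled PyTorch build, and remove the
# CUDA_VISIBLE_DEVICES="" setting above (it hides all GPUs from the process).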
# Practices implemented in this Dockerfile (security, caching, operations):
# 1. Non-root user for security
# 2. Efficient layer caching with requirements.txt first
# 3. System dependencies optimized for ML workloads
# 4. Health checks for monitoring
# 5. Environment variables for runtime configuration
# 6. Proper file permissions and directory structure
# 7. Graceful handling of optional dependencies