# syntax=docker/dockerfile:1
# Deploy Polyglot backend with quantized models
# Source revision: d7086f8 (verified)
FROM python:3.11-slim

WORKDIR /app

# System dependencies: audio tooling (ffmpeg/sox/libsndfile) and espeak for
# speech synthesis, plus fetch utilities used by the runtime code download.
# --no-install-recommends keeps the image small (hadolint DL3015); the apt
# lists are removed in the same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    espeak \
    espeak-data \
    ffmpeg \
    gnupg \
    libespeak-dev \
    libespeak1 \
    libsndfile1 \
    sox \
    wget \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies before copying anything else so this layer is
# cached until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Bootstrap scripts only — the application code itself is downloaded at
# runtime from a private code Space. startup.sh checks the CODE_SPACE_ID and
# HUGGING_FACE_HUB_TOKEN environment variables, downloads the code, then
# starts the server.
COPY download_code.py preload_models.py startup.sh ./
RUN chmod +x startup.sh

# Cache locations for HF Hub / transformers / NLTK / numba.
# TRANSFORMERS_CACHE is kept for older transformers releases; newer versions
# derive the cache path from HF_HOME.
ENV HF_HOME=/app/.cache \
    TRANSFORMERS_CACHE=/app/.cache \
    NLTK_DATA=/app/nltk_data \
    NUMBA_CACHE_DIR=/app/.cache/numba \
    PYTHONPATH=/app \
    PORT=7860

# Create cache/data directories and open permissions in a single layer.
# HuggingFace Spaces may run the container as an arbitrary non-root UID, and
# startup.sh must be able to write the downloaded code into /app at runtime.
# NOTE(review): chmod -R 777 is broad — if the Space's runtime UID is known,
# a dedicated user with targeted ownership would be tighter.
RUN mkdir -p "$NUMBA_CACHE_DIR" "$NLTK_DATA" /app/data/learning/users \
    && chmod -R 777 /app

# Pre-download models at build time. The HF token is consumed via a BuildKit
# secret mount — the mechanism HuggingFace Spaces uses to expose Space secrets
# during the build — rather than a build ARG/ENV, so it never lands in image
# layers or `docker history`. (A bare $HUGGING_FACE_HUB_TOKEN here would
# expand to an empty string: it is not declared in this Dockerfile.)
# required=false + the trailing || keep the build best-effort: models are
# fetched on first use if the preload cannot run.
RUN --mount=type=secret,id=HUGGING_FACE_HUB_TOKEN,mode=0444,required=false \
    python preload_models.py "$(cat /run/secrets/HUGGING_FACE_HUB_TOKEN 2>/dev/null || true)" \
    || echo "Model preload skipped - will download on first use"

# HuggingFace Spaces standard port (documentation only; does not publish).
EXPOSE 7860

# startup.sh downloads the application code and starts the server.
CMD ["./startup.sh"]