# syntax=docker/dockerfile:1

# Use an official Python runtime as a parent image.
# NOTE: buster (Debian 10) is end-of-life and its apt repositories have moved to
# archive.debian.org, which breaks `apt-get update`; bookworm is the current stable.
FROM python:3.10-slim-bookworm

# Set the working directory in the container (created automatically if missing)
WORKDIR /app

# Install system dependencies needed for MLC LLM (e.g., git, cmake for build tools
# if compiling on the fly). These are necessary for MLC LLM's build process if
# models are compiled within the container. update+install share one layer and the
# apt lists are removed in the same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
    && rm -rf /var/lib/apt/lists/*

# Copy only the requirements file first so the dependency layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install torch specifically for CUDA if using GPU, otherwise use CPU.
# For HuggingFace Spaces with GPU, CUDA 11.8 is a common and recommended version.
# Users might need to adjust 'cu118' to 'cpu' or a different CUDA version based on
# their target hardware. MLC-LLM might have its own torch dependency; ensure
# compatibility or remove this step if MLC-LLM handles it.
RUN pip uninstall -y torch \
    && pip install --no-cache-dir torch==2.1.0 \
        --extra-index-url https://download.pytorch.org/whl/cu118

# Create an unprivileged system user so the service does not run as root.
RUN groupadd --system app && useradd --system --gid app --home /app app

# Copy the Flask application files
COPY --chown=app:app app.py .

# Copy the model artifacts. This assumes model_artifacts exists and is populated.
# For large models, consider using git-lfs for HuggingFace Spaces or downloading
# at runtime if the model is too large for the Docker image or needs dynamic loading.
COPY --chown=app:app model_artifacts ./model_artifacts

# Document the port the app listens on (EXPOSE does not publish it)
EXPOSE 5000

# Define environment variables for MLC LLM model paths.
# Ensure MLC_MODEL_NAME matches your downloaded model. (Dockerfile instructions do
# not support trailing `#` comments, so this note must stay on its own lines.)
ENV MLC_MODEL_ARTIFACTS_DIR="./model_artifacts" \
    MLC_MODEL_NAME="Llama-2-7b-chat-hf-q4f16_1"

# Drop root privileges for the runtime process.
USER app

# Command to run the Flask application (exec form so python is PID 1 and
# receives SIGTERM from `docker stop`).
# Flask's built-in server is fine for development and small deployments; for
# production-grade deployments, consider a WSGI server like Gunicorn
# (e.g., gunicorn --bind 0.0.0.0:5000 app:app).
CMD ["python", "app.py"]