# Q-WAN Dockerfile (rev e7d05ba)
# syntax=docker/dockerfile:1

# Use an official Python runtime as a parent image.
# NOTE: Debian "buster" reached end of life and its apt repositories moved to
# archive.debian.org, which breaks `apt-get update` on -buster images;
# "bullseye" is the supported successor for python:3.10-slim.
FROM python:3.10-slim-bullseye

# Set the working directory in the container (WORKDIR creates it if missing).
WORKDIR /app

# System dependencies needed for MLC LLM (git, cmake and a C/C++ toolchain,
# required if models are compiled within the container). `update` and
# `install` share one layer so a stale apt cache is never reused, and the
# package lists are removed in the same layer so they never persist in the
# image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged runtime user; created early so COPY --chown can reference it.
RUN groupadd --system app && useradd --system --gid app --home /app app

# Copy only the requirements manifest first: the dependency layer stays
# cached until requirements.txt itself changes, even when app code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install torch specifically for CUDA if using GPU, otherwise use CPU.
# For HuggingFace Spaces with GPU, CUDA 11.8 is a common and recommended
# version; adjust 'cu118' to 'cpu' or a different CUDA version for other
# target hardware. MLC-LLM might pin its own torch dependency — ensure
# compatibility or remove this step if MLC-LLM handles it.
RUN pip uninstall -y torch \
    && pip install --no-cache-dir torch==2.1.0 --extra-index-url https://download.pytorch.org/whl/cu118

# Copy the Flask application files, owned by the runtime user.
COPY --chown=app:app app.py .

# Copy the model artifacts. This assumes model_artifacts exists and is
# populated. For large models, consider git-lfs on HuggingFace Spaces or
# downloading at runtime if the model is too large for the Docker image or
# needs dynamic loading.
COPY --chown=app:app model_artifacts ./model_artifacts

# Expose the port the app runs on (documentation only; does not publish it).
EXPOSE 5000

# Environment variables for MLC LLM model paths.
# Dockerfile has no inline comments — a trailing `#` after an ENV value is
# parsed as part of the instruction and breaks the build — so the note lives
# here instead: MLC_MODEL_NAME must match your downloaded model.
# The relative artifacts dir resolves against WORKDIR (/app) at runtime.
ENV MLC_MODEL_ARTIFACTS_DIR="./model_artifacts" \
    MLC_MODEL_NAME="Llama-2-7b-chat-hf-q4f16_1"

# Drop privileges before launching the app (port 5000 needs no root).
USER app

# Run the Flask application with its built-in server — fine for development
# and small deployments. For production-grade deployments, use a WSGI server
# such as Gunicorn, e.g.:
#   gunicorn --bind 0.0.0.0:5000 app:app
CMD ["python", "app.py"]