# Use an official Python runtime as the base image
FROM python:3.10-slim

# Set environment variables for non-interactive installation.
# NOTE(review): ENV persists into the final image; ARG DEBIAN_FRONTEND=noninteractive
# would scope this to build time only — confirm whether runtime persistence is wanted.
ENV DEBIAN_FRONTEND=noninteractive

# Install system dependencies and other tools.
# Cleaning /var/lib/apt/lists in the same layer keeps the image small.
RUN apt-get update && apt-get install -y \
    git \
    cmake \
    build-essential \
    libomp-dev \
    python3-dev \
    wget \
    curl \
    libtorch-dev \
    libboost-all-dev \
    git-lfs \
    && rm -rf /var/lib/apt/lists/*

# Install Git LFS and initialize it
RUN git lfs install

# Set the working directory
WORKDIR /content

# Step 1: Clone and set up ExecuTorch, pinned to an exact commit for
# reproducible builds, with submodules fetched.
RUN git clone https://github.com/pytorch/executorch && \
    cd executorch && \
    git checkout dfbf6fd53546eb86e18f2e5cc693d70a82e3b03f && \
    git submodule sync && \
    git submodule update --init

# Step 2: Install ExecuTorch requirements (verify and handle errors)
RUN python3 -m pip install --upgrade pip && \
    pip install torch && \
    if [ -f "/content/executorch/requirements.txt" ]; then \
        python3 -m pip install --no-cache-dir -r /content/executorch/requirements.txt; \
    else \
        echo "No requirements.txt found, skipping..."; \
    fi

# Step 3: Install ExecuTorch dependencies with error handling.
# Failures here are deliberately non-fatal (|| echo) so the build can proceed;
# the warnings surface in the build log.
RUN cd /content/executorch && \
    if [ -f "install_requirements.sh" ]; then \
        bash ./install_requirements.sh --pybind || echo "Warning: Failed to execute install_requirements.sh"; \
    else \
        echo "install_requirements.sh not found, skipping..."; \
    fi && \
    cd /content/executorch/examples/models/llama && \
    if [ -f "install_requirements.sh" ]; then \
        bash ./install_requirements.sh || echo "Warning: Failed to execute llama/install_requirements.sh"; \
    else \
        echo "llama/install_requirements.sh not found, skipping..."; \
    fi

# Step 4: Clone the model repository from Hugging Face and pull the
# LFS-tracked model weights.
RUN git clone https://huggingface.co/executorch-community/Llama-3.2-1B-Instruct-SpinQuant_INT4_EO8-ET /content/llama-model && \
    cd /content/llama-model && \
    git lfs pull

# Step 5: Install FastAPI and other necessary Python packages
RUN python3 -m pip install fastapi uvicorn pydantic
# Step 6: Copy the FastAPI application and model runner script into the container
COPY app.py /content/app.py

# Expose the port FastAPI will run on
EXPOSE 7860

# Step 7: Set up the entry point to run the FastAPI server.
# Exec-form CMD so uvicorn runs as PID 1 and receives signals directly.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]