# Container image for a FastAPI application served by uvicorn on port 7860.
# Python dependencies come from requirements.txt; pip is additionally pointed
# at the llama-cpp-python CPU wheel index.
#
# NOTE(review): there is no USER instruction, so the container runs as the
# base image's default user (presumably root -- confirm). Consider adding a
# dedicated non-root user once it is verified that app.py does not need to
# write to root-owned paths.

# Start from the slim Python 3.10 base image.
FROM python:3.10-slim

# Suppress interactive prompts from apt during the build. Declared as ARG
# rather than ENV so the setting is visible to RUN steps at build time but is
# not baked into the runtime environment of the container.
ARG DEBIAN_FRONTEND=noninteractive

# Install the C/C++ toolchain and CMake. The original note says these are
# needed for the pre-built wheel to work; presumably they also let pip fall
# back to compiling from source -- TODO confirm they are required at runtime.
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
    && rm -rf /var/lib/apt/lists/*

# Set the working directory inside the container (created if missing).
WORKDIR /app

# Copy only the requirements file first so the dependency layers below stay
# cached until requirements.txt itself changes.
COPY ./requirements.txt /app/requirements.txt

# Upgrade pip before installing the project dependencies.
RUN pip install --no-cache-dir --upgrade pip

# Install the Python dependencies. The extra index lets pip pick up CPU
# wheels for llama-cpp-python; per the original author's note, this is what
# resolved a build timeout (presumably by avoiding a source build).
RUN pip install --no-cache-dir -r /app/requirements.txt \
    --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu

# Copy the main application file.
COPY ./app.py /app/app.py

# Document the port that uvicorn listens on (EXPOSE does not publish it).
EXPOSE 7860

# Start the server by telling uvicorn to load the "app" object from app.py,
# instead of just running the script. Exec (JSON-array) form is used, so no
# shell wraps the process.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]