# Dockerfile: CPU-only FastAPI inference server for Gemma-3 1B (GGUF, Q8_0)
# via llama-cpp-python, targeting a Hugging Face Space (port 7860).

# Use standard Python image
FROM python:3.10

# Set working directory
WORKDIR /app

# Install wget (and clean apt lists in the same layer to keep the image small)
RUN apt-get update && apt-get install -y wget && rm -rf /var/lib/apt/lists/*

# Download the Model (Gemma 1B Q8)
RUN wget "https://huggingface.co/unsloth/gemma-3-1b-it-GGUF/resolve/main/gemma-3-1b-it-Q8_0.gguf" -O /gemma-3-1b-it-Q8_0.gguf

# Install FastAPI and Uvicorn
RUN pip install --no-cache-dir fastapi uvicorn pydantic sse-starlette

# CRITICAL STEP: Install pre-built llama-cpp-python for CPU
# This skips the "Building wheel" step that freezes your space
# (--no-cache-dir added for consistency with the install above and a smaller image)
RUN pip install --no-cache-dir llama-cpp-python \
    --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu

# Copy your main.py file
COPY main.py /app/main.py

# Give permissions
# NOTE(review): 777 is broader than needed — a source file only needs to be
# readable (644). Kept as-is since HF Spaces run as a non-root user and the
# app may rely on write access; confirm before tightening.
RUN chmod 777 /app/main.py

# Run the server
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]