# Use a slim Python image
FROM python:3.10-slim
# Install g++ for the C++ code. GCC bundles its own OpenMP runtime (libgomp),
# so no separate OpenMP package is needed; libomp-dev is LLVM's OpenMP runtime
# and would go unused with g++.
RUN apt-get update && apt-get install -y --no-install-recommends \
    g++ \
    && rm -rf /var/lib/apt/lists/*
# Set the working directory
WORKDIR /app
# Copy all your files into the container
COPY . /app
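# Because COPY . sends the entire build context, a .dockerignore next to this
# Dockerfile is worth adding; the entries below are illustrative, not required:
#   .git
#   __pycache__/
#   *.pyc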
# Compile the C++ inference engine.
# IMPORTANT: hardware-specific flags (-mavx2, -mfma, -march=native) are omitted
# for compatibility on Hugging Face's free tier. In particular, -march=native
# bakes in whatever extensions the build CPU happens to have, and the binary
# can then die with an illegal-instruction error on a different cloud CPU.
# The output is a Linux binary named 'inference'.
RUN g++ -O3 -fopenmp -std=c++17 inference.cpp -o inference -lm
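# If you control the target hardware and know it supports AVX2 and FMA, you
# can opt back into the vectorized build (a sketch for x86-64 targets only):
# RUN g++ -O3 -mavx2 -mfma -fopenmp -std=c++17 inference.cpp -o inference -lm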
# Ensure the binary is executable (g++ already sets the executable bit,
# so this is just a safeguard)
RUN chmod +x inference
# Install your Python requirements
RUN pip install --no-cache-dir -r requirements.txt
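# requirements.txt is assumed to include at least fastapi and uvicorn,
# since the CMD below serves a FastAPI app with uvicorn:
#   fastapi
#   uvicorn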
# Run the app as a non-root user (recommended for Hugging Face Docker Spaces)
RUN useradd -m appuser && chown -R appuser /app
USER appuser
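# Note: Hugging Face's Docker Spaces documentation suggests a fixed UID of
# 1000 for the non-root user, e.g. `useradd -m -u 1000 appuser`; adjust the
# line above if your Space expects that convention.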
# Expose port 7860 (Hugging Face standard)
EXPOSE 7860
# Command to run your FastAPI app
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]