# ---------------------------------------------------------------------------
# Stage 1 (builder): compile the C++ inference engine.
# Only /build/inference is copied out of this stage; the toolchain installed
# here never reaches the final image.
# ---------------------------------------------------------------------------
FROM ubuntu:22.04 AS builder

# Toolchain only. --no-install-recommends skips optional extras, and the apt
# package index is deleted in the same layer that downloaded it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        g++ \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /build
COPY inference.cpp .

# Compiler flags:
#   -O3 -funroll-loops -flto      aggressive optimisation incl. link-time optimisation
#   -mavx2 -mfma                  permit AVX2 and FMA instructions; the resulting
#                                 binary therefore needs a CPU that supports them
#   -fopenmp                      enable OpenMP (runtime library: libgomp)
#   -ffast-math -fno-math-errno   relax strict IEEE / errno handling for speed
#   -std=c++17                    language standard
# The trailing echo/ls only write a confirmation and the binary size to the
# build log.
RUN g++ -O3 -mavx2 -mfma -fopenmp \
        -ffast-math -funroll-loops -flto \
        -fno-math-errno \
        -std=c++17 \
        -o inference inference.cpp -lm && \
    echo "✅ inference binary compiled" && \
    ls -lh inference

# ---------------------------------------------------------------------------
# Stage 2 (runtime): Python service plus the compiled inference binary.
# ---------------------------------------------------------------------------
FROM python:3.11-slim

# PYTHONUNBUFFERED=1 makes Python write stdout/stderr unbuffered, so log lines
# appear immediately in the container log.
# HF_REPO_ID is not used by any instruction in this file.
# NOTE(review): presumably main.py reads it as a Hugging Face repo id - confirm.
ENV PYTHONUNBUFFERED=1
ENV HF_REPO_ID=NOT-OMEGA/NanoMind

# OMP_NUM_THREADS=1 limits each OpenMP process to a single thread.
# NOTE(review): N_ENGINES looks like the number of inference processes the app
# starts (one thread each) - confirm against main.py.
ENV N_ENGINES=3
ENV OMP_NUM_THREADS=1

# Runtime libraries only:
#   curl        used by the HEALTHCHECK below
#   libgomp1    OpenMP runtime for the binary built with -fopenmp
#   libstdc++6  C++ standard library for the binary
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        libgomp1 \
        libstdc++6 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Create the unprivileged user before any files are copied, and hand it the
# still-empty /app directory. Every COPY below sets ownership with --chown,
# so no recursive chown is needed afterwards; a late `chown -R /app` would
# write a second copy of every file (including the model) into a new layer.
RUN useradd -m -u 1000 appuser && \
    chown appuser:appuser /app

# Dependencies first, so this layer stays cached until requirements.txt changes.
COPY --chown=appuser:appuser requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Compiled engine from the builder stage. g++ already writes it with the
# executable bit set and COPY preserves the file mode, so no chmod is needed.
COPY --from=builder --chown=appuser:appuser /build/inference .

# Application code and static page.
COPY --chown=appuser:appuser main.py index.html ./

# Model weights and tokenizer.
COPY --chown=appuser:appuser model.bin tokenizer.bin ./

# Drop root for everything that runs in the container.
USER appuser

# Probe /health every 30s after a 90s start-up grace period; three consecutive
# failures mark the container unhealthy.
HEALTHCHECK --interval=30s --timeout=10s --start-period=90s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

# EXPOSE is documentation only; the port must match the one the healthcheck probes.
EXPOSE 7860
CMD ["python", "main.py"]