feat: Add Docker deployment files
Changed files: docker/Dockerfile (+40 -0)
docker/Dockerfile
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MiniMind Max2 - Efficient Edge LLM
|
| 2 |
+
# Docker Hub: sultanafariabd/minimind-max2
|
| 3 |
+
|
| 4 |
+
FROM python:3.11-slim
|
| 5 |
+
|
| 6 |
+
LABEL maintainer="MiniMind Team <contact@minimind.ai>"
|
| 7 |
+
LABEL org.opencontainers.image.title="MiniMind Max2"
|
| 8 |
+
LABEL org.opencontainers.image.description="Efficient LLM with MoE (8 experts, 25% activation) + GQA"
|
| 9 |
+
LABEL org.opencontainers.image.version="1.0.0"
|
| 10 |
+
LABEL org.opencontainers.image.source="https://huggingface.co/fariasultana/MiniMind"
|
| 11 |
+
LABEL org.opencontainers.image.licenses="Apache-2.0"
|
| 12 |
+
LABEL ai.model.architecture="MoE+GQA"
|
| 13 |
+
LABEL ai.model.parameters="500M-3B"
|
| 14 |
+
LABEL ai.model.active_ratio="25%"
|
| 15 |
+
|
| 16 |
+
ENV PYTHONUNBUFFERED=1
|
| 17 |
+
ENV MODEL_VARIANT=max2-nano
|
| 18 |
+
ENV PORT=8000
|
| 19 |
+
|
| 20 |
+
WORKDIR /app
|
| 21 |
+
|
| 22 |
+
# Install dependencies
|
| 23 |
+
RUN pip install --no-cache-dir \
|
| 24 |
+
torch>=2.1.0 \
|
| 25 |
+
numpy>=1.24.0 \
|
| 26 |
+
fastapi>=0.100.0 \
|
| 27 |
+
uvicorn>=0.23.0 \
|
| 28 |
+
safetensors>=0.4.0 \
|
| 29 |
+
pydantic>=2.0.0
|
| 30 |
+
|
| 31 |
+
# Copy application
|
| 32 |
+
COPY serve.py /app/
|
| 33 |
+
COPY model_info.json /app/
|
| 34 |
+
|
| 35 |
+
EXPOSE 8000
|
| 36 |
+
|
| 37 |
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s \
|
| 38 |
+
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
|
| 39 |
+
|
| 40 |
+
CMD ["python", "serve.py"]
|