"""
Docker/OCI Integration for MiniMind Max2

Package and distribute models via Docker Hub and OCI-compliant registries.
"""
|
|
|
|
|
from dataclasses import dataclass, field |
|
|
from typing import List, Optional, Dict, Any |
|
|
from pathlib import Path |
|
|
import json |
|
|
import os |
|
|
import subprocess |
|
|
import hashlib |
|
|
|
|
|
|
|
|
@dataclass
class DockerConfig:
    """Settings shared by the Dockerfile generator and the image/artifact builders.

    All fields have defaults, so ``DockerConfig()`` is a valid configuration.
    Field order is part of the positional constructor signature.
    """

    # --- Where the image is published -------------------------------------
    registry: str = "docker.io"        # registry host name
    username: str = ""                 # account / namespace used in the image name
    repository: str = "minimind-max2"  # repository part of the image name
    tag: str = "latest"                # image tag

    # --- Which model is packaged ------------------------------------------
    model_variant: str = "max2-nano"   # substituted into Dockerfile labels and env
    model_format: str = "safetensors"  # substituted into Dockerfile labels and env

    # --- Container settings -----------------------------------------------
    base_image: str = "python:3.11-slim"  # used for the Dockerfile FROM line
    expose_port: int = 8000               # used for EXPOSE and the health check URL
    enable_api: bool = True               # NOTE(review): not read by the code visible here

    # --- OCI artifact settings --------------------------------------------
    oci_artifact: bool = False            # NOTE(review): not read by the code visible here
    media_type: str = "application/vnd.minimind.model"  # media type of the model layer
|
|
|
|
|
|
|
|
class DockerfileGenerator:
    """Generate a Docker build context skeleton for MiniMind models.

    ``generate`` writes three files into the output directory: a
    ``Dockerfile`` rendered from ``DOCKERFILE_TEMPLATE``, the API server
    ``serve.py`` (``SERVE_SCRIPT``, written verbatim) and a
    ``.dockerignore``.  The Dockerfile also COPYs ``model/``, ``configs/``,
    ``capabilities/``, ``optimization/`` and ``weights/``; this class does
    not create those directories.
    """

    # Rendered with str.format(); placeholders: variant, base_image, version,
    # params, format, port.
    #
    # * Requirement specifiers are quoted: RUN uses the shell form, where an
    #   unquoted ``>`` is an output redirection, so ``torch>=2.1.0`` would
    #   install an unpinned torch and write a file called ``=2.1.0``.
    # * PORT is exported so the server (which reads $PORT) listens on the
    #   same port that EXPOSE and HEALTHCHECK refer to.
    # * The health check uses the Python interpreter instead of curl, which
    #   is not installed in slim Python base images.
    DOCKERFILE_TEMPLATE = '''# MiniMind Max2 - {variant}
# Efficient edge-deployed language model with MoE architecture

FROM {base_image}

LABEL maintainer="MiniMind Team"
LABEL org.opencontainers.image.title="MiniMind Max2 - {variant}"
LABEL org.opencontainers.image.description="Efficient LLM with MoE (8 experts, 25% activation)"
LABEL org.opencontainers.image.version="{version}"
LABEL org.opencontainers.image.source="https://huggingface.co/fariasultana/MiniMind"
LABEL ai.model.architecture="MoE+GQA"
LABEL ai.model.parameters="{params}"
LABEL ai.model.format="{format}"

# Set environment
ENV PYTHONUNBUFFERED=1
ENV MODEL_VARIANT={variant}
ENV MODEL_FORMAT={format}
ENV PORT={port}

WORKDIR /app

# Install dependencies
RUN pip install --no-cache-dir \\
    "torch>=2.1.0" \\
    "numpy>=1.24.0" \\
    "fastapi>=0.100.0" \\
    "uvicorn>=0.23.0" \\
    "safetensors>=0.4.0" \\
    "huggingface_hub>=0.19.0"

# Copy model files
COPY model/ /app/model/
COPY configs/ /app/configs/
COPY capabilities/ /app/capabilities/
COPY optimization/ /app/optimization/
COPY weights/ /app/weights/
COPY serve.py /app/serve.py

# Expose API port
EXPOSE {port}

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s \\
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:{port}/health', timeout=5)" || exit 1

# Run API server
CMD ["python", "serve.py"]
'''

    # Written to serve.py verbatim (never passed through str.format, so the
    # braces inside it are literal).
    SERVE_SCRIPT = '''#!/usr/bin/env python3
"""MiniMind Max2 API Server"""

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional, List
import torch
import os
import json

# Model configuration
MODEL_VARIANT = os.getenv("MODEL_VARIANT", "max2-nano")
MODEL_FORMAT = os.getenv("MODEL_FORMAT", "safetensors")

app = FastAPI(
    title="MiniMind Max2 API",
    description="Efficient edge-deployed LLM with MoE architecture",
    version="1.0.0",
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Request/Response models
class GenerateRequest(BaseModel):
    prompt: str
    max_tokens: int = 100
    temperature: float = 0.7
    top_p: float = 0.95
    thinking_mode: str = "interleaved"

class GenerateResponse(BaseModel):
    text: str
    thinking: Optional[str] = None
    tokens_generated: int
    model: str

class ModelInfo(BaseModel):
    name: str
    variant: str
    architecture: str
    parameters: str
    active_ratio: float
    format: str

# Global model placeholder
model = None

@app.on_event("startup")
async def load_model():
    global model
    print(f"Loading MiniMind {MODEL_VARIANT}...")
    # In production, load actual model here
    model = {"loaded": True, "variant": MODEL_VARIANT}
    print("Model loaded successfully!")

@app.get("/health")
async def health():
    return {"status": "healthy", "model_loaded": model is not None}

@app.get("/info", response_model=ModelInfo)
async def info():
    params_map = {
        "max2-nano": "500M (125M active)",
        "max2-lite": "1.5B (375M active)",
        "max2-pro": "3B (750M active)",
    }
    return ModelInfo(
        name="MiniMind Max2",
        variant=MODEL_VARIANT,
        architecture="MoE (8 experts, top-2) + GQA (4:1)",
        parameters=params_map.get(MODEL_VARIANT, "Unknown"),
        active_ratio=0.25,
        format=MODEL_FORMAT,
    )

@app.post("/generate", response_model=GenerateResponse)
async def generate(request: GenerateRequest):
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Simulated generation with thinking
    thinking = None
    if request.thinking_mode != "hidden":
        thinking = f"""<Thinking>
<step> Analyzing prompt: "{request.prompt[:30]}..."
<step> Using MoE with top-2 expert routing
<step> Generating with temperature={request.temperature}
<conclude> Response ready
</Thinking>"""

    # Placeholder response
    response_text = f"[MiniMind {MODEL_VARIANT}] Response to: {request.prompt}"

    return GenerateResponse(
        text=response_text,
        thinking=thinking,
        tokens_generated=len(response_text.split()),
        model=MODEL_VARIANT,
    )

@app.get("/capabilities")
async def capabilities():
    return {
        "reasoning": ["chain-of-thought", "interleaved-thinking", "sequential-thinking"],
        "vision": ["image-caption", "vqa"],
        "coding": ["completion", "fim", "refactor"],
        "agentic": ["function-calling", "tool-use"],
        "export": ["gguf", "onnx", "tflite", "qnn"],
    }

if __name__ == "__main__":
    import uvicorn
    port = int(os.getenv("PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=port)
'''

    # Content of the generated .dockerignore file.
    DOCKERIGNORE = """
__pycache__/
*.py[cod]
*.so
.git/
.venv/
*.egg-info/
.pytest_cache/
*.log
*.tmp
"""

    # Total parameter count shown in the ``ai.model.parameters`` label.
    PARAMS_BY_VARIANT = {
        "max2-nano": "500M",
        "max2-lite": "1.5B",
        "max2-pro": "3B",
    }

    @classmethod
    def generate(
        cls,
        config: "DockerConfig",
        output_dir: str,
    ) -> Dict[str, str]:
        """Generate Dockerfile and supporting files.

        Args:
            config: Packaging settings; ``model_variant``, ``base_image``,
                ``model_format`` and ``expose_port`` are read.
            output_dir: Directory to write into; created (with parents) if
                it does not exist.  Existing files are overwritten.

        Returns:
            Mapping with keys ``"dockerfile"``, ``"serve_script"`` and
            ``"dockerignore"`` whose values are the written file paths.
        """
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        dockerfile = cls.DOCKERFILE_TEMPLATE.format(
            variant=config.model_variant,
            base_image=config.base_image,
            version="1.0.0",
            # Variants missing from the table are labelled "Unknown".
            params=cls.PARAMS_BY_VARIANT.get(config.model_variant, "Unknown"),
            format=config.model_format,
            port=config.expose_port,
        )

        # Explicit UTF-8 so the output does not depend on the host locale.
        dockerfile_path = output_path / "Dockerfile"
        dockerfile_path.write_text(dockerfile, encoding="utf-8")

        serve_path = output_path / "serve.py"
        serve_path.write_text(cls.SERVE_SCRIPT, encoding="utf-8")

        dockerignore_path = output_path / ".dockerignore"
        dockerignore_path.write_text(cls.DOCKERIGNORE, encoding="utf-8")

        return {
            "dockerfile": str(dockerfile_path),
            "serve_script": str(serve_path),
            "dockerignore": str(dockerignore_path),
        }
|
|
|
|
|
|
|
|
class DockerBuilder:
    """Build, tag and push Docker images by invoking the ``docker`` CLI.

    Every public method returns ``True`` on success and ``False`` on
    failure; failures are reported with ``print`` rather than raised.
    """

    # Registry values that mean "Docker Hub"; for these no host prefix is
    # added to image names and no server argument is passed to ``docker login``.
    _DEFAULT_REGISTRIES = frozenset({"", "docker.io", "index.docker.io"})

    def __init__(self, config: "DockerConfig"):
        self.config = config

    def _registry_host(self) -> str:
        """Return the configured registry host, or ``""`` for Docker Hub."""
        registry = (self.config.registry or "").strip().rstrip("/")
        return "" if registry in self._DEFAULT_REGISTRIES else registry

    def _image_ref(self, tag: Optional[str] = None) -> str:
        """Return ``[registry/][username/]repository:tag``.

        Empty components are skipped so that an empty username does not
        produce a reference starting with ``/``.  ``tag`` defaults to the
        configured tag.
        """
        parts = (self._registry_host(), self.config.username, self.config.repository)
        name = "/".join(part for part in parts if part)
        chosen_tag = self.config.tag if tag is None else tag
        return f"{name}:{chosen_tag}"

    def _run(self, cmd: List[str], label: str, stdin_text: Optional[str] = None) -> bool:
        """Run ``cmd``; print a ``label``-prefixed message and return False on failure."""
        try:
            stdin_bytes = None if stdin_text is None else stdin_text.encode()
            # Bytes mode: stderr is decoded leniently below so undecodable
            # CLI output cannot raise.
            result = subprocess.run(cmd, input=stdin_bytes, capture_output=True)
        except (OSError, ValueError, subprocess.SubprocessError) as e:
            # OSError covers a missing ``docker`` executable.
            print(f"{label} error: {e}")
            return False
        if result.returncode != 0:
            print(f"{label} failed: {result.stderr.decode(errors='replace')}")
            return False
        return True

    def login(self, password: str) -> bool:
        """Login to Docker registry.

        The password is sent on stdin (``--password-stdin``) so it does not
        appear in the process argument list.  For a non-Docker-Hub registry
        the host is passed as the ``docker login`` server argument.
        """
        cmd = ["docker", "login", "-u", self.config.username, "--password-stdin"]
        registry = self._registry_host()
        if registry:
            cmd.append(registry)
        return self._run(cmd, "Login", stdin_text=password)

    def build(self, context_dir: str, no_cache: bool = False) -> bool:
        """Build Docker image.

        Args:
            context_dir: Build context directory passed to ``docker build``.
            no_cache: When true, ``--no-cache`` is added.
        """
        image_tag = self._image_ref()

        cmd = ["docker", "build", "-t", image_tag]
        if no_cache:
            cmd.append("--no-cache")
        cmd.append(context_dir)

        if not self._run(cmd, "Build"):
            return False
        print(f"Built: {image_tag}")
        return True

    def push(self) -> bool:
        """Push image to registry."""
        image_tag = self._image_ref()
        if not self._run(["docker", "push", image_tag], "Push"):
            return False
        print(f"Pushed: {image_tag}")
        return True

    def tag(self, new_tag: str) -> bool:
        """Tag image with additional tag.

        The source is the configured image reference; the target is the
        same name with ``new_tag``.
        """
        source = self._image_ref()
        target = self._image_ref(new_tag)
        return self._run(["docker", "tag", source, target], "Tag")
|
|
|
|
|
|
|
|
class OCIArtifactBuilder:
    """Build OCI Artifacts for model distribution.

    Produces an OCI image manifest (``manifest.json``) describing a single
    model file as one layer, plus the small JSON config blob
    (``config.json``) that the manifest's ``config`` descriptor refers to.
    """

    # Read size used when hashing; model files can be large, so use a
    # bigger chunk than a typical small-file default.
    _HASH_CHUNK_SIZE = 1024 * 1024

    def __init__(self, config: "DockerConfig"):
        self.config = config

    def package_model(
        self,
        model_path: str,
        output_path: str,
    ) -> str:
        """Package model as OCI artifact.

        Args:
            model_path: Path to the model file; it is hashed and its size
                recorded, but it is not copied.
            output_path: Directory that receives ``manifest.json`` and
                ``config.json``; created if missing.

        Returns:
            Path of the written ``manifest.json`` as a string.

        Raises:
            OSError: If the model file cannot be read or the output files
                cannot be written.
        """
        model_file = Path(model_path)
        model_hash = self._compute_sha256(model_path)

        out_dir = Path(output_path)
        out_dir.mkdir(parents=True, exist_ok=True)

        # The config descriptor must describe a real blob: its digest and
        # size are derived from the exact bytes written to config.json.
        config_blob = json.dumps(
            {
                "architecture": "MoE+GQA",
                "model_format": self.config.model_format,
                "model_variant": self.config.model_variant,
            },
            indent=2,
            sort_keys=True,
        ).encode("utf-8")
        (out_dir / "config.json").write_bytes(config_blob)

        manifest = {
            "schemaVersion": 2,
            "mediaType": "application/vnd.oci.image.manifest.v1+json",
            "config": {
                "mediaType": "application/vnd.minimind.model.config.v1+json",
                "size": len(config_blob),
                "digest": f"sha256:{hashlib.sha256(config_blob).hexdigest()}",
            },
            "layers": [
                {
                    "mediaType": self.config.media_type,
                    "size": model_file.stat().st_size,
                    "digest": f"sha256:{model_hash}",
                    "annotations": {
                        "org.opencontainers.image.title": model_file.name,
                        "ai.model.variant": self.config.model_variant,
                        "ai.model.format": self.config.model_format,
                    },
                }
            ],
            "annotations": {
                "org.opencontainers.image.title": f"MiniMind {self.config.model_variant}",
                "org.opencontainers.image.description": "Efficient edge LLM with MoE",
                "ai.model.architecture": "MoE+GQA",
            },
        }

        manifest_path = out_dir / "manifest.json"
        with open(manifest_path, 'w', encoding="utf-8") as f:
            json.dump(manifest, f, indent=2)

        return str(manifest_path)

    def _compute_sha256(self, file_path: str) -> str:
        """Compute SHA256 hash of file.

        The file is read in chunks so it is never held in memory whole.
        Returns the lowercase hex digest.
        """
        sha256 = hashlib.sha256()
        with open(file_path, 'rb') as f:
            for chunk in iter(lambda: f.read(self._HASH_CHUNK_SIZE), b''):
                sha256.update(chunk)
        return sha256.hexdigest()
|
|
|
|
|
|
|
|
def create_docker_package(
    model_dir: str,
    output_dir: str,
    username: str,
    repository: str = "minimind-max2",
    variant: str = "max2-nano",
    tag: str = "latest",
) -> Dict[str, Any]:
    """
    Generate the Docker build files for a MiniMind model in one call.

    Builds a DockerConfig from the arguments (all other settings keep their
    defaults) and runs DockerfileGenerator.generate on it.

    Args:
        model_dir: Directory containing model files. Accepted for the
            caller's convenience; this function does not read it.
        output_dir: Directory the Docker files are written to
        username: Docker Hub username
        repository: Repository name
        variant: Model variant
        tag: Image tag

    Returns:
        Dictionary with three keys: "config" (the DockerConfig used),
        "files" (the mapping returned by the generator) and "image_tag"
        ("<username>/<repository>:<tag>").
    """
    package_config = DockerConfig(
        tag=tag,
        model_variant=variant,
        repository=repository,
        username=username,
    )

    # generate() is a classmethod, so no instance is required.
    generated_files = DockerfileGenerator.generate(package_config, output_dir)

    result: Dict[str, Any] = {"config": package_config, "files": generated_files}
    result["image_tag"] = "{}/{}:{}".format(username, repository, tag)
    return result
|
|
|