feat: Add Docker/OCI integration

Browse files

Files changed (1) hide show

deployment/docker.py +445 -0

deployment/docker.py ADDED Viewed

	@@ -0,0 +1,445 @@

+"""
+Docker/OCI Integration for MiniMind Max2
+Package and distribute models via Docker Hub and OCI-compliant registries.
+"""
+from dataclasses import dataclass, field
+from typing import List, Optional, Dict, Any
+from pathlib import Path
+import json
+import os
+import subprocess
+import hashlib
+@dataclass
+class DockerConfig:
+    """Configuration for Docker model packaging.
+
+    A plain settings container with a default for every field. The other
+    classes in this module receive an instance and read individual
+    attributes from it; the dataclass itself performs no validation.
+    """
+    # --- Registry settings ---
+    # Registry host name; the default is Docker Hub.
+    registry: str = "docker.io"
+    # Account name passed to `docker login -u` and used as the namespace
+    # segment of the image name.
+    username: str = ""
+    # Repository segment of the image name.
+    repository: str = "minimind-max2"
+    # Image tag appended after the colon in the image name.
+    tag: str = "latest"
+    # --- Model settings ---
+    # Written to the Dockerfile labels/env and to the OCI manifest annotations.
+    model_variant: str = "max2-nano"  # max2-nano, max2-lite, max2-pro
+    model_format: str = "safetensors"  # safetensors, gguf, onnx
+    # --- Image settings ---
+    # Substituted into the FROM line of the generated Dockerfile.
+    base_image: str = "python:3.11-slim"
+    # Port substituted into EXPOSE and the health-check URL of the Dockerfile.
+    expose_port: int = 8000
+    # NOTE(review): not read by any code visible in this section - confirm usage.
+    enable_api: bool = True
+    # --- OCI Artifact settings ---
+    # NOTE(review): not read by any code visible in this section - confirm usage.
+    oci_artifact: bool = False
+    # mediaType recorded for the model layer in the generated OCI manifest.
+    media_type: str = "application/vnd.minimind.model"
+class DockerfileGenerator:
+    """Generate the Docker build-context files for MiniMind models.
+
+    ``generate`` renders ``DOCKERFILE_TEMPLATE`` from a config object and
+    writes it, together with the static ``SERVE_SCRIPT`` and a
+    ``.dockerignore``, into an output directory.
+    """
+    # Rendered with str.format(); placeholders are variant, base_image,
+    # version, params, format and port. A literal brace added to this
+    # template must be doubled.
+    DOCKERFILE_TEMPLATE = '''# MiniMind Max2 - {variant}
+# Efficient edge-deployed language model with MoE architecture
+FROM {base_image}
+LABEL maintainer="MiniMind Team"
+LABEL org.opencontainers.image.title="MiniMind Max2 - {variant}"
+LABEL org.opencontainers.image.description="Efficient LLM with MoE (8 experts, 25% activation)"
+LABEL org.opencontainers.image.version="{version}"
+LABEL org.opencontainers.image.source="https://huggingface.co/fariasultana/MiniMind"
+LABEL ai.model.architecture="MoE+GQA"
+LABEL ai.model.parameters="{params}"
+LABEL ai.model.format="{format}"
+# Set environment
+ENV PYTHONUNBUFFERED=1
+ENV MODEL_VARIANT={variant}
+ENV MODEL_FORMAT={format}
+ENV PORT={port}
+WORKDIR /app
+# Install dependencies (specifiers are quoted so the shell does not parse ">" as a redirect)
+RUN pip install --no-cache-dir \\
+    "torch>=2.1.0" \\
+    "numpy>=1.24.0" \\
+    "fastapi>=0.100.0" \\
+    "uvicorn>=0.23.0" \\
+    "safetensors>=0.4.0" \\
+    "huggingface_hub>=0.19.0"
+# Copy model files
+COPY model/ /app/model/
+COPY configs/ /app/configs/
+COPY capabilities/ /app/capabilities/
+COPY optimization/ /app/optimization/
+COPY weights/ /app/weights/
+COPY serve.py /app/serve.py
+# Expose API port
+EXPOSE {port}
+# Health check (Python stdlib only, so it works on slim base images)
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s \\
+    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:{port}/health', timeout=5)" || exit 1
+# Run API server
+CMD ["python", "serve.py"]
+'''
+    # Written verbatim (never passed through str.format), so its braces
+    # need no escaping.
+    SERVE_SCRIPT = '''#!/usr/bin/env python3
+"""MiniMind Max2 API Server"""
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from typing import Optional, List
+import torch
+import os
+import json
+# Model configuration
+MODEL_VARIANT = os.getenv("MODEL_VARIANT", "max2-nano")
+MODEL_FORMAT = os.getenv("MODEL_FORMAT", "safetensors")
+app = FastAPI(
+    title="MiniMind Max2 API",
+    description="Efficient edge-deployed LLM with MoE architecture",
+    version="1.0.0",
+)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Request/Response models
+class GenerateRequest(BaseModel):
+    prompt: str
+    max_tokens: int = 100
+    temperature: float = 0.7
+    top_p: float = 0.95
+    thinking_mode: str = "interleaved"
+class GenerateResponse(BaseModel):
+    text: str
+    thinking: Optional[str] = None
+    tokens_generated: int
+    model: str
+class ModelInfo(BaseModel):
+    name: str
+    variant: str
+    architecture: str
+    parameters: str
+    active_ratio: float
+    format: str
+# Global model placeholder
+model = None
+@app.on_event("startup")
+async def load_model():
+    global model
+    print(f"Loading MiniMind {MODEL_VARIANT}...")
+    # In production, load actual model here
+    model = {"loaded": True, "variant": MODEL_VARIANT}
+    print("Model loaded successfully!")
+@app.get("/health")
+async def health():
+    return {"status": "healthy", "model_loaded": model is not None}
+@app.get("/info", response_model=ModelInfo)
+async def info():
+    params_map = {
+        "max2-nano": "500M (125M active)",
+        "max2-lite": "1.5B (375M active)",
+        "max2-pro": "3B (750M active)",
+    }
+    return ModelInfo(
+        name="MiniMind Max2",
+        variant=MODEL_VARIANT,
+        architecture="MoE (8 experts, top-2) + GQA (4:1)",
+        parameters=params_map.get(MODEL_VARIANT, "Unknown"),
+        active_ratio=0.25,
+        format=MODEL_FORMAT,
+    )
+@app.post("/generate", response_model=GenerateResponse)
+async def generate(request: GenerateRequest):
+    if model is None:
+        raise HTTPException(status_code=503, detail="Model not loaded")
+    # Simulated generation with thinking
+    thinking = None
+    if request.thinking_mode != "hidden":
+        thinking = f"""<Thinking>
+<step> Analyzing prompt: "{request.prompt[:30]}..."
+<step> Using MoE with top-2 expert routing
+<step> Generating with temperature={request.temperature}
+<conclude> Response ready
+</Thinking>"""
+    # Placeholder response
+    response_text = f"[MiniMind {MODEL_VARIANT}] Response to: {request.prompt}"
+    return GenerateResponse(
+        text=response_text,
+        thinking=thinking,
+        tokens_generated=len(response_text.split()),
+        model=MODEL_VARIANT,
+    )
+@app.get("/capabilities")
+async def capabilities():
+    return {
+        "reasoning": ["chain-of-thought", "interleaved-thinking", "sequential-thinking"],
+        "vision": ["image-caption", "vqa"],
+        "coding": ["completion", "fim", "refactor"],
+        "agentic": ["function-calling", "tool-use"],
+        "export": ["gguf", "onnx", "tflite", "qnn"],
+    }
+if __name__ == "__main__":
+    import uvicorn
+    port = int(os.getenv("PORT", 8000))
+    uvicorn.run(app, host="0.0.0.0", port=port)
+'''
+    # Contents of the generated .dockerignore file.
+    DOCKERIGNORE = """
+__pycache__/
+*.py[cod]
+*.so
+.git/
+.venv/
+*.egg-info/
+.pytest_cache/
+*.log
+*.tmp
+"""
+    # Total parameter count shown in the ai.model.parameters label, by variant.
+    PARAMS_BY_VARIANT = {
+        "max2-nano": "500M",
+        "max2-lite": "1.5B",
+        "max2-pro": "3B",
+    }
+
+    @classmethod
+    def generate(
+        cls,
+        config: DockerConfig,
+        output_dir: str,
+        version: str = "1.0.0",
+    ) -> Dict[str, str]:
+        """Write Dockerfile, serve.py and .dockerignore into ``output_dir``.
+
+        Args:
+            config: Settings object; ``model_variant``, ``base_image``,
+                ``model_format`` and ``expose_port`` are read from it.
+            output_dir: Target directory, created (with parents) if missing.
+            version: Value for the ``org.opencontainers.image.version`` label.
+
+        Returns:
+            Mapping with the keys ``dockerfile``, ``serve_script`` and
+            ``dockerignore``, each holding the path of the written file.
+        """
+        output_path = Path(output_dir)
+        output_path.mkdir(parents=True, exist_ok=True)
+
+        dockerfile = cls.DOCKERFILE_TEMPLATE.format(
+            variant=config.model_variant,
+            base_image=config.base_image,
+            version=version,
+            # Variants missing from the table are labelled "Unknown".
+            params=cls.PARAMS_BY_VARIANT.get(config.model_variant, "Unknown"),
+            format=config.model_format,
+            port=config.expose_port,
+        )
+
+        # Explicit UTF-8 so the output does not depend on the host locale.
+        dockerfile_path = output_path / "Dockerfile"
+        with open(dockerfile_path, 'w', encoding="utf-8") as f:
+            f.write(dockerfile)
+
+        serve_path = output_path / "serve.py"
+        with open(serve_path, 'w', encoding="utf-8") as f:
+            f.write(cls.SERVE_SCRIPT)
+
+        dockerignore_path = output_path / ".dockerignore"
+        with open(dockerignore_path, 'w', encoding="utf-8") as f:
+            f.write(cls.DOCKERIGNORE)
+
+        return {
+            "dockerfile": str(dockerfile_path),
+            "serve_script": str(serve_path),
+            "dockerignore": str(dockerignore_path),
+        }
+class DockerBuilder:
+    """Build, tag and push Docker images by invoking the ``docker`` CLI.
+
+    Every operation returns ``True`` on success and ``False`` otherwise;
+    failures are reported with ``print`` and never raised to the caller.
+    """
+    # Registry for which image names carry no host prefix and
+    # `docker login` needs no explicit server argument.
+    DEFAULT_REGISTRY = "docker.io"
+
+    def __init__(self, config: DockerConfig):
+        self.config = config
+
+    def _uses_custom_registry(self) -> bool:
+        """Return True when the configured registry is not Docker Hub."""
+        registry = self.config.registry
+        return bool(registry) and registry != self.DEFAULT_REGISTRY
+
+    def image_reference(self, tag: Optional[str] = None) -> str:
+        """Return the full image name used by build, push and tag.
+
+        The result is ``[registry/][username/]repository:tag``. The registry
+        host is included only for a non-default registry, and the username
+        segment is dropped when empty so no leading ``/`` is produced.
+
+        Args:
+            tag: Tag to use instead of ``config.tag``.
+        """
+        cfg = self.config
+        segments: List[str] = []
+        if self._uses_custom_registry():
+            segments.append(cfg.registry)
+        if cfg.username:
+            segments.append(cfg.username)
+        segments.append(cfg.repository)
+        name = "/".join(segments)
+        chosen_tag = cfg.tag if tag is None else tag
+        return f"{name}:{chosen_tag}"
+
+    def _run(self, cmd: List[str], action: str) -> bool:
+        """Run ``cmd`` and report failures under the ``action`` label."""
+        try:
+            result = subprocess.run(cmd, capture_output=True, text=True)
+        except Exception as e:
+            # Deliberately broad: a missing docker binary or any other
+            # launch problem is reported as False rather than raised.
+            print(f"{action} error: {e}")
+            return False
+        if result.returncode != 0:
+            print(f"{action} failed: {result.stderr}")
+            return False
+        return True
+
+    def login(self, password: str) -> bool:
+        """Login to the configured Docker registry.
+
+        The password is sent on stdin (``--password-stdin``) so it does not
+        appear in the process argument list.
+
+        Args:
+            password: Password or access token for ``config.username``.
+        """
+        cmd = ["docker", "login", "-u", self.config.username, "--password-stdin"]
+        if self._uses_custom_registry():
+            # Without a server argument `docker login` targets the default registry.
+            cmd.append(self.config.registry)
+        try:
+            result = subprocess.run(
+                cmd,
+                input=password.encode(),
+                capture_output=True,
+                text=False,
+            )
+        except Exception as e:
+            print(f"Login failed: {e}")
+            return False
+        if result.returncode != 0:
+            detail = result.stderr.decode(errors="replace").strip()
+            print(f"Login failed: {detail}")
+            return False
+        return True
+
+    def build(self, context_dir: str, no_cache: bool = False) -> bool:
+        """Build the Docker image from ``context_dir``.
+
+        Args:
+            context_dir: Build context directory passed to ``docker build``.
+            no_cache: Add ``--no-cache`` to the build command.
+        """
+        image_tag = self.image_reference()
+        cmd = ["docker", "build", "-t", image_tag]
+        if no_cache:
+            cmd.append("--no-cache")
+        cmd.append(context_dir)
+        if not self._run(cmd, "Build"):
+            return False
+        print(f"Built: {image_tag}")
+        return True
+
+    def push(self) -> bool:
+        """Push the image to the registry."""
+        image_tag = self.image_reference()
+        if not self._run(["docker", "push", image_tag], "Push"):
+            return False
+        print(f"Pushed: {image_tag}")
+        return True
+
+    def tag(self, new_tag: str) -> bool:
+        """Tag the image with an additional tag.
+
+        Args:
+            new_tag: Extra tag to attach to the image named by the config.
+        """
+        source = self.image_reference()
+        target = self.image_reference(new_tag)
+        return self._run(["docker", "tag", source, target], "Tag")
+class OCIArtifactBuilder:
+    """Build OCI artifact metadata for distributing a model file.
+
+    ``package_model`` writes an OCI image manifest describing one model
+    file, plus the small JSON config blob that the manifest refers to.
+    The model file itself is only read, never copied.
+    """
+    # mediaType of the config blob referenced by the manifest.
+    CONFIG_MEDIA_TYPE = "application/vnd.minimind.model.config.v1+json"
+    # Read size for hashing; model files are large, so use 1 MiB chunks.
+    _HASH_CHUNK_SIZE = 1024 * 1024
+
+    def __init__(self, config: DockerConfig):
+        self.config = config
+
+    def package_model(
+        self,
+        model_path: str,
+        output_path: str,
+    ) -> str:
+        """Write ``manifest.json`` and ``config.json`` for ``model_path``.
+
+        Args:
+            model_path: Path of the model file to describe.
+            output_path: Directory that receives the files; created (with
+                parents) if missing.
+
+        Returns:
+            Path of the written ``manifest.json`` as a string.
+
+        Raises:
+            OSError: If the model file cannot be read or the output files
+                cannot be written.
+        """
+        model_file = Path(model_path)
+        # Hash first so a missing model fails before anything is written.
+        model_hash = self._compute_sha256(model_path)
+        model_size = model_file.stat().st_size
+
+        out_dir = Path(output_path)
+        out_dir.mkdir(parents=True, exist_ok=True)
+
+        # A descriptor's digest and size must describe a real blob, so
+        # serialise the config deterministically and measure those bytes.
+        config_blob = json.dumps(
+            {
+                "ai.model.architecture": "MoE+GQA",
+                "ai.model.format": self.config.model_format,
+                "ai.model.variant": self.config.model_variant,
+            },
+            sort_keys=True,
+            separators=(",", ":"),
+        ).encode("utf-8")
+        (out_dir / "config.json").write_bytes(config_blob)
+        config_hash = hashlib.sha256(config_blob).hexdigest()
+
+        manifest = {
+            "schemaVersion": 2,
+            "mediaType": "application/vnd.oci.image.manifest.v1+json",
+            "config": {
+                "mediaType": self.CONFIG_MEDIA_TYPE,
+                "size": len(config_blob),
+                "digest": f"sha256:{config_hash}",
+            },
+            "layers": [
+                {
+                    "mediaType": self.config.media_type,
+                    "size": model_size,
+                    "digest": f"sha256:{model_hash}",
+                    "annotations": {
+                        "org.opencontainers.image.title": model_file.name,
+                        "ai.model.variant": self.config.model_variant,
+                        "ai.model.format": self.config.model_format,
+                    },
+                }
+            ],
+            "annotations": {
+                "org.opencontainers.image.title": f"MiniMind {self.config.model_variant}",
+                "org.opencontainers.image.description": "Efficient edge LLM with MoE",
+                "ai.model.architecture": "MoE+GQA",
+            },
+        }
+        manifest_path = out_dir / "manifest.json"
+        with open(manifest_path, 'w', encoding="utf-8") as f:
+            json.dump(manifest, f, indent=2)
+        return str(manifest_path)
+
+    def _compute_sha256(self, file_path: str) -> str:
+        """Return the hex SHA256 digest of a file, read in chunks."""
+        sha256 = hashlib.sha256()
+        with open(file_path, 'rb') as f:
+            for chunk in iter(lambda: f.read(self._HASH_CHUNK_SIZE), b''):
+                sha256.update(chunk)
+        return sha256.hexdigest()
+def create_docker_package(
+    model_dir: str,
+    output_dir: str,
+    username: str,
+    repository: str = "minimind-max2",
+    variant: str = "max2-nano",
+    tag: str = "latest",
+) -> Dict[str, Any]:
+    """
+    Generate the Docker build files for a MiniMind model in one call.
+    Builds a DockerConfig from the arguments (all other settings keep
+    their defaults) and has DockerfileGenerator write the files.
+    Args:
+        model_dir: Directory containing model files.
+            NOTE(review): accepted but not read by this function.
+        output_dir: Directory that receives the generated Docker files
+        username: Docker Hub username
+        repository: Repository name
+        variant: Model variant
+        tag: Image tag
+    Returns:
+        Dictionary with the keys "config" (the DockerConfig used),
+        "files" (paths of the generated files) and "image_tag"
+        ("<username>/<repository>:<tag>")
+    """
+    settings = DockerConfig(
+        username=username,
+        repository=repository,
+        model_variant=variant,
+        tag=tag,
+    )
+    # generate() is a classmethod, so it is called on the class directly.
+    generated = DockerfileGenerator.generate(settings, output_dir)
+    package: Dict[str, Any] = {}
+    package["config"] = settings
+    package["files"] = generated
+    package["image_tag"] = f"{username}/{repository}:{tag}"
+    return package