# File size: 12,805 Bytes
#
# 5a3c5d8

"""
Docker/OCI Integration for MiniMind Max2
Package and distribute models via Docker Hub and OCI-compliant registries.
"""

from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any
from pathlib import Path
import json
import os
import subprocess
import hashlib


@dataclass
class DockerConfig:
    """Settings that describe how a MiniMind model is packaged as an image.

    Attributes:
        registry: Registry host name (defaults to Docker Hub, ``docker.io``).
        username: Registry account / namespace; empty by default.
        repository: Image repository name.
        tag: Image tag.
        model_variant: One of ``max2-nano``, ``max2-lite``, ``max2-pro``.
        model_format: One of ``safetensors``, ``gguf``, ``onnx``.
        base_image: Base image used in the generated Dockerfile's ``FROM``.
        expose_port: Port published by the generated Dockerfile.
        enable_api: Flag for enabling the API server.
        oci_artifact: Flag for packaging as an OCI artifact.
        media_type: Media type recorded for the model layer of an OCI
            artifact manifest.
    """

    # --- registry ---
    registry: str = "docker.io"
    username: str = ""
    repository: str = "minimind-max2"
    tag: str = "latest"

    # --- model ---
    model_variant: str = "max2-nano"
    model_format: str = "safetensors"

    # --- image ---
    base_image: str = "python:3.11-slim"
    expose_port: int = 8000
    enable_api: bool = True

    # --- OCI artifact ---
    oci_artifact: bool = False
    media_type: str = "application/vnd.minimind.model"


class DockerfileGenerator:
    """Generate a Docker build context skeleton for MiniMind models.

    ``generate`` renders three files into an output directory: a
    ``Dockerfile`` (from ``DOCKERFILE_TEMPLATE``), the API entry point
    ``serve.py`` (``SERVE_SCRIPT``, written verbatim) and a ``.dockerignore``.
    """

    # Value written to the org.opencontainers.image.version label.
    IMAGE_VERSION = "1.0.0"

    # Total parameter count advertised per model variant (image label only).
    PARAMS_BY_VARIANT = {
        "max2-nano": "500M",
        "max2-lite": "1.5B",
        "max2-pro": "3B",
    }

    # Rendered with str.format; placeholders: variant, base_image, version,
    # params, format, port.
    DOCKERFILE_TEMPLATE = '''# MiniMind Max2 - {variant}
# Efficient edge-deployed language model with MoE architecture

FROM {base_image}

LABEL maintainer="MiniMind Team"
LABEL org.opencontainers.image.title="MiniMind Max2 - {variant}"
LABEL org.opencontainers.image.description="Efficient LLM with MoE (8 experts, 25% activation)"
LABEL org.opencontainers.image.version="{version}"
LABEL org.opencontainers.image.source="https://huggingface.co/fariasultana/MiniMind"
LABEL ai.model.architecture="MoE+GQA"
LABEL ai.model.parameters="{params}"
LABEL ai.model.format="{format}"

# Set environment
ENV PYTHONUNBUFFERED=1
ENV MODEL_VARIANT={variant}
ENV MODEL_FORMAT={format}
# serve.py binds to $PORT, so keep it in sync with the EXPOSEd port
ENV PORT={port}

WORKDIR /app

# Install dependencies. Version specifiers are quoted so /bin/sh does not
# parse the comparison operator as an output redirection.
RUN pip install --no-cache-dir \\
    "torch>=2.1.0" \\
    "numpy>=1.24.0" \\
    "fastapi>=0.100.0" \\
    "uvicorn>=0.23.0" \\
    "safetensors>=0.4.0" \\
    "huggingface_hub>=0.19.0"

# Copy model files
COPY model/ /app/model/
COPY configs/ /app/configs/
COPY capabilities/ /app/capabilities/
COPY optimization/ /app/optimization/
COPY weights/ /app/weights/
COPY serve.py /app/serve.py

# Expose API port
EXPOSE {port}

# Health check (uses the Python interpreter that is already required to run
# the server, so no extra HTTP client has to be installed in the image)
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s \\
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:{port}/health', timeout=5)" || exit 1

# Run API server
CMD ["python", "serve.py"]
'''

    # Written to serve.py unchanged (NOT passed through str.format).
    SERVE_SCRIPT = '''#!/usr/bin/env python3
"""MiniMind Max2 API Server"""

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional, List
import torch
import os
import json

# Model configuration
MODEL_VARIANT = os.getenv("MODEL_VARIANT", "max2-nano")
MODEL_FORMAT = os.getenv("MODEL_FORMAT", "safetensors")

app = FastAPI(
    title="MiniMind Max2 API",
    description="Efficient edge-deployed LLM with MoE architecture",
    version="1.0.0",
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Request/Response models
class GenerateRequest(BaseModel):
    prompt: str
    max_tokens: int = 100
    temperature: float = 0.7
    top_p: float = 0.95
    thinking_mode: str = "interleaved"

class GenerateResponse(BaseModel):
    text: str
    thinking: Optional[str] = None
    tokens_generated: int
    model: str

class ModelInfo(BaseModel):
    name: str
    variant: str
    architecture: str
    parameters: str
    active_ratio: float
    format: str

# Global model placeholder
model = None

@app.on_event("startup")
async def load_model():
    global model
    print(f"Loading MiniMind {MODEL_VARIANT}...")
    # In production, load actual model here
    model = {"loaded": True, "variant": MODEL_VARIANT}
    print("Model loaded successfully!")

@app.get("/health")
async def health():
    return {"status": "healthy", "model_loaded": model is not None}

@app.get("/info", response_model=ModelInfo)
async def info():
    params_map = {
        "max2-nano": "500M (125M active)",
        "max2-lite": "1.5B (375M active)",
        "max2-pro": "3B (750M active)",
    }
    return ModelInfo(
        name="MiniMind Max2",
        variant=MODEL_VARIANT,
        architecture="MoE (8 experts, top-2) + GQA (4:1)",
        parameters=params_map.get(MODEL_VARIANT, "Unknown"),
        active_ratio=0.25,
        format=MODEL_FORMAT,
    )

@app.post("/generate", response_model=GenerateResponse)
async def generate(request: GenerateRequest):
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Simulated generation with thinking
    thinking = None
    if request.thinking_mode != "hidden":
        thinking = f"""<Thinking>
<step> Analyzing prompt: "{request.prompt[:30]}..."
<step> Using MoE with top-2 expert routing
<step> Generating with temperature={request.temperature}
<conclude> Response ready
</Thinking>"""

    # Placeholder response
    response_text = f"[MiniMind {MODEL_VARIANT}] Response to: {request.prompt}"

    return GenerateResponse(
        text=response_text,
        thinking=thinking,
        tokens_generated=len(response_text.split()),
        model=MODEL_VARIANT,
    )

@app.get("/capabilities")
async def capabilities():
    return {
        "reasoning": ["chain-of-thought", "interleaved-thinking", "sequential-thinking"],
        "vision": ["image-caption", "vqa"],
        "coding": ["completion", "fim", "refactor"],
        "agentic": ["function-calling", "tool-use"],
        "export": ["gguf", "onnx", "tflite", "qnn"],
    }

if __name__ == "__main__":
    import uvicorn
    port = int(os.getenv("PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=port)
'''

    # Contents of the generated .dockerignore.
    DOCKERIGNORE = """
__pycache__/
*.py[cod]
*.so
.git/
.venv/
*.egg-info/
.pytest_cache/
*.log
*.tmp
"""

    @classmethod
    def generate(
        cls,
        config: "DockerConfig",
        output_dir: str,
    ) -> Dict[str, str]:
        """Write the Dockerfile, ``serve.py`` and ``.dockerignore``.

        Args:
            config: Packaging settings; ``model_variant``, ``base_image``,
                ``model_format`` and ``expose_port`` are read.
            output_dir: Directory to write into; created (with parents) when
                it does not exist. Existing files of the same name are
                overwritten.

        Returns:
            Mapping with the keys ``"dockerfile"``, ``"serve_script"`` and
            ``"dockerignore"``, each holding the path of the written file as
            a string.
        """
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        dockerfile = cls.DOCKERFILE_TEMPLATE.format(
            variant=config.model_variant,
            base_image=config.base_image,
            version=cls.IMAGE_VERSION,
            # Variants missing from the table are labelled "Unknown"
            params=cls.PARAMS_BY_VARIANT.get(config.model_variant, "Unknown"),
            format=config.model_format,
            port=config.expose_port,
        )

        # result key -> (destination, content)
        outputs = {
            "dockerfile": (output_path / "Dockerfile", dockerfile),
            "serve_script": (output_path / "serve.py", cls.SERVE_SCRIPT),
            "dockerignore": (output_path / ".dockerignore", cls.DOCKERIGNORE),
        }
        for destination, content in outputs.values():
            # Explicit encoding so the output does not depend on the locale
            destination.write_text(content, encoding="utf-8")

        return {key: str(destination) for key, (destination, _) in outputs.items()}


class DockerBuilder:
    """Build, tag and push Docker images by invoking the ``docker`` CLI.

    Every operation returns ``True`` on success and ``False`` on failure;
    failures are reported with ``print`` rather than raised.
    """

    # Registries for which the image reference carries no host prefix and
    # ``docker login`` needs no server argument (Docker Hub is the default).
    _IMPLICIT_REGISTRIES = ("", "docker.io")

    def __init__(self, config: "DockerConfig"):
        """Store the packaging settings used by all operations."""
        self.config = config

    def image_reference(self, tag: Optional[str] = None) -> str:
        """Return the image reference for ``tag`` (default: ``config.tag``).

        The result is ``[registry/][username/]repository:tag``. The registry
        prefix is omitted for Docker Hub and the username segment is omitted
        when ``config.username`` is empty, so the reference never starts
        with a stray ``/``.
        """
        cfg = self.config
        segments = []
        registry = self._explicit_registry()
        if registry:
            segments.append(registry)
        if cfg.username:
            segments.append(cfg.username)
        segments.append(cfg.repository)
        chosen_tag = cfg.tag if tag is None else tag
        return f"{'/'.join(segments)}:{chosen_tag}"

    def _explicit_registry(self) -> str:
        """Return the registry host to spell out, or ``""`` for Docker Hub."""
        registry = (self.config.registry or "").rstrip("/")
        return "" if registry in self._IMPLICIT_REGISTRIES else registry

    def _run(self, cmd: List[str], action: str) -> bool:
        """Run ``cmd``; print ``"<action> failed/error: ..."`` and return False on failure."""
        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
        except Exception as e:
            # Best-effort boundary: e.g. the docker binary is not installed.
            print(f"{action} error: {e}")
            return False
        if result.returncode != 0:
            print(f"{action} failed: {result.stderr}")
            return False
        return True

    def login(self, password: str) -> bool:
        """Log in to the configured registry as ``config.username``.

        The password is passed on stdin (``--password-stdin``) so it does
        not appear in the process argument list.

        Args:
            password: Registry password or access token.

        Returns:
            True if ``docker login`` exited with status 0.
        """
        cmd = ["docker", "login", "-u", self.config.username, "--password-stdin"]
        registry = self._explicit_registry()
        if registry:
            # Without a server argument docker always logs in to Docker Hub.
            cmd.append(registry)

        try:
            result = subprocess.run(
                cmd,
                input=password.encode(),
                capture_output=True,
                text=False,
            )
        except Exception as e:
            print(f"Login failed: {e}")
            return False
        if result.returncode != 0:
            print(f"Login failed: {result.stderr.decode(errors='replace')}")
            return False
        return True

    def build(self, context_dir: str, no_cache: bool = False) -> bool:
        """Build the image from ``context_dir``.

        Args:
            context_dir: Docker build context directory.
            no_cache: Pass ``--no-cache`` to ``docker build`` when True.

        Returns:
            True if the build succeeded.
        """
        image_tag = self.image_reference()

        cmd = ["docker", "build", "-t", image_tag]
        if no_cache:
            cmd.append("--no-cache")
        cmd.append(context_dir)

        if not self._run(cmd, "Build"):
            return False
        print(f"Built: {image_tag}")
        return True

    def push(self) -> bool:
        """Push the image to the registry; return True on success."""
        image_tag = self.image_reference()

        if not self._run(["docker", "push", image_tag], "Push"):
            return False
        print(f"Pushed: {image_tag}")
        return True

    def tag(self, new_tag: str) -> bool:
        """Add ``new_tag`` to the image currently tagged ``config.tag``.

        Returns:
            True if ``docker tag`` exited with status 0.
        """
        source = self.image_reference()
        target = self.image_reference(new_tag)
        return self._run(["docker", "tag", source, target], "Tag")


class OCIArtifactBuilder:
    """Build OCI artifact manifests for distributing a model file."""

    # Media type recorded for the artifact's config descriptor.
    CONFIG_MEDIA_TYPE = "application/vnd.minimind.model.config.v1+json"

    # Read size used while hashing; model files can be several GB.
    _HASH_CHUNK_SIZE = 1024 * 1024

    def __init__(self, config: "DockerConfig"):
        """Store the packaging settings (media type, variant, format)."""
        self.config = config

    def package_model(
        self,
        model_path: str,
        output_path: str,
    ) -> str:
        """Write an OCI image manifest describing ``model_path``.

        The manifest has one layer (the model file, identified by its
        SHA-256 digest and size) and a small JSON config blob holding the
        model variant, format and architecture. The config blob is embedded
        base64-encoded in the descriptor's ``data`` field, and the
        descriptor's ``size`` and ``digest`` are computed from those bytes.

        Args:
            model_path: Path of the model file to describe.
            output_path: Directory that receives ``manifest.json``; created
                (with parents) when missing.

        Returns:
            Path of the written ``manifest.json`` as a string.

        Raises:
            OSError: If ``model_path`` cannot be read or the manifest cannot
                be written.
        """
        # Local import: base64 is only needed here.
        import base64

        model_file = Path(model_path)
        model_hash = self._compute_sha256(model_path)

        # Deterministic serialisation so the same settings always yield the
        # same config digest.
        config_blob = json.dumps(
            {
                "architecture": "MoE+GQA",
                "format": self.config.model_format,
                "variant": self.config.model_variant,
            },
            sort_keys=True,
            separators=(",", ":"),
        ).encode("utf-8")
        config_hash = hashlib.sha256(config_blob).hexdigest()

        manifest = {
            "schemaVersion": 2,
            "mediaType": "application/vnd.oci.image.manifest.v1+json",
            "config": {
                "mediaType": self.CONFIG_MEDIA_TYPE,
                "size": len(config_blob),
                "digest": f"sha256:{config_hash}",
                "data": base64.b64encode(config_blob).decode("ascii"),
            },
            "layers": [
                {
                    "mediaType": self.config.media_type,
                    "size": model_file.stat().st_size,
                    "digest": f"sha256:{model_hash}",
                    "annotations": {
                        "org.opencontainers.image.title": model_file.name,
                        "ai.model.variant": self.config.model_variant,
                        "ai.model.format": self.config.model_format,
                    },
                }
            ],
            "annotations": {
                "org.opencontainers.image.title": f"MiniMind {self.config.model_variant}",
                "org.opencontainers.image.description": "Efficient edge LLM with MoE",
                "ai.model.architecture": "MoE+GQA",
            },
        }

        manifest_path = Path(output_path) / "manifest.json"
        manifest_path.parent.mkdir(parents=True, exist_ok=True)
        with open(manifest_path, 'w', encoding="utf-8") as f:
            json.dump(manifest, f, indent=2)

        return str(manifest_path)

    def _compute_sha256(self, file_path: str) -> str:
        """Return the hex SHA-256 digest of the file, read in chunks."""
        sha256 = hashlib.sha256()
        with open(file_path, 'rb') as f:
            while True:
                chunk = f.read(self._HASH_CHUNK_SIZE)
                if not chunk:
                    break
                sha256.update(chunk)
        return sha256.hexdigest()


def create_docker_package(
    model_dir: str,
    output_dir: str,
    username: str,
    repository: str = "minimind-max2",
    variant: str = "max2-nano",
    tag: str = "latest",
) -> Dict[str, Any]:
    """
    Generate the Docker build files for a MiniMind model in one call.

    Builds a ``DockerConfig`` from the arguments (all other settings keep
    their defaults) and has ``DockerfileGenerator`` write its files into
    ``output_dir``. No image is built or pushed here.

    Args:
        model_dir: Directory containing model files. NOTE: accepted for the
            caller's convenience but not read by this function.
        output_dir: Output directory for Docker files
        username: Docker Hub username
        repository: Repository name
        variant: Model variant
        tag: Image tag

    Returns:
        Dictionary with three entries: ``"config"`` (the ``DockerConfig``
        used), ``"files"`` (whatever ``DockerfileGenerator.generate``
        returned) and ``"image_tag"`` (``"<username>/<repository>:<tag>"``).
    """
    package_config = DockerConfig(
        username=username,
        repository=repository,
        model_variant=variant,
        tag=tag,
    )

    # generate() is a classmethod, so no generator instance is required.
    generated_files = DockerfileGenerator.generate(package_config, output_dir)

    image_tag = f"{username}/{repository}:{tag}"
    return {
        "config": package_config,
        "files": generated_files,
        "image_tag": image_tag,
    }