"""
Docker/OCI Integration for MiniMind Max2

Package and distribute models via Docker Hub and OCI-compliant registries.
"""

from dataclasses import dataclass, field
from typing import List, Optional, Dict, Any
from pathlib import Path
import json
import os
import subprocess
import hashlib


@dataclass
class DockerConfig:
    """Configuration for Docker model packaging."""

    # Registry settings
    registry: str = "docker.io"
    username: str = ""
    repository: str = "minimind-max2"
    tag: str = "latest"

    # Model settings
    model_variant: str = "max2-nano"  # max2-nano, max2-lite, max2-pro
    model_format: str = "safetensors"  # safetensors, gguf, onnx

    # Image settings
    base_image: str = "python:3.11-slim"
    expose_port: int = 8000
    enable_api: bool = True

    # OCI Artifact settings
    oci_artifact: bool = False
    media_type: str = "application/vnd.minimind.model"


class DockerfileGenerator:
    """Generate Dockerfiles for MiniMind models."""

    # Rendered with ``str.format``; the only placeholders are variant,
    # base_image, version, params, format and port.
    #
    # * Requirement specifiers are quoted: ``RUN`` uses shell form, where an
    #   unquoted ``>=`` is an output redirection rather than a version bound.
    # * ``PORT`` is exported because the generated serve.py reads it; without
    #   it the server always listens on 8000 regardless of the exposed port.
    # * The health check uses the Python interpreter instead of curl, which
    #   slim Python base images do not include.
    DOCKERFILE_TEMPLATE = '''# MiniMind Max2 - {variant}
# Efficient edge-deployed language model with MoE architecture

FROM {base_image}

LABEL maintainer="MiniMind Team"
LABEL org.opencontainers.image.title="MiniMind Max2 - {variant}"
LABEL org.opencontainers.image.description="Efficient LLM with MoE (8 experts, 25% activation)"
LABEL org.opencontainers.image.version="{version}"
LABEL org.opencontainers.image.source="https://huggingface.co/fariasultana/MiniMind"
LABEL ai.model.architecture="MoE+GQA"
LABEL ai.model.parameters="{params}"
LABEL ai.model.format="{format}"

# Set environment
ENV PYTHONUNBUFFERED=1
ENV MODEL_VARIANT={variant}
ENV MODEL_FORMAT={format}
ENV PORT={port}

WORKDIR /app

# Install dependencies
RUN pip install --no-cache-dir \\
    "torch>=2.1.0" \\
    "numpy>=1.24.0" \\
    "fastapi>=0.100.0" \\
    "uvicorn>=0.23.0" \\
    "safetensors>=0.4.0" \\
    "huggingface_hub>=0.19.0"

# Copy model files
COPY model/ /app/model/
COPY configs/ /app/configs/
COPY capabilities/ /app/capabilities/
COPY optimization/ /app/optimization/
COPY weights/ /app/weights/
COPY serve.py /app/serve.py

# Expose API port
EXPOSE {port}

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s \\
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:{port}/health', timeout=5)" || exit 1

# Run API server
CMD ["python", "serve.py"]
'''

    # Written verbatim to serve.py (NOT passed through ``str.format``, so the
    # braces inside its f-strings need no escaping).
    SERVE_SCRIPT = '''#!/usr/bin/env python3
"""MiniMind Max2 API Server"""

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional, List
import torch
import os
import json

# Model configuration
MODEL_VARIANT = os.getenv("MODEL_VARIANT", "max2-nano")
MODEL_FORMAT = os.getenv("MODEL_FORMAT", "safetensors")

app = FastAPI(
    title="MiniMind Max2 API",
    description="Efficient edge-deployed LLM with MoE architecture",
    version="1.0.0",
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)


# Request/Response models
class GenerateRequest(BaseModel):
    prompt: str
    max_tokens: int = 100
    temperature: float = 0.7
    top_p: float = 0.95
    thinking_mode: str = "interleaved"


class GenerateResponse(BaseModel):
    text: str
    thinking: Optional[str] = None
    tokens_generated: int
    model: str


class ModelInfo(BaseModel):
    name: str
    variant: str
    architecture: str
    parameters: str
    active_ratio: float
    format: str


# Global model placeholder
model = None


@app.on_event("startup")
async def load_model():
    global model
    print(f"Loading MiniMind {MODEL_VARIANT}...")
    # In production, load actual model here
    model = {"loaded": True, "variant": MODEL_VARIANT}
    print("Model loaded successfully!")


@app.get("/health")
async def health():
    return {"status": "healthy", "model_loaded": model is not None}


@app.get("/info", response_model=ModelInfo)
async def info():
    params_map = {
        "max2-nano": "500M (125M active)",
        "max2-lite": "1.5B (375M active)",
        "max2-pro": "3B (750M active)",
    }
    return ModelInfo(
        name="MiniMind Max2",
        variant=MODEL_VARIANT,
        architecture="MoE (8 experts, top-2) + GQA (4:1)",
        parameters=params_map.get(MODEL_VARIANT, "Unknown"),
        active_ratio=0.25,
        format=MODEL_FORMAT,
    )


@app.post("/generate", response_model=GenerateResponse)
async def generate(request: GenerateRequest):
    if model is None:
        raise HTTPException(status_code=503, detail="Model not loaded")

    # Simulated generation with thinking
    thinking = None
    if request.thinking_mode != "hidden":
        thinking = f"""
Analyzing prompt: "{request.prompt[:30]}..."
Using MoE with top-2 expert routing
Generating with temperature={request.temperature}
Response ready
"""

    # Placeholder response
    response_text = f"[MiniMind {MODEL_VARIANT}] Response to: {request.prompt}"

    return GenerateResponse(
        text=response_text,
        thinking=thinking,
        tokens_generated=len(response_text.split()),
        model=MODEL_VARIANT,
    )


@app.get("/capabilities")
async def capabilities():
    return {
        "reasoning": ["chain-of-thought", "interleaved-thinking", "sequential-thinking"],
        "vision": ["image-caption", "vqa"],
        "coding": ["completion", "fim", "refactor"],
        "agentic": ["function-calling", "tool-use"],
        "export": ["gguf", "onnx", "tflite", "qnn"],
    }


if __name__ == "__main__":
    import uvicorn
    port = int(os.getenv("PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=port)
'''

    # Contents of the generated .dockerignore file.
    DOCKERIGNORE = """
__pycache__/
*.py[cod]
*.so
.git/
.venv/
*.egg-info/
.pytest_cache/
*.log
*.tmp
"""

    # Total parameter count shown in the image label, keyed by model variant.
    PARAMS_BY_VARIANT = {
        "max2-nano": "500M",
        "max2-lite": "1.5B",
        "max2-pro": "3B",
    }

    @classmethod
    def generate(
        cls,
        config: DockerConfig,
        output_dir: str,
    ) -> Dict[str, str]:
        """Generate Dockerfile and supporting files.

        Writes ``Dockerfile``, ``serve.py`` and ``.dockerignore`` into
        ``output_dir``, creating the directory (and parents) if needed.

        Args:
            config: Packaging settings; the variant, format, base image and
                exposed port are substituted into the Dockerfile.
            output_dir: Directory that receives the generated files.

        Returns:
            Mapping with keys ``dockerfile``, ``serve_script`` and
            ``dockerignore`` whose values are the written file paths.
        """
        output_path = Path(output_dir)
        output_path.mkdir(parents=True, exist_ok=True)

        dockerfile = cls.DOCKERFILE_TEMPLATE.format(
            variant=config.model_variant,
            base_image=config.base_image,
            version="1.0.0",
            params=cls.PARAMS_BY_VARIANT.get(config.model_variant, "Unknown"),
            format=config.model_format,
            port=config.expose_port,
        )

        dockerfile_path = output_path / "Dockerfile"
        serve_path = output_path / "serve.py"
        dockerignore_path = output_path / ".dockerignore"

        # Explicit encoding so the output does not depend on the host locale.
        dockerfile_path.write_text(dockerfile, encoding="utf-8")
        serve_path.write_text(cls.SERVE_SCRIPT, encoding="utf-8")
        dockerignore_path.write_text(cls.DOCKERIGNORE, encoding="utf-8")

        return {
            "dockerfile": str(dockerfile_path),
            "serve_script": str(serve_path),
            "dockerignore": str(dockerignore_path),
        }


class DockerBuilder:
    """Build and push Docker images by shelling out to the ``docker`` CLI."""

    # Registry for which the image reference carries no host prefix.
    DEFAULT_REGISTRY = "docker.io"

    def __init__(self, config: DockerConfig):
        self.config = config

    def _uses_custom_registry(self) -> bool:
        """Return True when the configured registry is not the default one."""
        registry = self.config.registry
        return bool(registry) and registry != self.DEFAULT_REGISTRY

    def _image_ref(self, tag: Optional[str] = None) -> str:
        """Return ``[registry/][username/]repository:tag`` for this config.

        The registry host is only prepended for non-default registries, so
        the default configuration still yields ``username/repository:tag``.
        An empty username is omitted instead of producing a leading slash.
        """
        parts = []
        if self._uses_custom_registry():
            parts.append(self.config.registry)
        if self.config.username:
            parts.append(self.config.username)
        parts.append(self.config.repository)
        name = "/".join(parts)
        chosen_tag = self.config.tag if tag is None else tag
        return f"{name}:{chosen_tag}"

    def login(self, password: str) -> bool:
        """Login to Docker registry.

        The password is passed on stdin so it never appears in the process
        argument list.

        Args:
            password: Registry password or access token.

        Returns:
            True when ``docker login`` exits with status 0, False otherwise
            (including when the command could not be started).
        """
        cmd = ["docker", "login", "-u", self.config.username, "--password-stdin"]
        if self._uses_custom_registry():
            # Without an explicit server, docker logs in to Docker Hub.
            cmd.append(self.config.registry)
        try:
            result = subprocess.run(
                cmd,
                input=password.encode(),
                capture_output=True,
                text=False,
            )
        except Exception as e:
            print(f"Login failed: {e}")
            return False

        if result.returncode != 0:
            # stderr is bytes here because the call runs with text=False.
            print(f"Login failed: {result.stderr.decode(errors='replace')}")
            return False
        return True

    def build(self, context_dir: str, no_cache: bool = False) -> bool:
        """Build Docker image.

        Args:
            context_dir: Build context directory passed to ``docker build``.
            no_cache: When True, add ``--no-cache`` to the build command.

        Returns:
            True when the build exits with status 0, False otherwise.
        """
        image_tag = self._image_ref()

        cmd = ["docker", "build", "-t", image_tag]
        if no_cache:
            cmd.append("--no-cache")
        cmd.append(context_dir)

        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
        except Exception as e:
            print(f"Build error: {e}")
            return False

        if result.returncode != 0:
            print(f"Build failed: {result.stderr}")
            return False
        print(f"Built: {image_tag}")
        return True

    def push(self) -> bool:
        """Push image to registry.

        Returns:
            True when ``docker push`` exits with status 0, False otherwise.
        """
        image_tag = self._image_ref()

        try:
            result = subprocess.run(
                ["docker", "push", image_tag],
                capture_output=True,
                text=True,
            )
        except Exception as e:
            print(f"Push error: {e}")
            return False

        if result.returncode != 0:
            print(f"Push failed: {result.stderr}")
            return False
        print(f"Pushed: {image_tag}")
        return True

    def tag(self, new_tag: str) -> bool:
        """Tag image with additional tag.

        Args:
            new_tag: Tag to add alongside the configured ``config.tag``.

        Returns:
            True when ``docker tag`` exits with status 0, False otherwise.
        """
        source = self._image_ref()
        target = self._image_ref(new_tag)

        try:
            result = subprocess.run(
                ["docker", "tag", source, target],
                capture_output=True,
                text=True,
            )
        except Exception as e:
            print(f"Tag error: {e}")
            return False

        if result.returncode != 0:
            print(f"Tag failed: {result.stderr}")
            return False
        return True


class OCIArtifactBuilder:
    """Build OCI Artifacts for model distribution."""

    # Payload described by the manifest's "config" descriptor. The manifest
    # carries no real configuration, so the blob is the empty JSON object;
    # a tool pushing this manifest must upload exactly these bytes.
    CONFIG_BLOB = b"{}"

    def __init__(self, config: DockerConfig):
        self.config = config

    def package_model(
        self,
        model_path: str,
        output_path: str,
    ) -> str:
        """Package model as OCI artifact.

        Writes ``manifest.json`` into ``output_path`` (created if missing)
        describing the model file as a single layer.

        Args:
            model_path: Path to the model file to describe.
            output_path: Directory that receives ``manifest.json``.

        Returns:
            Path of the written manifest, as a string.

        Raises:
            OSError: If the model file cannot be read.
        """
        model_file = Path(model_path)
        model_hash = self._compute_sha256(model_path)

        # Digest and size must describe the config blob itself, not the layer.
        config_hash = hashlib.sha256(self.CONFIG_BLOB).hexdigest()

        manifest = {
            "schemaVersion": 2,
            "mediaType": "application/vnd.oci.image.manifest.v1+json",
            "config": {
                "mediaType": "application/vnd.minimind.model.config.v1+json",
                "size": len(self.CONFIG_BLOB),
                "digest": f"sha256:{config_hash}",
            },
            "layers": [
                {
                    "mediaType": self.config.media_type,
                    "size": model_file.stat().st_size,
                    "digest": f"sha256:{model_hash}",
                    "annotations": {
                        "org.opencontainers.image.title": model_file.name,
                        "ai.model.variant": self.config.model_variant,
                        "ai.model.format": self.config.model_format,
                    },
                }
            ],
            "annotations": {
                "org.opencontainers.image.title": f"MiniMind {self.config.model_variant}",
                "org.opencontainers.image.description": "Efficient edge LLM with MoE",
                "ai.model.architecture": "MoE+GQA",
            },
        }

        manifest_path = Path(output_path) / "manifest.json"
        manifest_path.parent.mkdir(parents=True, exist_ok=True)

        with open(manifest_path, "w", encoding="utf-8") as f:
            json.dump(manifest, f, indent=2)

        return str(manifest_path)

    def _compute_sha256(self, file_path: str) -> str:
        """Compute SHA256 hash of file.

        The file is read in fixed-size chunks so large model files are never
        loaded into memory at once.

        Args:
            file_path: Path of the file to hash.

        Returns:
            Lowercase hexadecimal SHA-256 digest of the file contents.
        """
        sha256 = hashlib.sha256()
        with open(file_path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                sha256.update(chunk)
        return sha256.hexdigest()


def create_docker_package(
    model_dir: str,
    output_dir: str,
    username: str,
    repository: str = "minimind-max2",
    variant: str = "max2-nano",
    tag: str = "latest",
) -> Dict[str, Any]:
    """
    Create complete Docker package for MiniMind model.

    Args:
        model_dir: Directory containing model files. NOTE: currently not
            read by this function; it is kept for interface compatibility.
        output_dir: Output directory for Docker files
        username: Docker Hub username
        repository: Repository name
        variant: Model variant
        tag: Image tag

    Returns:
        Dictionary with keys ``config`` (the DockerConfig used), ``files``
        (paths to generated files) and ``image_tag``
        (``username/repository:tag``).
    """
    config = DockerConfig(
        username=username,
        repository=repository,
        model_variant=variant,
        tag=tag,
    )

    # generate() is a classmethod, so no generator instance is needed.
    files = DockerfileGenerator.generate(config, output_dir)

    return {
        "config": config,
        "files": files,
        "image_tag": f"{username}/{repository}:{tag}",
    }