feat: Add Docker/OCI integration

5a3c5d8 verified 23 days ago

12.8 kB

	"""
	Docker/OCI Integration for MiniMind Max2
	Package and distribute models via Docker Hub and OCI-compliant registries.
	"""

	import hashlib
	import json
	import os
	import subprocess
	import textwrap
	from dataclasses import dataclass, field
	from pathlib import Path
	from typing import List, Optional, Dict, Any


	@dataclass
	class DockerConfig:
	    """Configuration for Docker model packaging.

	    Every field has a default, so ``DockerConfig()`` is a usable
	    configuration; override individual fields by keyword.
	    """

	    # Registry settings: registry host, account name, repository and tag.
	    registry: str = "docker.io"
	    username: str = ""
	    repository: str = "minimind-max2"
	    tag: str = "latest"

	    # Model settings.
	    model_variant: str = "max2-nano"  # max2-nano, max2-lite, max2-pro
	    model_format: str = "safetensors"  # safetensors, gguf, onnx

	    # Image settings: base image for the Dockerfile and the API port.
	    base_image: str = "python:3.11-slim"
	    expose_port: int = 8000
	    enable_api: bool = True

	    # OCI artifact settings: media type recorded for the model layer.
	    oci_artifact: bool = False
	    media_type: str = "application/vnd.minimind.model"


	class DockerfileGenerator:
	"""Generate Dockerfiles for MiniMind models."""

	DOCKERFILE_TEMPLATE = '''# MiniMind Max2 - {variant}
	# Efficient edge-deployed language model with MoE architecture

	FROM {base_image}

	LABEL maintainer="MiniMind Team"
	LABEL org.opencontainers.image.title="MiniMind Max2 - {variant}"
	LABEL org.opencontainers.image.description="Efficient LLM with MoE (8 experts, 25% activation)"
	LABEL org.opencontainers.image.version="{version}"
	LABEL org.opencontainers.image.source="https://huggingface.co/fariasultana/MiniMind"
	LABEL ai.model.architecture="MoE+GQA"
	LABEL ai.model.parameters="{params}"
	LABEL ai.model.format="{format}"

	# Set environment
	ENV PYTHONUNBUFFERED=1
	ENV MODEL_VARIANT={variant}
	ENV MODEL_FORMAT={format}

	WORKDIR /app

	# Install dependencies
	RUN pip install --no-cache-dir \\
	torch>=2.1.0 \\
	numpy>=1.24.0 \\
	fastapi>=0.100.0 \\
	uvicorn>=0.23.0 \\
	safetensors>=0.4.0 \\
	huggingface_hub>=0.19.0

	# Copy model files
	COPY model/ /app/model/
	COPY configs/ /app/configs/
	COPY capabilities/ /app/capabilities/
	COPY optimization/ /app/optimization/
	COPY weights/ /app/weights/
	COPY serve.py /app/serve.py

	# Expose API port
	EXPOSE {port}

	# Health check
	HEALTHCHECK --interval=30s --timeout=10s --start-period=60s \\
	CMD curl -f http://localhost:{port}/health \|\| exit 1

	# Run API server
	CMD ["python", "serve.py"]
	'''

	SERVE_SCRIPT = '''#!/usr/bin/env python3
	"""MiniMind Max2 API Server"""

	from fastapi import FastAPI, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from pydantic import BaseModel
	from typing import Optional, List
	import torch
	import os
	import json

	# Model configuration
	MODEL_VARIANT = os.getenv("MODEL_VARIANT", "max2-nano")
	MODEL_FORMAT = os.getenv("MODEL_FORMAT", "safetensors")

	app = FastAPI(
	title="MiniMind Max2 API",
	description="Efficient edge-deployed LLM with MoE architecture",
	version="1.0.0",
	)

	app.add_middleware(
	CORSMiddleware,
	allow_origins=["*"],
	allow_methods=["*"],
	allow_headers=["*"],
	)

	# Request/Response models
	class GenerateRequest(BaseModel):
	prompt: str
	max_tokens: int = 100
	temperature: float = 0.7
	top_p: float = 0.95
	thinking_mode: str = "interleaved"

	class GenerateResponse(BaseModel):
	text: str
	thinking: Optional[str] = None
	tokens_generated: int
	model: str

	class ModelInfo(BaseModel):
	name: str
	variant: str
	architecture: str
	parameters: str
	active_ratio: float
	format: str

	# Global model placeholder
	model = None

	@app.on_event("startup")
	async def load_model():
	global model
	print(f"Loading MiniMind {MODEL_VARIANT}...")
	# In production, load actual model here
	model = {"loaded": True, "variant": MODEL_VARIANT}
	print("Model loaded successfully!")

	@app.get("/health")
	async def health():
	return {"status": "healthy", "model_loaded": model is not None}

	@app.get("/info", response_model=ModelInfo)
	async def info():
	params_map = {
	"max2-nano": "500M (125M active)",
	"max2-lite": "1.5B (375M active)",
	"max2-pro": "3B (750M active)",
	}
	return ModelInfo(
	name="MiniMind Max2",
	variant=MODEL_VARIANT,
	architecture="MoE (8 experts, top-2) + GQA (4:1)",
	parameters=params_map.get(MODEL_VARIANT, "Unknown"),
	active_ratio=0.25,
	format=MODEL_FORMAT,
	)

	@app.post("/generate", response_model=GenerateResponse)
	async def generate(request: GenerateRequest):
	if model is None:
	raise HTTPException(status_code=503, detail="Model not loaded")

	# Simulated generation with thinking
	thinking = None
	if request.thinking_mode != "hidden":
	thinking = f"""<Thinking>
	<step> Analyzing prompt: "{request.prompt[:30]}..."
	<step> Using MoE with top-2 expert routing
	<step> Generating with temperature={request.temperature}
	<conclude> Response ready
	</Thinking>"""

	# Placeholder response
	response_text = f"[MiniMind {MODEL_VARIANT}] Response to: {request.prompt}"

	return GenerateResponse(
	text=response_text,
	thinking=thinking,
	tokens_generated=len(response_text.split()),
	model=MODEL_VARIANT,
	)

	@app.get("/capabilities")
	async def capabilities():
	return {
	"reasoning": ["chain-of-thought", "interleaved-thinking", "sequential-thinking"],
	"vision": ["image-caption", "vqa"],
	"coding": ["completion", "fim", "refactor"],
	"agentic": ["function-calling", "tool-use"],
	"export": ["gguf", "onnx", "tflite", "qnn"],
	}

	if __name__ == "__main__":
	import uvicorn
	port = int(os.getenv("PORT", 8000))
	uvicorn.run(app, host="0.0.0.0", port=port)
	'''

	@classmethod
	def generate(
	cls,
	config: DockerConfig,
	output_dir: str,
	) -> Dict[str, str]:
	"""Generate Dockerfile and supporting files."""
	output_path = Path(output_dir)
	output_path.mkdir(parents=True, exist_ok=True)

	# Parameters by variant
	params_map = {
	"max2-nano": "500M",
	"max2-lite": "1.5B",
	"max2-pro": "3B",
	}

	# Generate Dockerfile
	dockerfile = cls.DOCKERFILE_TEMPLATE.format(
	variant=config.model_variant,
	base_image=config.base_image,
	version="1.0.0",
	params=params_map.get(config.model_variant, "Unknown"),
	format=config.model_format,
	port=config.expose_port,
	)

	dockerfile_path = output_path / "Dockerfile"
	with open(dockerfile_path, 'w') as f:
	f.write(dockerfile)

	# Generate serve script
	serve_path = output_path / "serve.py"
	with open(serve_path, 'w') as f:
	f.write(cls.SERVE_SCRIPT)

	# Generate .dockerignore
	dockerignore = """
	__pycache__/
	*.py[cod]
	*.so
	.git/
	.venv/
	*.egg-info/
	.pytest_cache/
	*.log
	*.tmp
	"""
	dockerignore_path = output_path / ".dockerignore"
	with open(dockerignore_path, 'w') as f:
	f.write(dockerignore)

	return {
	"dockerfile": str(dockerfile_path),
	"serve_script": str(serve_path),
	"dockerignore": str(dockerignore_path),
	}


	class DockerBuilder:
	    """Build, tag and push Docker images by invoking the ``docker`` CLI.

	    Every public method returns ``True`` on success and ``False`` on any
	    failure (non-zero exit status or the command could not be started);
	    failures are reported with ``print`` rather than raised.
	    """

	    # Registry for which image references carry no explicit host prefix.
	    _DEFAULT_REGISTRY = "docker.io"

	    def __init__(self, config: "DockerConfig"):
	        """Store the configuration that names the registry, account and image."""
	        self.config = config

	    def _image_ref(self, tag: Optional[str] = None) -> str:
	        """Return ``[registry/][username/]repository:tag`` for this config.

	        The registry host is included only when it differs from the default
	        registry, and an empty username is skipped so the reference never
	        starts with a stray ``/``. *tag* overrides ``config.tag`` when given.
	        """
	        parts = []
	        registry = self.config.registry
	        if registry and registry != self._DEFAULT_REGISTRY:
	            parts.append(registry)
	        if self.config.username:
	            parts.append(self.config.username)
	        parts.append(self.config.repository)
	        name = "/".join(parts)
	        return f"{name}:{tag or self.config.tag}"

	    def _run(self, cmd: List[str], label: str, stdin_text: Optional[str] = None) -> bool:
	        """Run *cmd*, returning whether it exited with status 0.

	        *label* prefixes the diagnostic that is printed on failure.
	        *stdin_text*, when given, is sent to the process on standard input.
	        """
	        try:
	            result = subprocess.run(
	                cmd,
	                input=stdin_text,
	                capture_output=True,
	                text=True,
	                encoding="utf-8",
	                errors="replace",
	            )
	        except (OSError, ValueError, subprocess.SubprocessError) as e:
	            # Typically the docker executable is missing or not runnable.
	            print(f"{label} error: {e}")
	            return False
	        if result.returncode != 0:
	            print(f"{label} failed: {result.stderr}")
	            return False
	        return True

	    def login(self, password: str) -> bool:
	        """Login to the configured Docker registry.

	        The password is passed on standard input (``--password-stdin``) so
	        it does not appear in the process argument list.
	        """
	        cmd = ["docker", "login", "-u", self.config.username, "--password-stdin"]
	        registry = self.config.registry
	        if registry and registry != self._DEFAULT_REGISTRY:
	            # Without an explicit server, docker login targets Docker Hub.
	            cmd.append(registry)
	        return self._run(cmd, "Login", stdin_text=password)

	    def build(self, context_dir: str, no_cache: bool = False) -> bool:
	        """Build the image from *context_dir*, optionally without layer cache."""
	        image_ref = self._image_ref()
	        cmd = ["docker", "build", "-t", image_ref]
	        if no_cache:
	            cmd.append("--no-cache")
	        cmd.append(context_dir)

	        if not self._run(cmd, "Build"):
	            return False
	        print(f"Built: {image_ref}")
	        return True

	    def push(self) -> bool:
	        """Push the configured image to the registry."""
	        image_ref = self._image_ref()
	        if not self._run(["docker", "push", image_ref], "Push"):
	            return False
	        print(f"Pushed: {image_ref}")
	        return True

	    def tag(self, new_tag: str) -> bool:
	        """Add *new_tag* as an additional tag on the configured image."""
	        source = self._image_ref()
	        target = self._image_ref(new_tag)
	        return self._run(["docker", "tag", source, target], "Tag")


	class OCIArtifactBuilder:
	    """Build OCI artifact metadata for distributing a model file."""

	    # Read size used while hashing; model files can be large.
	    _CHUNK_SIZE = 1024 * 1024

	    def __init__(self, config: "DockerConfig"):
	        """Store the configuration that supplies media type, variant and format."""
	        self.config = config

	    def package_model(
	        self,
	        model_path: str,
	        output_path: str,
	    ) -> str:
	        """Write an OCI image manifest describing *model_path*.

	        Two files are written into the *output_path* directory (created if
	        missing): ``config.json``, a small JSON config blob, and
	        ``manifest.json``, whose ``config`` descriptor carries the digest
	        and size of that blob and whose single layer carries the digest and
	        size of the model file. The model file itself is not copied.

	        Args:
	            model_path: Path to the model file to describe.
	            output_path: Directory that receives the generated files.

	        Returns:
	            Path of the written ``manifest.json`` as a string.

	        Raises:
	            OSError: If the model file cannot be read or the output cannot
	                be written.
	        """
	        model_file = Path(model_path)
	        # Hash first so a missing model fails before anything is written.
	        model_hash = self._compute_sha256(model_path)
	        model_size = model_file.stat().st_size

	        out_dir = Path(output_path)
	        out_dir.mkdir(parents=True, exist_ok=True)

	        # A descriptor must match a real blob byte-for-byte, so the config
	        # is serialised once and those exact bytes are hashed and written.
	        config_blob = json.dumps(
	            {
	                "architecture": "MoE+GQA",
	                "format": self.config.model_format,
	                "variant": self.config.model_variant,
	            },
	            sort_keys=True,
	        ).encode("utf-8")
	        (out_dir / "config.json").write_bytes(config_blob)
	        config_hash = hashlib.sha256(config_blob).hexdigest()

	        manifest = {
	            "schemaVersion": 2,
	            "mediaType": "application/vnd.oci.image.manifest.v1+json",
	            "config": {
	                "mediaType": "application/vnd.minimind.model.config.v1+json",
	                "size": len(config_blob),
	                "digest": f"sha256:{config_hash}",
	            },
	            "layers": [
	                {
	                    "mediaType": self.config.media_type,
	                    "size": model_size,
	                    "digest": f"sha256:{model_hash}",
	                    "annotations": {
	                        "org.opencontainers.image.title": model_file.name,
	                        "ai.model.variant": self.config.model_variant,
	                        "ai.model.format": self.config.model_format,
	                    },
	                }
	            ],
	            "annotations": {
	                "org.opencontainers.image.title": f"MiniMind {self.config.model_variant}",
	                "org.opencontainers.image.description": "Efficient edge LLM with MoE",
	                "ai.model.architecture": "MoE+GQA",
	            },
	        }

	        manifest_path = out_dir / "manifest.json"
	        with open(manifest_path, "w", encoding="utf-8") as f:
	            json.dump(manifest, f, indent=2)

	        return str(manifest_path)

	    def _compute_sha256(self, file_path: str) -> str:
	        """Return the hex SHA-256 digest of the file, read in chunks."""
	        sha256 = hashlib.sha256()
	        with open(file_path, "rb") as f:
	            for chunk in iter(lambda: f.read(self._CHUNK_SIZE), b""):
	                sha256.update(chunk)
	        return sha256.hexdigest()


	def create_docker_package(
	    model_dir: str,
	    output_dir: str,
	    username: str,
	    repository: str = "minimind-max2",
	    variant: str = "max2-nano",
	    tag: str = "latest",
	) -> Dict[str, Any]:
	    """
	    Create the Docker build files for a MiniMind model.

	    Builds a ``DockerConfig`` from the arguments and writes the Dockerfile
	    and supporting files into ``output_dir``. No image is built or pushed.

	    Args:
	        model_dir: Directory containing model files. NOTE(review): this
	            value is accepted but not read by this function, so model
	            files are not copied into ``output_dir`` here; confirm
	            whether the caller is expected to stage them.
	        output_dir: Output directory for Docker files
	        username: Docker Hub username
	        repository: Repository name
	        variant: Model variant
	        tag: Image tag

	    Returns:
	        Dictionary with keys ``"config"`` (the ``DockerConfig``),
	        ``"files"`` (paths of the generated files) and ``"image_tag"``
	        (the ``username/repository:tag`` reference).
	    """
	    config = DockerConfig(
	        username=username,
	        repository=repository,
	        model_variant=variant,
	        tag=tag,
	    )

	    # generate is a classmethod, so no generator instance is needed.
	    files = DockerfileGenerator.generate(config, output_dir)

	    # Skip an empty username so the reference never starts with "/".
	    image_name = "/".join(part for part in (username, repository) if part)

	    return {
	        "config": config,
	        "files": files,
	        "image_tag": f"{image_name}:{tag}",
	    }