""" MINDI 1.5 Vision-Coder — Inference Pipeline End-to-end inference: takes a user prompt, runs through the agent pipeline, and returns generated Next.js + Tailwind + TypeScript code. """ from __future__ import annotations from pathlib import Path from typing import Optional import torch from transformers import AutoTokenizer class InferencePipeline: """Inference pipeline for MINDI 1.5 code generation.""" def __init__( self, model: Optional[object] = None, tokenizer: Optional[AutoTokenizer] = None, device: Optional[str] = None, max_new_tokens: int = 4096, ) -> None: self.model = model self.tokenizer = tokenizer self.device = device or ("cuda" if torch.cuda.is_available() else "cpu") self.max_new_tokens = max_new_tokens def generate( self, prompt: str, temperature: float = 0.7, top_p: float = 0.95, top_k: int = 50, ) -> str: """Generate code from a user prompt.""" if self.model is None or self.tokenizer is None: raise RuntimeError("Model and tokenizer must be loaded before inference.") inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device) with torch.inference_mode(): outputs = self.model.generate( **inputs, max_new_tokens=self.max_new_tokens, temperature=temperature, top_p=top_p, top_k=top_k, do_sample=True, pad_token_id=self.tokenizer.eos_token_id, ) generated = outputs[0][inputs["input_ids"].shape[1]:] return self.tokenizer.decode(generated, skip_special_tokens=False) @classmethod def from_checkpoint( cls, checkpoint_dir: Path, base_model_name: str = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct", ) -> "InferencePipeline": """Load an inference pipeline from a saved checkpoint.""" from src.model.code_model import MindiCodeModel model_wrapper = MindiCodeModel(model_name=base_model_name) model_wrapper.load_base_model() model_wrapper.load_adapter(checkpoint_dir) tokenizer = AutoTokenizer.from_pretrained( base_model_name, trust_remote_code=True ) return cls( model=model_wrapper.peft_model, tokenizer=tokenizer, )