"""
MINDI 1.5 Vision-Coder — Inference Pipeline
End-to-end inference: takes a user prompt, runs through the agent
pipeline, and returns generated Next.js + Tailwind + TypeScript code.
"""
from __future__ import annotations
from pathlib import Path
from typing import Optional
import torch
from transformers import AutoTokenizer
class InferencePipeline:
"""Inference pipeline for MINDI 1.5 code generation."""
def __init__(
self,
model: Optional[object] = None,
tokenizer: Optional[AutoTokenizer] = None,
device: Optional[str] = None,
max_new_tokens: int = 4096,
) -> None:
self.model = model
self.tokenizer = tokenizer
self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
self.max_new_tokens = max_new_tokens
def generate(
        self,
        prompt: str,
        temperature: float = 0.7,
        top_p: float = 0.95,
        top_k: int = 50,
    ) -> str:
        """Generate code from a user prompt."""
        if self.model is None or self.tokenizer is None:
            raise RuntimeError("Model and tokenizer must be loaded before inference.")

        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        with torch.inference_mode():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=self.max_new_tokens,
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
                do_sample=True,
                pad_token_id=self.tokenizer.eos_token_id,
            )
        # Drop the prompt tokens so only the newly generated text is decoded.
        generated = outputs[0][inputs["input_ids"].shape[1]:]
        # skip_special_tokens=True strips EOS/padding markers so the caller
        # receives clean source code rather than raw model output.
        return self.tokenizer.decode(generated, skip_special_tokens=True)

    @classmethod
    def from_checkpoint(
        cls,
        checkpoint_dir: Path,
        base_model_name: str = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
    ) -> "InferencePipeline":
        """Load an inference pipeline from a saved checkpoint."""
        # Local import keeps the model stack out of module import time.
        from src.model.code_model import MindiCodeModel

        model_wrapper = MindiCodeModel(model_name=base_model_name)
        model_wrapper.load_base_model()
        model_wrapper.load_adapter(checkpoint_dir)

        tokenizer = AutoTokenizer.from_pretrained(
            base_model_name, trust_remote_code=True
        )
        return cls(
            model=model_wrapper.peft_model,
            tokenizer=tokenizer,
        )
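
# Usage sketch (illustrative): a minimal way to drive the pipeline end to end,
# assuming a fine-tuned adapter checkpoint exists on disk. The checkpoint path
# and prompt below are hypothetical placeholders, not paths shipped with the
# repo.
if __name__ == "__main__":
    pipeline = InferencePipeline.from_checkpoint(
        checkpoint_dir=Path("checkpoints/mindi-1.5"),  # hypothetical location
    )
    page_code = pipeline.generate(
        prompt="Build a responsive pricing page with three tiers.",
        temperature=0.7,
    )
    print(page_code)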