"""
MINDI 1.5 Vision-Coder — Inference Pipeline
End-to-end inference: takes a user prompt, runs through the agent
pipeline, and returns generated Next.js + Tailwind + TypeScript code.
"""
from __future__ import annotations
from pathlib import Path
from typing import Optional
import torch
from transformers import AutoTokenizer
class InferencePipeline:
    """Inference pipeline for MINDI 1.5 code generation.

    Wraps a model object exposing ``generate`` together with its tokenizer,
    and turns a text prompt into decoded generated text.

    Attributes are plain and public: ``model``, ``tokenizer``, ``device``
    and ``max_new_tokens``.
    """

    def __init__(
        self,
        model: Optional[object] = None,
        tokenizer: Optional[AutoTokenizer] = None,
        device: Optional[str] = None,
        max_new_tokens: int = 4096,
    ) -> None:
        """Store the model, tokenizer and generation settings.

        Args:
            model: Object providing a ``generate(**kwargs)`` method. May be
                ``None``; :meth:`generate` then raises ``RuntimeError``.
            tokenizer: Callable tokenizer providing ``eos_token_id`` and
                ``decode``. May be ``None`` (same consequence as above).
            device: Device the tokenized prompt is moved to. When falsy,
                ``"cuda"`` is used if available, otherwise ``"cpu"``.
            max_new_tokens: Upper bound on generated tokens per call.
        """
        self.model = model
        self.tokenizer = tokenizer
        # Only the *inputs* are moved to this device in generate(); the model
        # itself is used wherever the caller placed it.
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.max_new_tokens = max_new_tokens

    def generate(
        self,
        prompt: str,
        temperature: float = 0.7,
        top_p: float = 0.95,
        top_k: int = 50,
    ) -> str:
        """Generate code from a user prompt.

        Args:
            prompt: Text passed to the tokenizer as-is.
            temperature: Sampling temperature. A value ``<= 0`` selects
                greedy (non-sampling) decoding instead of being forwarded,
                because a sampling temperature must be strictly positive.
            top_p: Nucleus sampling threshold (used only when sampling).
            top_k: Top-k sampling cutoff (used only when sampling).

        Returns:
            The decoded continuation only (prompt tokens are sliced off).
            Special tokens are kept in the text (``skip_special_tokens=False``).

        Raises:
            RuntimeError: If the model or the tokenizer has not been set.
        """
        if self.model is None or self.tokenizer is None:
            raise RuntimeError("Model and tokenizer must be loaded before inference.")

        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        prompt_length = inputs["input_ids"].shape[1]

        generation_kwargs = {
            "max_new_tokens": self.max_new_tokens,
            # EOS doubles as the padding token id for generation.
            "pad_token_id": self.tokenizer.eos_token_id,
        }
        if temperature > 0:
            generation_kwargs.update(
                do_sample=True,
                temperature=temperature,
                top_p=top_p,
                top_k=top_k,
            )
        else:
            # Greedy decoding: sampling-only arguments are deliberately left
            # out so a zero temperature never reaches the sampler.
            generation_kwargs["do_sample"] = False

        with torch.inference_mode():
            outputs = self.model.generate(**inputs, **generation_kwargs)

        # The returned sequence starts with the prompt; keep only what follows.
        generated = outputs[0][prompt_length:]
        return self.tokenizer.decode(generated, skip_special_tokens=False)

    @classmethod
    def from_checkpoint(
        cls,
        checkpoint_dir: Path,
        base_model_name: str = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
        device: Optional[str] = None,
        max_new_tokens: int = 4096,
    ) -> "InferencePipeline":
        """Load an inference pipeline from a saved checkpoint.

        Args:
            checkpoint_dir: Adapter checkpoint location handed to
                ``MindiCodeModel.load_adapter``.
            base_model_name: Model identifier used for both the base model
                and the tokenizer.
            device: Forwarded to the constructor (``None`` = auto-detect).
            max_new_tokens: Forwarded to the constructor.

        Returns:
            A pipeline built around the wrapper's ``peft_model``.
        """
        # Imported lazily so the module can be imported without the project's
        # model package being importable.
        from src.model.code_model import MindiCodeModel

        model_wrapper = MindiCodeModel(model_name=base_model_name)
        model_wrapper.load_base_model()
        model_wrapper.load_adapter(checkpoint_dir)

        # NOTE: trust_remote_code=True lets the model repository run its own
        # Python code while loading; only use base models you trust.
        # NOTE(review): the tokenizer is loaded from the base model, not from
        # checkpoint_dir — confirm fine-tuning did not add tokens.
        tokenizer = AutoTokenizer.from_pretrained(
            base_model_name, trust_remote_code=True
        )
        return cls(
            model=model_wrapper.peft_model,
            tokenizer=tokenizer,
            device=device,
            max_new_tokens=max_new_tokens,
        )
|