File size: 2,445 Bytes
553fbf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
"""
MINDI 1.5 Vision-Coder — Inference Pipeline

End-to-end inference: takes a user prompt, runs through the agent
pipeline, and returns generated Next.js + Tailwind + TypeScript code.
"""

from __future__ import annotations

from pathlib import Path
from typing import Optional

import torch
from transformers import AutoTokenizer


class InferencePipeline:
    """Inference pipeline for MINDI 1.5 code generation.

    Holds a generative model together with its tokenizer and exposes
    :meth:`generate`, which turns a text prompt into decoded model output.
    Use :meth:`from_checkpoint` to build a ready-to-use pipeline from a saved
    adapter checkpoint.
    """

    def __init__(
        self,
        model: Optional[object] = None,
        tokenizer: Optional["AutoTokenizer"] = None,
        device: Optional[str] = None,
        max_new_tokens: int = 4096,
    ) -> None:
        """Store the model, tokenizer and generation settings.

        Args:
            model: Object exposing a Hugging Face style ``generate`` method.
                May be ``None``; it must be set before calling
                :meth:`generate`.
            tokenizer: Tokenizer matching ``model``. May be ``None``; it must
                be set before calling :meth:`generate`.
            device: Device the tokenized prompt is moved to. When omitted,
                the device of the model's first parameter is used so inputs
                land where the model already lives; if that cannot be
                determined, ``"cuda"`` is used when available, else ``"cpu"``.
            max_new_tokens: Upper bound on tokens generated per call.
        """
        self.model = model
        self.tokenizer = tokenizer
        self.device = device or self._default_device(model)
        self.max_new_tokens = max_new_tokens

    @staticmethod
    def _default_device(model: Optional[object]) -> str:
        """Return the device name inputs should be sent to for ``model``."""
        parameters = getattr(model, "parameters", None)
        if callable(parameters):
            # Follow the model rather than guessing: a CPU-resident model on a
            # CUDA-capable host would otherwise receive CUDA inputs and fail.
            first_param = next(iter(parameters()), None)
            param_device = getattr(first_param, "device", None)
            if param_device is not None:
                return str(param_device)
        return "cuda" if torch.cuda.is_available() else "cpu"

    def generate(
        self,
        prompt: str,
        temperature: float = 0.7,
        top_p: float = 0.95,
        top_k: int = 50,
    ) -> str:
        """Generate code from a user prompt.

        Args:
            prompt: Text passed to the tokenizer as-is.
            temperature: Sampling temperature. A value of ``0`` selects
                greedy (deterministic) decoding, in which case ``top_p`` and
                ``top_k`` are not forwarded to the model.
            top_p: Nucleus-sampling probability mass (sampling mode only).
            top_k: Number of highest-probability tokens kept (sampling mode
                only).

        Returns:
            The decoded continuation only (prompt tokens are stripped).
            Special tokens are kept in the returned text.

        Raises:
            RuntimeError: If the model or tokenizer has not been set.
        """
        if self.model is None or self.tokenizer is None:
            raise RuntimeError("Model and tokenizer must be loaded before inference.")

        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        prompt_length = inputs["input_ids"].shape[1]

        if temperature == 0:
            # Sampling with a zero temperature is rejected by ``generate``;
            # greedy decoding is the well-defined limit of temperature -> 0.
            decoding_kwargs = {"do_sample": False}
        else:
            decoding_kwargs = {
                "do_sample": True,
                "temperature": temperature,
                "top_p": top_p,
                "top_k": top_k,
            }

        with torch.inference_mode():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=self.max_new_tokens,
                # EOS doubles as the padding token id for generation.
                pad_token_id=self.tokenizer.eos_token_id,
                **decoding_kwargs,
            )

        # The returned sequence starts with the prompt; keep only what follows.
        generated = outputs[0][prompt_length:]
        return self.tokenizer.decode(generated, skip_special_tokens=False)

    @classmethod
    def from_checkpoint(
        cls,
        checkpoint_dir: Path,
        base_model_name: str = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
    ) -> "InferencePipeline":
        """Load an inference pipeline from a saved checkpoint.

        Builds a ``MindiCodeModel`` for ``base_model_name``, loads the base
        model, applies the adapter stored in ``checkpoint_dir`` and pairs the
        result with the base model's tokenizer.

        Args:
            checkpoint_dir: Directory passed to ``MindiCodeModel.load_adapter``.
            base_model_name: Model identifier used for both the base model
                and the tokenizer.

        Returns:
            A pipeline using the wrapper's ``peft_model`` and the loaded
            tokenizer, with default device and ``max_new_tokens``.
        """
        # Imported lazily so this module can be imported without pulling in
        # the project model code.
        from src.model.code_model import MindiCodeModel

        model_wrapper = MindiCodeModel(model_name=base_model_name)
        model_wrapper.load_base_model()
        model_wrapper.load_adapter(checkpoint_dir)

        # NOTE: trust_remote_code=True lets the tokenizer repository run its
        # own Python code; only use with model sources you trust.
        tokenizer = AutoTokenizer.from_pretrained(
            base_model_name, trust_remote_code=True
        )

        return cls(
            model=model_wrapper.peft_model,
            tokenizer=tokenizer,
        )