"""
MINDI 1.5 Vision-Coder — Code Generation Model

Loads the base coding model with LoRA adapters for fine-tuning
on Next.js + Tailwind + TypeScript code generation.
"""

from __future__ import annotations

from pathlib import Path
from typing import Optional

import torch
from peft import LoraConfig, PeftModel, get_peft_model, TaskType
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, PreTrainedModel


class MindiCodeModel:
    """Base coding model with LoRA for MINDI 1.5 fine-tuning."""

    def __init__(
        self,
        model_name: str = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
        device: Optional[str] = None,
        cache_dir: Optional[Path] = None,
        load_in_4bit: bool = False,
    ) -> None:
        self.model_name = model_name
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.cache_dir = cache_dir or Path("./checkpoints/base")
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.load_in_4bit = load_in_4bit
        self.model: Optional[PreTrainedModel] = None
        self.peft_model: Optional[PeftModel] = None

    def load_base_model(self) -> PreTrainedModel:
        """Load the base model with optional 4-bit quantization."""
        quantization_config = None
        if self.load_in_4bit:
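            # QLoRA-style config: NF4 weights with nested (double) quantization
            # and bf16 compute. Note that bitsandbytes 4-bit kernels generally
            # require a CUDA device.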
            quantization_config = BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.bfloat16,
                bnb_4bit_use_double_quant=True,
            )

        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            cache_dir=str(self.cache_dir),
            torch_dtype=torch.bfloat16,
            device_map="auto" if self.device == "cuda" else None,
            quantization_config=quantization_config,
            trust_remote_code=True,
        )
        return self.model

    def apply_lora(
        self,
        rank: int = 64,
        alpha: int = 128,
        dropout: float = 0.05,
        target_modules: Optional[list[str]] = None,
    ) -> PeftModel:
        """Apply LoRA adapters to the base model for efficient fine-tuning."""
        if self.model is None:
            raise RuntimeError("Base model not loaded. Call load_base_model() first.")

        if target_modules is None:
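            # Default targets cover the attention and MLP projections under
            # LLaMA-style module naming; architectures with fused or factored
            # attention may expose different names, so pass target_modules
            # explicitly for those.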
            target_modules = [
                "q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj",
            ]

        lora_config = LoraConfig(
            r=rank,
            lora_alpha=alpha,
            lora_dropout=dropout,
            target_modules=target_modules,
            bias="none",
            task_type=TaskType.CAUSAL_LM,
        )

        self.peft_model = get_peft_model(self.model, lora_config)
        trainable, total = self._count_parameters()
        print(f"[MindiCodeModel] LoRA applied — trainable: {trainable:,} / {total:,} "
              f"({100 * trainable / total:.2f}%)")
        return self.peft_model

    def _count_parameters(self) -> tuple[int, int]:
        """Count trainable and total parameters."""
        model = self.peft_model or self.model
        if model is None:
            return 0, 0
        trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
        total = sum(p.numel() for p in model.parameters())
        return trainable, total

    def save_adapter(self, output_dir: Optional[Path] = None) -> Path:
        """Save the LoRA adapter weights."""
        if self.peft_model is None:
            raise RuntimeError("No LoRA adapter to save. Call apply_lora() first.")
        save_path = output_dir or Path("./checkpoints/finetuned")
        save_path.mkdir(parents=True, exist_ok=True)
        self.peft_model.save_pretrained(str(save_path))
        return save_path

    def load_adapter(self, adapter_dir: Path) -> PeftModel:
        """Load a saved LoRA adapter onto the base model."""
        if self.model is None:
            self.load_base_model()
        self.peft_model = PeftModel.from_pretrained(
            self.model, str(adapter_dir)
        )
        return self.peft_model

    def resize_embeddings(self, new_vocab_size: int) -> None:
        """Resize model embeddings to accommodate new special tokens."""
        model = self.peft_model or self.model
        if model is None:
            raise RuntimeError("No model loaded.")
        model.resize_token_embeddings(new_vocab_size)
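

# Illustrative usage sketch: shows the intended call order only. The training
# loop itself is assumed to live elsewhere; the paths and hyperparameters are
# the defaults defined above, not requirements.
if __name__ == "__main__":
    mindi = MindiCodeModel(load_in_4bit=True)
    mindi.load_base_model()               # 1. load the (quantized) base weights
    mindi.apply_lora(rank=64, alpha=128)  # 2. attach trainable LoRA adapters
    # ... run fine-tuning with an external trainer here ...
    adapter_path = mindi.save_adapter()   # 3. persist the adapter weights only
    print(f"[MindiCodeModel] adapter saved to {adapter_path}")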