| """ |
| MINDI 1.5 Vision-Coder β Model Architecture |
| |
| Loads Qwen/Qwen2.5-Coder-7B-Instruct with LoRA adapters. |
| Handles model initialization, LoRA application, save/load, |
| and parameter counting for the base LLM component. |
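
Typical usage (illustrative sketch; the __main__ block at the bottom of this
file runs a fuller smoke test):

    arch = MINDIArchitecture()
    arch.apply_lora(r=64, lora_alpha=128)
    model = arch.get_model()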
| """ |
|
|
| from __future__ import annotations |
|
|
| from pathlib import Path |
| from typing import Optional |
|
|
| import torch |
| from peft import LoraConfig, PeftModel, TaskType, get_peft_model |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
|
|
|
|


class MINDIArchitecture:
    """Qwen2.5-Coder-7B-Instruct with LoRA for MINDI 1.5 fine-tuning."""

    # Attention and MLP projection layers in Qwen2-style transformer blocks;
    # LoRA is applied to all of them by default.
    DEFAULT_TARGET_MODULES: list[str] = [
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ]

    def __init__(
        self,
        model_name: str = "Qwen/Qwen2.5-Coder-7B-Instruct",
        device: Optional[str] = None,
        cache_dir: Optional[Path] = None,
        torch_dtype: torch.dtype = torch.bfloat16,
    ) -> None:
        """
        Initialize the architecture wrapper.

        Args:
            model_name: HuggingFace model identifier.
            device: Target device ("cuda", "cpu", or None for auto-detection).
            cache_dir: Local directory for the model weight cache.
            torch_dtype: Data type for the model weights.
        """
        self.model_name = model_name
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
        self.cache_dir = Path(cache_dir) if cache_dir else Path("./checkpoints/base")
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.torch_dtype = torch_dtype

        self.model: Optional[PreTrainedModel] = None
        self.peft_model: Optional[PeftModel] = None
        self.tokenizer: Optional[PreTrainedTokenizerBase] = None

        self._load_model()

    def _load_model(self) -> None:
        """Load the base model and tokenizer from HuggingFace or the local cache."""
        print(f"[MINDIArchitecture] Loading {self.model_name} ...")

        if self.device == "cuda":
            # Free any stale allocations before the large weight transfer.
            torch.cuda.empty_cache()
            torch.cuda.synchronize()
            print("[MINDIArchitecture] GPU cleared, loading to CPU first ...")

        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name,
            torch_dtype=self.torch_dtype,
            device_map=None,  # load fully on CPU first, then move explicitly
            trust_remote_code=True,
            low_cpu_mem_usage=True,
            cache_dir=self.cache_dir,
        )
        param_count = sum(p.numel() for p in self.model.parameters())
        print(f"[MINDIArchitecture] CPU load done ({param_count / 1e9:.2f}B params)")

        if self.device == "cuda":
            print("[MINDIArchitecture] Moving to CUDA ...")
            self.model = self.model.to("cuda")
            torch.cuda.synchronize()
            vram_gb = torch.cuda.memory_allocated() / (1024**3)
            print(f"[MINDIArchitecture] CUDA transfer done ({vram_gb:.1f} GB VRAM)")

        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_name,
            trust_remote_code=True,
            cache_dir=self.cache_dir,
        )
        print(f"[MINDIArchitecture] Loaded on {self.device} "
              f"({self._fmt_params(self._total_params())} params)")

    def apply_lora(
        self,
        r: int = 64,
        lora_alpha: int = 128,
        lora_dropout: float = 0.05,
        target_modules: Optional[list[str]] = None,
    ) -> PeftModel:
        """
        Apply LoRA adapters to the base model.

        Args:
            r: LoRA rank.
            lora_alpha: LoRA scaling factor (effective scale is lora_alpha / r).
            lora_dropout: Dropout probability for LoRA layers.
            target_modules: Module names to apply LoRA to. Defaults to
                DEFAULT_TARGET_MODULES.

        Returns:
            The PEFT-wrapped model.
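
        Example (illustrative; 'arch' is a MINDIArchitecture instance, and a
        smaller rank suits quick experiments):
            peft_model = arch.apply_lora(r=16, lora_alpha=32)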
| """ |
| if self.model is None: |
| raise RuntimeError("Base model not loaded.") |
|
|
| if target_modules is None: |
| target_modules = self.DEFAULT_TARGET_MODULES |
|
|
| lora_config = LoraConfig( |
| r=r, |
| lora_alpha=lora_alpha, |
| lora_dropout=lora_dropout, |
| target_modules=target_modules, |
| bias="none", |
| task_type=TaskType.CAUSAL_LM, |
| ) |
|
|
| self.peft_model = get_peft_model(self.model, lora_config) |
|
|
| info = self.get_trainable_params() |
| print(f"[MINDIArchitecture] LoRA applied (r={r}, alpha={lora_alpha})") |
| print(f" Trainable: {info['trainable']:>14,} ({info['trainable_pct']:.2f}%)") |
| print(f" Frozen: {info['frozen']:>14,}") |
| print(f" Total: {info['total']:>14,}") |
|
|
| return self.peft_model |
|
|

    def get_trainable_params(self) -> dict:
        """
        Count trainable, frozen, and total parameters.

        Returns:
            Dictionary with 'trainable', 'frozen', 'total', 'trainable_pct'.
        """
        model = self.peft_model or self.model
        if model is None:
            return {"trainable": 0, "frozen": 0, "total": 0, "trainable_pct": 0.0}

        trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
        total = sum(p.numel() for p in model.parameters())
        frozen = total - trainable
        pct = 100.0 * trainable / total if total > 0 else 0.0

        return {
            "trainable": trainable,
            "frozen": frozen,
            "total": total,
            "trainable_pct": round(pct, 4),
        }

    def print_model_info(self) -> None:
        """Print detailed model architecture and parameter information."""
        model = self.peft_model or self.model
        if model is None:
            print("[MINDIArchitecture] No model loaded.")
            return

        info = self.get_trainable_params()
        print()
        print("=" * 60)
        print("  MINDI 1.5 - Model Architecture Info")
        print("=" * 60)
        print(f"  Base model:     {self.model_name}")
        print(f"  Device:         {self.device}")
        print(f"  Dtype:          {self.torch_dtype}")
        print(f"  LoRA active:    {self.peft_model is not None}")
        print(f"  Total params:   {self._fmt_params(info['total'])}")
        print(f"  Trainable:      {self._fmt_params(info['trainable'])} "
              f"({info['trainable_pct']:.2f}%)")
        print(f"  Frozen:         {self._fmt_params(info['frozen'])}")

        if self.peft_model is not None:
            config = self.peft_model.peft_config.get("default")
            if config is not None:
                print(f"  LoRA rank:      {config.r}")
                print(f"  LoRA alpha:     {config.lora_alpha}")
                print(f"  LoRA dropout:   {config.lora_dropout}")
                print(f"  Target modules: {config.target_modules}")
        print("=" * 60)
        print()

    def save_lora(self, path: Optional[Path] = None) -> Path:
        """
        Save LoRA adapter weights to disk.

        Args:
            path: Directory to save to. Defaults to ./checkpoints/lora.

        Returns:
            Path where the adapter weights were saved.
        """
        if self.peft_model is None:
            raise RuntimeError("No LoRA adapter to save. Call apply_lora() first.")

        save_path = Path(path) if path else Path("./checkpoints/lora")
        save_path.mkdir(parents=True, exist_ok=True)
        # save_pretrained on a PeftModel writes only the adapter weights and
        # config, not the full base model.
        self.peft_model.save_pretrained(str(save_path))
        print(f"[MINDIArchitecture] LoRA saved to {save_path}")
        return save_path

    def load_lora(self, path: Path, is_trainable: bool = False) -> PeftModel:
        """
        Load LoRA adapter weights from disk.

        Args:
            path: Directory containing saved adapter weights.
            is_trainable: Load the adapter for continued training instead of
                inference-only use (PEFT freezes loaded adapters by default).

        Returns:
            The PEFT-wrapped model with the loaded adapter.
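
        Example (illustrative round trip; loading on a fresh instance avoids
        stacking adapters on an already-wrapped base model):
            arch.save_lora(Path("./checkpoints/lora"))
            fresh = MINDIArchitecture()
            fresh.load_lora(Path("./checkpoints/lora"), is_trainable=True)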
| """ |
| path = Path(path) |
| if not path.exists(): |
| raise FileNotFoundError(f"LoRA adapter not found: {path}") |
| if self.model is None: |
| raise RuntimeError("Base model not loaded.") |
|
|
| self.peft_model = PeftModel.from_pretrained( |
| self.model, str(path) |
| ) |
| print(f"[MINDIArchitecture] LoRA loaded from {path}") |
| return self.peft_model |
|
|

    def resize_embeddings(self, new_vocab_size: int) -> None:
        """
        Resize model embeddings to accommodate newly added special tokens.
        """
        model = self.peft_model or self.model
        if model is None:
            raise RuntimeError("No model loaded.")
        old_size = model.get_input_embeddings().weight.shape[0]
        if new_vocab_size != old_size:
            model.resize_token_embeddings(new_vocab_size)
            print(f"[MINDIArchitecture] Resized embeddings: {old_size} -> {new_vocab_size}")

    def get_model(self) -> PreTrainedModel | PeftModel:
        """Return the active model (PEFT-wrapped if LoRA is applied, else base)."""
        model = self.peft_model or self.model
        if model is None:
            raise RuntimeError("No model loaded.")
        return model

    def _total_params(self) -> int:
        """Total parameter count of the active model (0 if none loaded)."""
        model = self.peft_model or self.model
        if model is None:
            return 0
        return sum(p.numel() for p in model.parameters())

    @staticmethod
    def _fmt_params(n: int) -> str:
        """Format a parameter count with a B/M/K suffix."""
        if n >= 1_000_000_000:
            return f"{n / 1_000_000_000:.2f}B"
        if n >= 1_000_000:
            return f"{n / 1_000_000:.2f}M"
        if n >= 1_000:
            return f"{n / 1_000:.1f}K"
        return str(n)


if __name__ == "__main__":
    print("=" * 60)
    print("  MINDI 1.5 - Architecture Test")
    print("=" * 60)
    print()

    # Load the base model (downloaded on first run, cached afterwards).
    arch = MINDIArchitecture(
        model_name="Qwen/Qwen2.5-Coder-7B-Instruct",
    )

    # Apply LoRA adapters with the default training configuration.
    peft_model = arch.apply_lora(
        r=64,
        lora_alpha=128,
        lora_dropout=0.05,
    )

    arch.print_model_info()

    # Sanity-check the trainable/frozen parameter split.
    info = arch.get_trainable_params()
    assert info["trainable"] > 0, "No trainable parameters!"
    assert info["frozen"] > info["trainable"], "More trainable than frozen - LoRA may not be applied!"

    # Confirm that LoRA modules were actually injected.
    lora_modules = [name for name, _ in peft_model.named_parameters() if "lora_" in name]
    print(f"  LoRA modules found: {len(lora_modules)}")
    assert len(lora_modules) > 0, "No LoRA modules found!"

    # Forward pass with labels so the model also returns a loss
    # (without labels, output.loss would be None).
    print("\n  Running forward pass test ...")
    test_input = arch.tokenizer("Hello MINDI!", return_tensors="pt")
    test_input = {k: v.to(arch.device) for k, v in test_input.items()}
    with torch.no_grad():
        output = peft_model(**test_input, labels=test_input["input_ids"])
    print(f"  Output logits shape: {output.logits.shape}")
    print(f"  Loss: {output.loss.item():.4f}")

    print("\n  ✓ All architecture tests passed!")
    print("=" * 60)