""" MINDI 1.5 Vision-Coder — Model Architecture Loads Qwen/Qwen2.5-Coder-7B-Instruct with LoRA adapters. Handles model initialization, LoRA application, save/load, and parameter counting for the base LLM component. """ from __future__ import annotations from pathlib import Path from typing import Optional import torch from peft import LoraConfig, PeftModel, TaskType, get_peft_model from transformers import AutoModelForCausalLM, AutoTokenizer class MINDIArchitecture: """Qwen2.5-Coder-7B-Instruct with LoRA for MINDI 1.5 fine-tuning.""" DEFAULT_TARGET_MODULES: list[str] = [ "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj", ] def __init__( self, model_name: str = "Qwen/Qwen2.5-Coder-7B-Instruct", device: Optional[str] = None, cache_dir: Optional[Path] = None, torch_dtype: torch.dtype = torch.bfloat16, ) -> None: """ Initialize the architecture wrapper. Args: model_name: HuggingFace model identifier. device: Target device ('cuda', 'cpu', or None for auto). cache_dir: Local directory for model weight cache. torch_dtype: Data type for model weights. """ self.model_name = model_name self.device = device or ("cuda" if torch.cuda.is_available() else "cpu") self.cache_dir = Path(cache_dir) if cache_dir else Path("./checkpoints/base") self.cache_dir.mkdir(parents=True, exist_ok=True) self.torch_dtype = torch_dtype self.model: Optional[AutoModelForCausalLM] = None self.peft_model: Optional[PeftModel] = None self.tokenizer: Optional[AutoTokenizer] = None self._load_model() def _load_model(self) -> None: """Load the base model and tokenizer from HuggingFace or cache.""" print(f"[MINDIArchitecture] Loading {self.model_name} ...") if self.device == "cuda": # Clear GPU state before loading torch.cuda.empty_cache() torch.cuda.synchronize() print(f"[MINDIArchitecture] GPU cleared, loading to CPU first ...") self.model = AutoModelForCausalLM.from_pretrained( self.model_name, torch_dtype=self.torch_dtype, device_map=None, trust_remote_code=True, low_cpu_mem_usage=True, ) param_count = sum(p.numel() for p in self.model.parameters()) print(f"[MINDIArchitecture] CPU load done ({param_count / 1e9:.2f}B params)") if self.device == "cuda": print(f"[MINDIArchitecture] Moving to CUDA ...") self.model = self.model.to("cuda") torch.cuda.synchronize() vram_gb = torch.cuda.memory_allocated() / (1024**3) print(f"[MINDIArchitecture] CUDA transfer done ({vram_gb:.1f} GB VRAM)") self.tokenizer = AutoTokenizer.from_pretrained( self.model_name, trust_remote_code=True, ) print(f"[MINDIArchitecture] Loaded on {self.device} " f"({self._fmt_params(self._total_params())} params)") def apply_lora( self, r: int = 64, lora_alpha: int = 128, lora_dropout: float = 0.05, target_modules: Optional[list[str]] = None, ) -> PeftModel: """ Apply LoRA adapters to the base model. Args: r: LoRA rank. lora_alpha: LoRA scaling factor. lora_dropout: Dropout probability for LoRA layers. target_modules: List of module names to apply LoRA to. Returns: The PEFT-wrapped model. """ if self.model is None: raise RuntimeError("Base model not loaded.") if target_modules is None: target_modules = self.DEFAULT_TARGET_MODULES lora_config = LoraConfig( r=r, lora_alpha=lora_alpha, lora_dropout=lora_dropout, target_modules=target_modules, bias="none", task_type=TaskType.CAUSAL_LM, ) self.peft_model = get_peft_model(self.model, lora_config) info = self.get_trainable_params() print(f"[MINDIArchitecture] LoRA applied (r={r}, alpha={lora_alpha})") print(f" Trainable: {info['trainable']:>14,} ({info['trainable_pct']:.2f}%)") print(f" Frozen: {info['frozen']:>14,}") print(f" Total: {info['total']:>14,}") return self.peft_model def get_trainable_params(self) -> dict: """ Count trainable, frozen, and total parameters. Returns: Dictionary with 'trainable', 'frozen', 'total', 'trainable_pct'. """ model = self.peft_model or self.model if model is None: return {"trainable": 0, "frozen": 0, "total": 0, "trainable_pct": 0.0} trainable = sum(p.numel() for p in model.parameters() if p.requires_grad) total = sum(p.numel() for p in model.parameters()) frozen = total - trainable pct = 100.0 * trainable / total if total > 0 else 0.0 return { "trainable": trainable, "frozen": frozen, "total": total, "trainable_pct": round(pct, 4), } def print_model_info(self) -> None: """Print detailed model architecture and parameter information.""" model = self.peft_model or self.model if model is None: print("[MINDIArchitecture] No model loaded.") return info = self.get_trainable_params() print() print("=" * 60) print(" MINDI 1.5 — Model Architecture Info") print("=" * 60) print(f" Base model: {self.model_name}") print(f" Device: {self.device}") print(f" Dtype: {self.torch_dtype}") print(f" LoRA active: {self.peft_model is not None}") print(f" Total params: {self._fmt_params(info['total'])}") print(f" Trainable: {self._fmt_params(info['trainable'])} " f"({info['trainable_pct']:.2f}%)") print(f" Frozen: {self._fmt_params(info['frozen'])}") if self.peft_model is not None: config = self.peft_model.peft_config.get("default") if config is not None: print(f" LoRA rank: {config.r}") print(f" LoRA alpha: {config.lora_alpha}") print(f" LoRA dropout: {config.lora_dropout}") print(f" Target modules: {config.target_modules}") print("=" * 60) print() def save_lora(self, path: Optional[Path] = None) -> Path: """ Save LoRA adapter weights to disk. Args: path: Directory to save to. Defaults to checkpoints/lora. Returns: Path where weights were saved. """ if self.peft_model is None: raise RuntimeError("No LoRA adapter to save. Call apply_lora() first.") save_path = Path(path) if path else Path("./checkpoints/lora") save_path.mkdir(parents=True, exist_ok=True) self.peft_model.save_pretrained(str(save_path)) print(f"[MINDIArchitecture] LoRA saved to {save_path}") return save_path def load_lora(self, path: Path) -> PeftModel: """ Load LoRA adapter weights from disk. Args: path: Directory containing saved adapter weights. Returns: The PEFT-wrapped model with loaded adapter. """ path = Path(path) if not path.exists(): raise FileNotFoundError(f"LoRA adapter not found: {path}") if self.model is None: raise RuntimeError("Base model not loaded.") self.peft_model = PeftModel.from_pretrained( self.model, str(path) ) print(f"[MINDIArchitecture] LoRA loaded from {path}") return self.peft_model def resize_embeddings(self, new_vocab_size: int) -> None: """Resize model embeddings for new special tokens.""" model = self.peft_model or self.model if model is None: raise RuntimeError("No model loaded.") old_size = model.get_input_embeddings().weight.shape[0] if new_vocab_size != old_size: model.resize_token_embeddings(new_vocab_size) print(f"[MINDIArchitecture] Resized embeddings: {old_size} → {new_vocab_size}") def get_model(self) -> AutoModelForCausalLM | PeftModel: """Return the active model (PEFT if LoRA applied, else base).""" model = self.peft_model or self.model if model is None: raise RuntimeError("No model loaded.") return model # ── helpers ─────────────────────────────────────────────────── def _total_params(self) -> int: model = self.peft_model or self.model if model is None: return 0 return sum(p.numel() for p in model.parameters()) @staticmethod def _fmt_params(n: int) -> str: if n >= 1_000_000_000: return f"{n / 1_000_000_000:.2f}B" if n >= 1_000_000: return f"{n / 1_000_000:.2f}M" if n >= 1_000: return f"{n / 1_000:.1f}K" return str(n) # ── Test block ──────────────────────────────────────────────────────── if __name__ == "__main__": print("=" * 60) print(" MINDI 1.5 — Architecture Test") print("=" * 60) print() # 1. Load base model arch = MINDIArchitecture( model_name="Qwen/Qwen2.5-Coder-7B-Instruct", ) # 2. Apply LoRA peft_model = arch.apply_lora( r=64, lora_alpha=128, lora_dropout=0.05, ) # 3. Print full info arch.print_model_info() # 4. Verify trainable params info = arch.get_trainable_params() assert info["trainable"] > 0, "No trainable parameters!" assert info["frozen"] > info["trainable"], "More trainable than frozen — LoRA may not be applied!" # 5. Verify LoRA modules exist lora_modules = [name for name, _ in peft_model.named_parameters() if "lora_" in name] print(f" LoRA modules found: {len(lora_modules)}") assert len(lora_modules) > 0, "No LoRA modules found!" # 6. Quick forward pass test (small input) print("\n Running forward pass test ...") test_input = arch.tokenizer("Hello MINDI!", return_tensors="pt") test_input = {k: v.to(arch.device) for k, v in test_input.items()} with torch.no_grad(): output = peft_model(**test_input) print(f" Output logits shape: {output.logits.shape}") print(f" Loss: {output.loss}") print("\n ✓ All architecture tests passed!") print("=" * 60)