"""
MINDI 1.5 Vision-Coder - Model Architecture
Loads Qwen/Qwen2.5-Coder-7B-Instruct with LoRA adapters.
Handles model initialization, LoRA application, save/load,
and parameter counting for the base LLM component.
"""
from __future__ import annotations
from pathlib import Path
from typing import Optional
import torch
from peft import LoraConfig, PeftModel, TaskType, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer
class MINDIArchitecture:
"""Qwen2.5-Coder-7B-Instruct with LoRA for MINDI 1.5 fine-tuning."""
DEFAULT_TARGET_MODULES: list[str] = [
"q_proj", "k_proj", "v_proj", "o_proj",
"gate_proj", "up_proj", "down_proj",
]
def __init__(
self,
model_name: str = "Qwen/Qwen2.5-Coder-7B-Instruct",
device: Optional[str] = None,
cache_dir: Optional[Path] = None,
torch_dtype: torch.dtype = torch.bfloat16,
) -> None:
"""
Initialize the architecture wrapper.
Args:
model_name: HuggingFace model identifier.
device: Target device ('cuda', 'cpu', or None for auto).
cache_dir: Local directory for model weight cache.
torch_dtype: Data type for model weights.
"""
self.model_name = model_name
self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
self.cache_dir = Path(cache_dir) if cache_dir else Path("./checkpoints/base")
self.cache_dir.mkdir(parents=True, exist_ok=True)
self.torch_dtype = torch_dtype
self.model: Optional[AutoModelForCausalLM] = None
self.peft_model: Optional[PeftModel] = None
self.tokenizer: Optional[AutoTokenizer] = None
self._load_model()
def _load_model(self) -> None:
"""Load the base model and tokenizer from HuggingFace or cache."""
print(f"[MINDIArchitecture] Loading {self.model_name} ...")
if self.device == "cuda":
# Clear GPU state before loading
torch.cuda.empty_cache()
torch.cuda.synchronize()
print(f"[MINDIArchitecture] GPU cleared, loading to CPU first ...")
self.model = AutoModelForCausalLM.from_pretrained(
self.model_name,
torch_dtype=self.torch_dtype,
device_map=None,
trust_remote_code=True,
low_cpu_mem_usage=True,
)
param_count = sum(p.numel() for p in self.model.parameters())
print(f"[MINDIArchitecture] CPU load done ({param_count / 1e9:.2f}B params)")
if self.device == "cuda":
print(f"[MINDIArchitecture] Moving to CUDA ...")
self.model = self.model.to("cuda")
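            # Wait for the host-to-device copy to finish before reporting VRAM.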
torch.cuda.synchronize()
vram_gb = torch.cuda.memory_allocated() / (1024**3)
print(f"[MINDIArchitecture] CUDA transfer done ({vram_gb:.1f} GB VRAM)")
self.tokenizer = AutoTokenizer.from_pretrained(
self.model_name,
trust_remote_code=True,
)
print(f"[MINDIArchitecture] Loaded on {self.device} "
f"({self._fmt_params(self._total_params())} params)")
def apply_lora(
self,
r: int = 64,
lora_alpha: int = 128,
lora_dropout: float = 0.05,
target_modules: Optional[list[str]] = None,
) -> PeftModel:
"""
Apply LoRA adapters to the base model.
Args:
r: LoRA rank.
lora_alpha: LoRA scaling factor.
lora_dropout: Dropout probability for LoRA layers.
target_modules: List of module names to apply LoRA to.
Returns:
The PEFT-wrapped model.
"""
if self.model is None:
raise RuntimeError("Base model not loaded.")
if target_modules is None:
target_modules = self.DEFAULT_TARGET_MODULES
lora_config = LoraConfig(
r=r,
lora_alpha=lora_alpha,
lora_dropout=lora_dropout,
target_modules=target_modules,
bias="none",
task_type=TaskType.CAUSAL_LM,
)
self.peft_model = get_peft_model(self.model, lora_config)
info = self.get_trainable_params()
print(f"[MINDIArchitecture] LoRA applied (r={r}, alpha={lora_alpha})")
print(f" Trainable: {info['trainable']:>14,} ({info['trainable_pct']:.2f}%)")
print(f" Frozen: {info['frozen']:>14,}")
print(f" Total: {info['total']:>14,}")
return self.peft_model
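    # Usage sketch (values are illustrative, not project defaults): a smaller
    # adapter for memory-constrained experiments.
    #   arch.apply_lora(r=16, lora_alpha=32, lora_dropout=0.1)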
def get_trainable_params(self) -> dict:
"""
Count trainable, frozen, and total parameters.
Returns:
Dictionary with 'trainable', 'frozen', 'total', 'trainable_pct'.
"""
model = self.peft_model or self.model
if model is None:
return {"trainable": 0, "frozen": 0, "total": 0, "trainable_pct": 0.0}
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
frozen = total - trainable
pct = 100.0 * trainable / total if total > 0 else 0.0
return {
"trainable": trainable,
"frozen": frozen,
"total": total,
"trainable_pct": round(pct, 4),
}
def print_model_info(self) -> None:
"""Print detailed model architecture and parameter information."""
model = self.peft_model or self.model
if model is None:
print("[MINDIArchitecture] No model loaded.")
return
info = self.get_trainable_params()
print()
print("=" * 60)
print(" MINDI 1.5 β€” Model Architecture Info")
print("=" * 60)
print(f" Base model: {self.model_name}")
print(f" Device: {self.device}")
print(f" Dtype: {self.torch_dtype}")
print(f" LoRA active: {self.peft_model is not None}")
print(f" Total params: {self._fmt_params(info['total'])}")
print(f" Trainable: {self._fmt_params(info['trainable'])} "
f"({info['trainable_pct']:.2f}%)")
print(f" Frozen: {self._fmt_params(info['frozen'])}")
if self.peft_model is not None:
config = self.peft_model.peft_config.get("default")
if config is not None:
print(f" LoRA rank: {config.r}")
print(f" LoRA alpha: {config.lora_alpha}")
print(f" LoRA dropout: {config.lora_dropout}")
print(f" Target modules: {config.target_modules}")
print("=" * 60)
print()
def save_lora(self, path: Optional[Path] = None) -> Path:
"""
Save LoRA adapter weights to disk.
Args:
path: Directory to save to. Defaults to checkpoints/lora.
Returns:
Path where weights were saved.
"""
if self.peft_model is None:
raise RuntimeError("No LoRA adapter to save. Call apply_lora() first.")
save_path = Path(path) if path else Path("./checkpoints/lora")
save_path.mkdir(parents=True, exist_ok=True)
self.peft_model.save_pretrained(str(save_path))
print(f"[MINDIArchitecture] LoRA saved to {save_path}")
return save_path
def load_lora(self, path: Path) -> PeftModel:
"""
Load LoRA adapter weights from disk.
Args:
path: Directory containing saved adapter weights.
Returns:
The PEFT-wrapped model with loaded adapter.
"""
path = Path(path)
if not path.exists():
raise FileNotFoundError(f"LoRA adapter not found: {path}")
if self.model is None:
raise RuntimeError("Base model not loaded.")
self.peft_model = PeftModel.from_pretrained(
self.model, str(path)
)
print(f"[MINDIArchitecture] LoRA loaded from {path}")
return self.peft_model
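    # Usage sketch (the path is illustrative): save the adapter after training,
    # then reload it onto a freshly loaded base model.
    #   adapter_dir = arch.save_lora(Path("./checkpoints/lora/run_001"))
    #   arch.load_lora(adapter_dir)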
def resize_embeddings(self, new_vocab_size: int) -> None:
"""Resize model embeddings for new special tokens."""
model = self.peft_model or self.model
if model is None:
raise RuntimeError("No model loaded.")
old_size = model.get_input_embeddings().weight.shape[0]
if new_vocab_size != old_size:
model.resize_token_embeddings(new_vocab_size)
print(f"[MINDIArchitecture] Resized embeddings: {old_size} β†’ {new_vocab_size}")
def get_model(self) -> AutoModelForCausalLM | PeftModel:
"""Return the active model (PEFT if LoRA applied, else base)."""
model = self.peft_model or self.model
if model is None:
raise RuntimeError("No model loaded.")
return model
# ── helpers ───────────────────────────────────────────────────
def _total_params(self) -> int:
model = self.peft_model or self.model
if model is None:
return 0
return sum(p.numel() for p in model.parameters())
@staticmethod
def _fmt_params(n: int) -> str:
if n >= 1_000_000_000:
return f"{n / 1_000_000_000:.2f}B"
if n >= 1_000_000:
return f"{n / 1_000_000:.2f}M"
if n >= 1_000:
return f"{n / 1_000:.1f}K"
return str(n)
# ── Test block ────────────────────────────────────────────────────────
if __name__ == "__main__":
print("=" * 60)
print(" MINDI 1.5 β€” Architecture Test")
print("=" * 60)
print()
# 1. Load base model
arch = MINDIArchitecture(
model_name="Qwen/Qwen2.5-Coder-7B-Instruct",
)
# 2. Apply LoRA
peft_model = arch.apply_lora(
r=64,
lora_alpha=128,
lora_dropout=0.05,
)
# 3. Print full info
arch.print_model_info()
# 4. Verify trainable params
info = arch.get_trainable_params()
assert info["trainable"] > 0, "No trainable parameters!"
assert info["frozen"] > info["trainable"], "More trainable than frozen β€” LoRA may not be applied!"
    # 5. Verify LoRA parameters exist
    lora_params = [name for name, _ in peft_model.named_parameters() if "lora_" in name]
    print(f" LoRA parameter tensors found: {len(lora_params)}")
    assert len(lora_params) > 0, "No LoRA parameters found!"
# 6. Quick forward pass test (small input)
print("\n Running forward pass test ...")
test_input = arch.tokenizer("Hello MINDI!", return_tensors="pt")
test_input = {k: v.to(arch.device) for k, v in test_input.items()}
with torch.no_grad():
        # Pass the input ids as labels so the forward pass also returns a loss.
        output = peft_model(**test_input, labels=test_input["input_ids"])
    print(f" Output logits shape: {output.logits.shape}")
    print(f" Loss: {output.loss.item():.4f}")
print("\n βœ“ All architecture tests passed!")
print("=" * 60)