""" Component 4 verification script. This script: - Builds model from config. - Runs a small forward pass. - Prints live VRAM usage at each stage. """ from __future__ import annotations import argparse import sys from pathlib import Path from typing import Any, Dict import torch import yaml # Ensure src imports work from project root. PROJECT_ROOT = Path(__file__).resolve().parents[1] if str(PROJECT_ROOT) not in sys.path: sys.path.insert(0, str(PROJECT_ROOT)) from src.model_architecture.code_transformer import ( # noqa: E402 CodeTransformerLM, ModelConfig, get_model_presets, ) def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description="Verify Component 4 model load and VRAM usage.") parser.add_argument( "--config", default="configs/component4_model_config.yaml", help="Path to model YAML config.", ) parser.add_argument("--batch_size", type=int, default=1, help="Batch size for forward test.") parser.add_argument("--seq_len", type=int, default=256, help="Sequence length for forward test.") return parser.parse_args() def load_yaml(path: Path) -> Dict[str, Any]: if not path.exists(): raise FileNotFoundError(f"Model config not found: {path}") with path.open("r", encoding="utf-8") as f: data = yaml.safe_load(f) if not isinstance(data, dict): raise ValueError("Invalid YAML format in model config.") return data def build_config(cfg_data: Dict[str, Any]) -> ModelConfig: preset = cfg_data.get("preset") model_cfg = cfg_data.get("model", {}) if not isinstance(model_cfg, dict): raise ValueError("Config key 'model' must be an object.") if preset: presets = get_model_presets() if preset not in presets: raise ValueError(f"Unknown preset '{preset}'.") base = presets[preset] merged = base.__dict__.copy() merged.update(model_cfg) return ModelConfig(**merged) return ModelConfig(**model_cfg) def gpu_memory_report(stage: str) -> None: if not torch.cuda.is_available(): print(f"[{stage}] CUDA not available") return allocated = torch.cuda.memory_allocated() / (1024**3) reserved = torch.cuda.memory_reserved() / (1024**3) max_alloc = torch.cuda.max_memory_allocated() / (1024**3) print( f"[{stage}] VRAM allocated={allocated:.2f} GB " f"reserved={reserved:.2f} GB max_allocated={max_alloc:.2f} GB" ) def main() -> None: args = parse_args() try: cfg_data = load_yaml(Path(args.config)) model_cfg = build_config(cfg_data) if args.seq_len > model_cfg.max_seq_len: raise ValueError( f"seq_len={args.seq_len} exceeds max_seq_len={model_cfg.max_seq_len} in config." ) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") use_fp16 = device.type == "cuda" if device.type == "cuda": torch.cuda.empty_cache() torch.cuda.reset_peak_memory_stats() print(f"Detected GPU: {torch.cuda.get_device_name(0)}") gpu_memory_report("start") else: print("CUDA not available. Running verification on CPU.") model = CodeTransformerLM(model_cfg) print(f"Model parameters: {model.estimate_num_parameters():,}") if use_fp16: model = model.half() model.to(device) model.eval() gpu_memory_report("after_model_load") input_ids = torch.randint( low=0, high=model_cfg.vocab_size, size=(args.batch_size, args.seq_len), dtype=torch.long, device=device, ) gpu_memory_report("after_input_alloc") with torch.no_grad(): out = model(input_ids=input_ids) logits = out["logits"] gpu_memory_report("after_forward") print(f"Forward output shape: {tuple(logits.shape)}") print("Component 4 verification passed.") except Exception as exc: print("Component 4 verification failed.") print(f"What went wrong: {exc}") print("Fix suggestion: reduce seq_len or check CUDA/PyTorch installation.") raise SystemExit(1) if __name__ == "__main__": main()