"""
Component 4 verification script.
This script:
- Builds model from config.
- Runs a small forward pass.
- Prints live VRAM usage at each stage.
"""
from __future__ import annotations
import argparse
import sys
from pathlib import Path
from typing import Any, Dict
import torch
import yaml
# Ensure src imports work from project root.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))
from src.model_architecture.code_transformer import ( # noqa: E402
CodeTransformerLM,
ModelConfig,
get_model_presets,
)
def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(description="Verify Component 4 model load and VRAM usage.")
parser.add_argument(
"--config",
default="configs/component4_model_config.yaml",
help="Path to model YAML config.",
)
parser.add_argument("--batch_size", type=int, default=1, help="Batch size for forward test.")
parser.add_argument("--seq_len", type=int, default=256, help="Sequence length for forward test.")
return parser.parse_args()
def load_yaml(path: Path) -> Dict[str, Any]:
if not path.exists():
raise FileNotFoundError(f"Model config not found: {path}")
with path.open("r", encoding="utf-8") as f:
data = yaml.safe_load(f)
if not isinstance(data, dict):
raise ValueError("Invalid YAML format in model config.")
return data
def build_config(cfg_data: Dict[str, Any]) -> ModelConfig:
preset = cfg_data.get("preset")
model_cfg = cfg_data.get("model", {})
if not isinstance(model_cfg, dict):
raise ValueError("Config key 'model' must be an object.")
    if preset:
        presets = get_model_presets()
        if preset not in presets:
            raise ValueError(f"Unknown preset '{preset}'.")
        base = presets[preset]
        # Start from the preset's fields, then let explicit 'model' keys override them.
        merged = base.__dict__.copy()
        merged.update(model_cfg)
return ModelConfig(**merged)
return ModelConfig(**model_cfg)
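
# A minimal sketch of the YAML shape build_config expects (the preset name and the
# numeric values below are illustrative, not taken from the actual config file):
#
#   preset: small            # optional; must be a key returned by get_model_presets()
#   model:                   # optional overrides applied on top of the preset
#     vocab_size: 32000
#     max_seq_len: 2048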
def gpu_memory_report(stage: str) -> None:
if not torch.cuda.is_available():
print(f"[{stage}] CUDA not available")
return
    # torch.cuda reports bytes; convert to GiB (1024**3 bytes) for readability.
    allocated = torch.cuda.memory_allocated() / (1024**3)
    reserved = torch.cuda.memory_reserved() / (1024**3)
    max_alloc = torch.cuda.max_memory_allocated() / (1024**3)
    print(
        f"[{stage}] VRAM allocated={allocated:.2f} GiB "
        f"reserved={reserved:.2f} GiB max_allocated={max_alloc:.2f} GiB"
    )
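
# Illustrative output line (numbers are hypothetical, not from a real run):
#   [after_model_load] VRAM allocated=2.41 GiB reserved=2.62 GiB max_allocated=2.41 GiB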
def main() -> None:
args = parse_args()
try:
cfg_data = load_yaml(Path(args.config))
model_cfg = build_config(cfg_data)
if args.seq_len > model_cfg.max_seq_len:
raise ValueError(
f"seq_len={args.seq_len} exceeds max_seq_len={model_cfg.max_seq_len} in config."
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
use_fp16 = device.type == "cuda"
        if device.type == "cuda":
            torch.cuda.empty_cache()
            # Reset peak stats so max_allocated reflects only this verification run.
            torch.cuda.reset_peak_memory_stats()
            print(f"Detected GPU: {torch.cuda.get_device_name(0)}")
gpu_memory_report("start")
else:
print("CUDA not available. Running verification on CPU.")
model = CodeTransformerLM(model_cfg)
print(f"Model parameters: {model.estimate_num_parameters():,}")
        if use_fp16:
            # Cast weights to FP16 so the VRAM check reflects half-precision inference.
            model = model.half()
model.to(device)
model.eval()
gpu_memory_report("after_model_load")
input_ids = torch.randint(
low=0,
high=model_cfg.vocab_size,
size=(args.batch_size, args.seq_len),
dtype=torch.long,
device=device,
)
gpu_memory_report("after_input_alloc")
with torch.no_grad():
out = model(input_ids=input_ids)
logits = out["logits"]
gpu_memory_report("after_forward")
print(f"Forward output shape: {tuple(logits.shape)}")
print("Component 4 verification passed.")
except Exception as exc:
print("Component 4 verification failed.")
print(f"What went wrong: {exc}")
print("Fix suggestion: reduce seq_len or check CUDA/PyTorch installation.")
raise SystemExit(1)
if __name__ == "__main__":
main()