WYRM staging modules (synthase, pup, sla2)

3f24c41 verified 2 months ago

8.08 kB

	#!/usr/bin/env python3
	"""
	GLADIUS — Deploy Synthase into Omega Kernel

	This script:
	1. Builds the Omega config (omega_rtx2050_config) and the matching KernelConfig
	2. Builds the GladiusKernel
	3. Loads the weights from the checkpoint given via --checkpoint
	4. Runs upgrade_kernel_to_synthase()
	5. Verifies by running a forward pass
	6. Saves the upgraded checkpoint (optimizer state is intentionally NOT copied)

	Usage (run on Victus from gladius-training dir):
	python deploy_synthase.py --checkpoint runs/omega/omega_step_1000.pt --output runs/omega/synthase_step_1000.pt

	Authors: Ali A. Shakil, Ava Shakil
	Date: March 27, 2026
	"""

	import sys
	import os
	import json
	import time
	import argparse
	from pathlib import Path

	import torch
	import torch.nn as nn

	# Setup paths (same as train_omega.py).
	# script_dir is the directory containing this file. gladius_dir defaults to
	# the original hard-coded checkout location, but can be overridden with the
	# GLADIUS_DIR environment variable so the script is usable on other machines
	# or checkouts. An unset or empty variable keeps the original default.
	script_dir = Path(__file__).resolve().parent
	gladius_dir = Path(os.environ.get('GLADIUS_DIR') or r'C:\Users\ali\gladius-training')
	staging_dir = gladius_dir / 'staging'
	omega_dir = gladius_dir / 'omega'

	# Each directory that is not yet on sys.path is inserted at index 0, so among
	# the newly inserted entries the ones listed later here are searched first.
	for p in [str(staging_dir), str(omega_dir), str(gladius_dir), str(script_dir)]:
	    if p not in sys.path:
	        sys.path.insert(0, p)

	# Project-local modules; they resolve through the sys.path entries added above.
	from kernel.config import KernelConfig
	from kernel.kernel import GladiusKernel
	from synthase_attention import SynthaseDepthAttention, DepthCacheBuilder
	from synthase_layer import SynthaseTransformerLayer
	from synthase_surgery import upgrade_kernel_to_synthase, get_synthase_diagnostics

	try:
	    from omega_config import OmegaConfig, omega_rtx2050_config
	except ImportError:
	    # NOTE(review): omega_dir was already added by the loop above, so this
	    # retry only helps if sys.path was changed by the imports in between.
	    # Kept as-is to preserve the original best-effort behaviour.
	    sys.path.insert(0, str(omega_dir))
	    from omega_config import OmegaConfig, omega_rtx2050_config


	def build_kernel_config(omega_cfg):
	    """Translate an OmegaConfig into a KernelConfig.

	    Every KernelConfig keyword is read from the attribute of the same name
	    on ``omega_cfg``, except ``vocab_size``, which is taken from
	    ``omega_cfg.new_vocab_size``. Per the original note, this is the same
	    mapping train_omega.py uses.
	    """
	    # KernelConfig keyword -> OmegaConfig attribute, only where the names differ.
	    renamed = {'vocab_size': 'new_vocab_size'}

	    # KernelConfig keywords, in the order they are passed to the constructor.
	    kernel_fields = (
	        'hidden_dim',
	        'num_layers',
	        'num_heads',
	        'head_dim',
	        'ffn_dim',
	        'vocab_size',
	        'max_seq_len',
	        'hot_memory_slots',
	        'warm_rank',
	        'time_dim',
	        'time_num_frequencies',
	        'time_max_events',
	        'cognition_state_dim',
	        'cognition_modes',
	        'cognition_prompt_types',
	        'register_dim',
	        'intent_dim',
	        'max_tools',
	        'num_specialists',
	        'router_top_k',
	        'attention_sparse_budget',
	        'specialist_residual_scale',
	    )

	    kwargs = {
	        field: getattr(omega_cfg, renamed.get(field, field))
	        for field in kernel_fields
	    }
	    return KernelConfig(**kwargs)


	def main():
	    """Upgrade an Omega checkpoint to Synthase and save the result.

	    Parses the command-line arguments, builds a GladiusKernel from
	    ``omega_rtx2050_config()``, loads the checkpoint weights into it, runs
	    ``upgrade_kernel_to_synthase()``, verifies the upgraded model with one
	    forward pass on random token ids, and writes a new checkpoint.

	    Command-line arguments:
	        --checkpoint      Path of the checkpoint to upgrade (required).
	        --output          Destination path; defaults to ``synthase_<name>``
	                          next to the input checkpoint.
	        --bottleneck      Passed through as ``use_bottleneck``.
	        --depth-k         Passed through as ``depth_k`` (default 32).
	        --depth-kv-heads  Passed through as ``num_depth_kv_heads`` (default 4).
	        --device          Torch device used for the model (default ``cpu``).

	    Returns:
	        int: 0 on success; 1 if the verification forward pass raised, in
	        which case no checkpoint is written.
	    """
	    parser = argparse.ArgumentParser(description='Deploy Synthase into GLADIUS Omega')
	    parser.add_argument('--checkpoint', type=str, required=True,
	                        help='Omega checkpoint to upgrade (e.g. runs/omega/omega_step_1000.pt)')
	    parser.add_argument('--output', type=str, default=None,
	                        help='Output path for upgraded checkpoint (default: auto-name)')
	    parser.add_argument('--bottleneck', action='store_true',
	                        help='Use bottleneck variant (41% fewer depth params)')
	    parser.add_argument('--depth-k', type=int, default=32,
	                        help='Positions per layer in depth cache (default: 32)')
	    parser.add_argument('--depth-kv-heads', type=int, default=4,
	                        help='Depth KV heads for GQA (default: 4)')
	    parser.add_argument('--device', type=str, default='cpu',
	                        help='Device for surgery (cpu recommended — saves VRAM for training)')
	    args = parser.parse_args()

	    device = torch.device(args.device)
	    print(f"\n{'='*60}")
	    print(f" GLADIUS SYNTHASE DEPLOYMENT")
	    print(f" ATP Synthase Depth Attention → Omega Kernel")
	    print(f"{'='*60}\n")

	    # Build the Omega config and derive the kernel config from it.
	    omega_cfg = omega_rtx2050_config()
	    kernel_config = build_kernel_config(omega_cfg)

	    # Build a fresh model; its parameter count is the pre-surgery baseline.
	    print("[1/5] Building Omega kernel...")
	    model = GladiusKernel(kernel_config).to(device)
	    total_before = sum(p.numel() for p in model.parameters())
	    print(f" Params (pre-synthase): {total_before:,}")

	    # Load the checkpoint onto CPU; load_state_dict copies into the model's device.
	    print(f"\n[2/5] Loading checkpoint: {args.checkpoint}")
	    # SECURITY NOTE: weights_only=False unpickles arbitrary Python objects.
	    # Only run this script on checkpoints from a trusted source.
	    cp = torch.load(args.checkpoint, map_location='cpu', weights_only=False)

	    if 'model_state_dict' in cp:
	        # Training checkpoint: weights plus metadata.
	        incompatible = model.load_state_dict(cp['model_state_dict'], strict=False)
	        step = cp.get('step', 0)
	        best_loss = cp.get('best_loss', float('inf'))
	        print(f" Step: {step}, Best loss: {best_loss:.4f}")
	    else:
	        # Bare state dict: no step/loss metadata available.
	        incompatible = model.load_state_dict(cp, strict=False)
	        step = 0
	        best_loss = float('inf')
	        print(" Raw state dict (no metadata)")

	    # strict=False tolerates key mismatches. Surface them so a checkpoint that
	    # does not match this config is not silently half-loaded and then saved.
	    missing_keys = list(getattr(incompatible, 'missing_keys', ()))
	    unexpected_keys = list(getattr(incompatible, 'unexpected_keys', ()))
	    if missing_keys:
	        print(f" WARNING: {len(missing_keys)} model keys missing from checkpoint "
	              f"(left at init values), e.g. {missing_keys[:5]}")
	    if unexpected_keys:
	        print(f" WARNING: {len(unexpected_keys)} checkpoint keys not used by the model, "
	              f"e.g. {unexpected_keys[:5]}")

	    # Upgrade to Synthase and measure how many parameters the surgery added.
	    print(f"\n[3/5] Performing Synthase surgery...")
	    model = upgrade_kernel_to_synthase(
	        model,
	        num_depth_kv_heads=args.depth_kv_heads,
	        depth_k=args.depth_k,
	        use_bottleneck=args.bottleneck,
	        init_from_backbone=True,
	    )
	    total_after = sum(p.numel() for p in model.parameters())
	    delta = total_after - total_before
	    print(f" Params added: {delta:,} ({delta/total_before*100:.1f}% overhead)")

	    # Verify with a single forward pass on random token ids (batch 1, length 64).
	    print(f"\n[4/5] Verification forward pass...")
	    model.eval()
	    with torch.no_grad():
	        test_ids = torch.randint(0, omega_cfg.new_vocab_size, (1, 64), device=device)
	        try:
	            result = model(test_ids)
	            logits = result['logits']
	            print(f" Output shape: {logits.shape}")
	            print(f" Logits range: [{logits.min().item():.4f}, {logits.max().item():.4f}]")

	            # Print only the diagnostics whose key contains 'scale'.
	            diag = get_synthase_diagnostics(model)
	            for k, v in sorted(diag.items()):
	                if 'scale' in k:
	                    print(f" {k}: {v:.4f}")

	            # Optional outputs: shown only when the model returns them.
	            if 'balance_loss' in result:
	                print(f" balance_loss: {result['balance_loss'].item():.6f}")
	            if 'router_indices' in result and result['router_indices'] is not None:
	                print(f" router_indices: {result['router_indices']}")

	            print(" Forward pass: OK")
	        except Exception as e:
	            # Top-level boundary: report the failure and abort before saving
	            # so a broken model is never written to disk.
	            print(f" Forward pass FAILED: {e}")
	            import traceback
	            traceback.print_exc()
	            return 1

	    # Save the upgraded checkpoint, auto-naming it next to the input if needed.
	    output_path = args.output
	    if output_path is None:
	        base = Path(args.checkpoint)
	        output_path = str(base.parent / f"synthase_{base.name}")

	    print(f"\n[5/5] Saving upgraded checkpoint: {output_path}")

	    save_dict = {
	        'model_state_dict': model.state_dict(),
	        'step': step,
	        'best_loss': best_loss,
	        'phase': cp.get('phase', 'unknown'),
	        'synthase': True,
	        'synthase_config': {
	            'depth_k': args.depth_k,
	            'num_depth_kv_heads': args.depth_kv_heads,
	            'use_bottleneck': args.bottleneck,
	            'params_added': delta,
	        },
	    }

	    # NOTE: We intentionally do NOT copy optimizer state.
	    # The optimizer param groups will change (new depth params need their own group).
	    # train_omega.py will create a fresh optimizer with the right groups.
	    # The LR warmup will re-ramp from the current step.

	    # Create the destination directory first so a missing folder does not
	    # fail the save after all the work above.
	    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
	    torch.save(save_dict, output_path)
	    size_mb = os.path.getsize(output_path) / 1024 / 1024
	    print(f" Size: {size_mb:.1f} MB")

	    print(f"\n{'='*60}")
	    print(f" DEPLOYMENT COMPLETE")
	    print(f" ")
	    print(f" Next: Resume training with:")
	    print(f" python train_omega.py --resume {output_path}")
	    print(f" ")
	    print(f" The optimizer will be rebuilt with depth params in a new group.")
	    print(f" LR scheduler will continue from step {step}.")
	    print(f"{'='*60}\n")
	    return 0


	# Script entry point: run main() and use its return value as the process exit status.
	if __name__ == '__main__':
	    raise SystemExit(main())