#!/usr/bin/env python3
"""
GLADIUS — Deploy Synthase into Omega Kernel

This script:
1. Builds the GladiusKernel from omega_rtx2050_config()
2. Loads the Omega checkpoint weights (step 1000 or latest)
3. Runs upgrade_kernel_to_synthase()
4. Verifies by running a forward pass
5. Saves the upgraded checkpoint (optimizer state is intentionally NOT copied;
   see the note next to the save step in main())

Usage (run on Victus from gladius-training dir):
    python deploy_synthase.py --checkpoint runs/omega/omega_step_1000.pt --output runs/omega/synthase_step_1000.pt

Authors: Ali A. Shakil, Ava Shakil
Date: March 27, 2026
"""

import sys
import os
import json
import time
import argparse
from pathlib import Path

import torch
import torch.nn as nn

# Setup paths (same as train_omega.py)
script_dir = Path(__file__).resolve().parent
gladius_dir = Path(r'C:\Users\ali\gladius-training')
staging_dir = gladius_dir / 'staging'
omega_dir = gladius_dir / 'omega'
# Prepend project directories so the project-local imports below resolve.
for p in [str(staging_dir), str(omega_dir), str(gladius_dir), str(script_dir)]:
    if p not in sys.path:
        sys.path.insert(0, p)

from kernel.config import KernelConfig
from kernel.kernel import GladiusKernel
from synthase_attention import SynthaseDepthAttention, DepthCacheBuilder
from synthase_layer import SynthaseTransformerLayer
from synthase_surgery import upgrade_kernel_to_synthase, get_synthase_diagnostics

try:
    from omega_config import OmegaConfig, omega_rtx2050_config
except ImportError:
    sys.path.insert(0, str(omega_dir))
    from omega_config import OmegaConfig, omega_rtx2050_config


def build_kernel_config(omega_cfg):
    """Build KernelConfig from OmegaConfig (same as train_omega.py).

    Every field is copied one-to-one from ``omega_cfg`` except ``vocab_size``,
    which is taken from ``omega_cfg.new_vocab_size``.
    """
    return KernelConfig(
        hidden_dim=omega_cfg.hidden_dim,
        num_layers=omega_cfg.num_layers,
        num_heads=omega_cfg.num_heads,
        head_dim=omega_cfg.head_dim,
        ffn_dim=omega_cfg.ffn_dim,
        vocab_size=omega_cfg.new_vocab_size,
        max_seq_len=omega_cfg.max_seq_len,
        hot_memory_slots=omega_cfg.hot_memory_slots,
        warm_rank=omega_cfg.warm_rank,
        time_dim=omega_cfg.time_dim,
        time_num_frequencies=omega_cfg.time_num_frequencies,
        time_max_events=omega_cfg.time_max_events,
        cognition_state_dim=omega_cfg.cognition_state_dim,
        cognition_modes=omega_cfg.cognition_modes,
        cognition_prompt_types=omega_cfg.cognition_prompt_types,
        register_dim=omega_cfg.register_dim,
        intent_dim=omega_cfg.intent_dim,
        max_tools=omega_cfg.max_tools,
        num_specialists=omega_cfg.num_specialists,
        router_top_k=omega_cfg.router_top_k,
        attention_sparse_budget=omega_cfg.attention_sparse_budget,
        specialist_residual_scale=omega_cfg.specialist_residual_scale,
    )


def _report_key_mismatch(load_result):
    """Print a warning when a non-strict state-dict load skipped any keys.

    ``load_result`` is whatever ``model.load_state_dict`` returned. The
    attributes are read defensively so an override returning ``None`` is
    tolerated.
    """
    missing = list(getattr(load_result, 'missing_keys', None) or [])
    unexpected = list(getattr(load_result, 'unexpected_keys', None) or [])
    if missing:
        print(f" WARNING: {len(missing)} missing key(s) in checkpoint (e.g. {missing[0]})")
    if unexpected:
        print(f" WARNING: {len(unexpected)} unexpected key(s) in checkpoint (e.g. {unexpected[0]})")


def _load_checkpoint_weights(model, cp):
    """Load weights from ``cp`` into ``model`` and return ``(step, best_loss)``.

    ``cp`` is either a training checkpoint dict holding ``'model_state_dict'``
    plus optional ``'step'`` / ``'best_loss'`` metadata, or a raw state dict,
    in which case step 0 and an infinite best loss are reported.
    """
    if 'model_state_dict' in cp:
        load_result = model.load_state_dict(cp['model_state_dict'], strict=False)
        _report_key_mismatch(load_result)
        step = cp.get('step', 0)
        best_loss = cp.get('best_loss', float('inf'))
        print(f" Step: {step}, Best loss: {best_loss:.4f}")
    else:
        load_result = model.load_state_dict(cp, strict=False)
        _report_key_mismatch(load_result)
        step = 0
        best_loss = float('inf')
        print(" Raw state dict (no metadata)")
    return step, best_loss


def main():
    """Run the Synthase deployment pipeline from the command line.

    Returns:
        0 on success, 1 if the verification forward pass raised.
    """
    parser = argparse.ArgumentParser(description='Deploy Synthase into GLADIUS Omega')
    parser.add_argument('--checkpoint', type=str, required=True,
                        help='Omega checkpoint to upgrade (e.g. runs/omega/omega_step_1000.pt)')
    parser.add_argument('--output', type=str, default=None,
                        help='Output path for upgraded checkpoint (default: auto-name)')
    # argparse %-formats help strings, so a literal percent sign must be
    # written as '%%' or --help raises instead of printing usage.
    parser.add_argument('--bottleneck', action='store_true',
                        help='Use bottleneck variant (41%% fewer depth params)')
    parser.add_argument('--depth-k', type=int, default=32,
                        help='Positions per layer in depth cache (default: 32)')
    parser.add_argument('--depth-kv-heads', type=int, default=4,
                        help='Depth KV heads for GQA (default: 4)')
    parser.add_argument('--device', type=str, default='cpu',
                        help='Device for surgery (cpu recommended — saves VRAM for training)')
    args = parser.parse_args()

    device = torch.device(args.device)

    print(f"\n{'='*60}")
    print(f" GLADIUS SYNTHASE DEPLOYMENT")
    print(f" ATP Synthase Depth Attention → Omega Kernel")
    print(f"{'='*60}\n")

    # 1. Build config
    omega_cfg = omega_rtx2050_config()
    kernel_config = build_kernel_config(omega_cfg)

    # 2. Build fresh model
    print("[1/5] Building Omega kernel...")
    model = GladiusKernel(kernel_config).to(device)
    total_before = sum(p.numel() for p in model.parameters())
    print(f" Params (pre-synthase): {total_before:,}")

    # 3. Load checkpoint
    print(f"\n[2/5] Loading checkpoint: {args.checkpoint}")
    # NOTE(security): weights_only=False unpickles arbitrary Python objects.
    # Only load checkpoints that come from a trusted source.
    cp = torch.load(args.checkpoint, map_location='cpu', weights_only=False)
    step, best_loss = _load_checkpoint_weights(model, cp)

    # 4. Upgrade to Synthase
    print(f"\n[3/5] Performing Synthase surgery...")
    model = upgrade_kernel_to_synthase(
        model,
        num_depth_kv_heads=args.depth_kv_heads,
        depth_k=args.depth_k,
        use_bottleneck=args.bottleneck,
        init_from_backbone=True,
    )

    total_after = sum(p.numel() for p in model.parameters())
    delta = total_after - total_before
    # Guard the percentage against a parameter-less model.
    overhead_pct = delta / total_before * 100 if total_before else 0.0
    print(f" Params added: {delta:,} ({overhead_pct:.1f}% overhead)")

    # 5. Verify with forward pass
    print(f"\n[4/5] Verification forward pass...")
    model.eval()
    with torch.no_grad():
        test_ids = torch.randint(0, omega_cfg.new_vocab_size, (1, 64), device=device)
        try:
            result = model(test_ids)
            logits = result['logits']
            print(f" Output shape: {logits.shape}")
            print(f" Logits range: [{logits.min().item():.4f}, {logits.max().item():.4f}]")

            # Check depth diagnostics
            diag = get_synthase_diagnostics(model)
            for k, v in sorted(diag.items()):
                if 'scale' in k:
                    print(f" {k}: {v:.4f}")

            # Check balance_loss returned
            if 'balance_loss' in result:
                print(f" balance_loss: {result['balance_loss'].item():.6f}")
            if 'router_indices' in result and result['router_indices'] is not None:
                print(f" router_indices: {result['router_indices']}")

            print(" Forward pass: OK")
        except Exception as e:
            # Top-level boundary: report the failure in full and exit
            # non-zero without writing an upgraded checkpoint.
            print(f" Forward pass FAILED: {e}")
            import traceback
            traceback.print_exc()
            return 1

    # 6. Save upgraded checkpoint
    output_path = args.output
    if output_path is None:
        base = Path(args.checkpoint)
        output_path = str(base.parent / f"synthase_{base.name}")

    print(f"\n[5/5] Saving upgraded checkpoint: {output_path}")

    save_dict = {
        'model_state_dict': model.state_dict(),
        'step': step,
        'best_loss': best_loss,
        'phase': cp.get('phase', 'unknown'),
        'synthase': True,
        'synthase_config': {
            'depth_k': args.depth_k,
            'num_depth_kv_heads': args.depth_kv_heads,
            'use_bottleneck': args.bottleneck,
            'params_added': delta,
        },
    }

    # NOTE: We intentionally do NOT copy optimizer state.
    # The optimizer param groups will change (new depth params need their own group).
    # train_omega.py will create a fresh optimizer with the right groups.
    # The LR warmup will re-ramp from the current step.

    # Create the destination directory so a fresh --output location works.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    torch.save(save_dict, output_path)
    size_mb = os.path.getsize(output_path) / 1024 / 1024
    print(f" Size: {size_mb:.1f} MB")

    print(f"\n{'='*60}")
    print(f" DEPLOYMENT COMPLETE")
    print(f" ")
    print(f" Next: Resume training with:")
    print(f" python train_omega.py --resume {output_path}")
    print(f" ")
    print(f" The optimizer will be rebuilt with depth params in a new group.")
    print(f" LR scheduler will continue from step {step}.")
    print(f"{'='*60}\n")

    return 0


if __name__ == '__main__':
    sys.exit(main())