gladius-v2-kernel / staging /synthase /deploy_synthase.py
amuzetnoM's picture
WYRM staging modules (synthase, pup, sla2)
3f24c41 verified
#!/usr/bin/env python3
"""
GLADIUS — Deploy Synthase into Omega Kernel
This script:
1. Builds the GladiusKernel from the Omega config
2. Loads the Omega checkpoint weights (step 1000 or latest) into it
3. Runs upgrade_kernel_to_synthase()
4. Verifies the upgraded model by running a forward pass
5. Saves the upgraded checkpoint (model weights and metadata only; the
optimizer state is intentionally not copied, train_omega.py rebuilds it)
Usage (run on Victus from gladius-training dir):
python deploy_synthase.py --checkpoint runs/omega/omega_step_1000.pt --output runs/omega/synthase_step_1000.pt
Authors: Ali A. Shakil, Ava Shakil
Date: March 27, 2026
"""
import sys
import os
import json
import time
import argparse
from pathlib import Path
import torch
import torch.nn as nn
# Setup paths (same as train_omega.py).
# The training checkout defaults to the original hard-coded location, but it
# can be relocated by setting the GLADIUS_DIR environment variable so the
# script is not tied to a single machine/user profile.
script_dir = Path(__file__).resolve().parent
gladius_dir = Path(os.environ.get('GLADIUS_DIR', r'C:\Users\ali\gladius-training'))
staging_dir = gladius_dir / 'staging'
omega_dir = gladius_dir / 'omega'
# Every entry is inserted at index 0, so the last one listed (script_dir)
# ends up first on sys.path and wins import resolution.
for p in [str(staging_dir), str(omega_dir), str(gladius_dir), str(script_dir)]:
    if p not in sys.path:
        sys.path.insert(0, p)
from kernel.config import KernelConfig
from kernel.kernel import GladiusKernel
from synthase_attention import SynthaseDepthAttention, DepthCacheBuilder
from synthase_layer import SynthaseTransformerLayer
from synthase_surgery import upgrade_kernel_to_synthase, get_synthase_diagnostics
try:
from omega_config import OmegaConfig, omega_rtx2050_config
except ImportError:
sys.path.insert(0, str(omega_dir))
from omega_config import OmegaConfig, omega_rtx2050_config
def build_kernel_config(omega_cfg):
    """Translate an OmegaConfig into the KernelConfig used to build the kernel.

    Every KernelConfig field is copied from the attribute of the same name on
    ``omega_cfg``, except ``vocab_size`` which is read from
    ``omega_cfg.new_vocab_size``.
    """
    # KernelConfig keyword names, in the order they are passed.
    kernel_fields = (
        'hidden_dim',
        'num_layers',
        'num_heads',
        'head_dim',
        'ffn_dim',
        'vocab_size',
        'max_seq_len',
        'hot_memory_slots',
        'warm_rank',
        'time_dim',
        'time_num_frequencies',
        'time_max_events',
        'cognition_state_dim',
        'cognition_modes',
        'cognition_prompt_types',
        'register_dim',
        'intent_dim',
        'max_tools',
        'num_specialists',
        'router_top_k',
        'attention_sparse_budget',
        'specialist_residual_scale',
    )
    # Kernel fields whose source attribute on the Omega config is named differently.
    source_overrides = {'vocab_size': 'new_vocab_size'}

    settings = {}
    for field in kernel_fields:
        settings[field] = getattr(omega_cfg, source_overrides.get(field, field))
    return KernelConfig(**settings)
def _build_arg_parser():
    """Create the command-line parser for the Synthase deployment script."""
    parser = argparse.ArgumentParser(description='Deploy Synthase into GLADIUS Omega')
    parser.add_argument('--checkpoint', type=str, required=True,
                        help='Omega checkpoint to upgrade (e.g. runs/omega/omega_step_1000.pt)')
    parser.add_argument('--output', type=str, default=None,
                        help='Output path for upgraded checkpoint (default: auto-name)')
    # argparse %-formats help strings, so a literal percent sign must be
    # doubled; a bare "41% fewer" makes --help raise instead of printing.
    parser.add_argument('--bottleneck', action='store_true',
                        help='Use bottleneck variant (41%% fewer depth params)')
    parser.add_argument('--depth-k', type=int, default=32,
                        help='Positions per layer in depth cache (default: 32)')
    parser.add_argument('--depth-kv-heads', type=int, default=4,
                        help='Depth KV heads for GQA (default: 4)')
    parser.add_argument('--device', type=str, default='cpu',
                        help='Device for surgery (cpu recommended — saves VRAM for training)')
    return parser


def _report_incompatible_keys(load_result):
    """Print a warning for keys that a non-strict load_state_dict skipped."""
    # getattr keeps this tolerant of a load_state_dict override that does not
    # return the usual (missing_keys, unexpected_keys) result.
    missing = list(getattr(load_result, 'missing_keys', ()))
    unexpected = list(getattr(load_result, 'unexpected_keys', ()))
    if missing:
        print(f" WARNING: {len(missing)} missing keys, e.g. {missing[:3]}")
    if unexpected:
        print(f" WARNING: {len(unexpected)} unexpected keys, e.g. {unexpected[:3]}")


def _load_checkpoint_weights(model, checkpoint_path):
    """Load checkpoint weights into ``model`` and return its metadata.

    Accepts either a training checkpoint dict containing 'model_state_dict'
    or a raw state dict. Loading is non-strict, but skipped keys are reported
    so a mismatched checkpoint is not upgraded silently.

    Returns:
        Tuple ``(step, best_loss, phase)``; defaults are ``0``,
        ``float('inf')`` and ``'unknown'`` when the metadata is absent.
    """
    # NOTE(security): weights_only=False unpickles arbitrary objects; only run
    # this on checkpoints from a trusted source.
    cp = torch.load(checkpoint_path, map_location='cpu', weights_only=False)
    if 'model_state_dict' in cp:
        load_result = model.load_state_dict(cp['model_state_dict'], strict=False)
        step = cp.get('step', 0)
        best_loss = cp.get('best_loss', float('inf'))
        print(f" Step: {step}, Best loss: {best_loss:.4f}")
    else:
        load_result = model.load_state_dict(cp, strict=False)
        step = 0
        best_loss = float('inf')
        print(" Raw state dict (no metadata)")
    _report_incompatible_keys(load_result)
    # Read the phase now so the (potentially large) checkpoint dict can be
    # released when this function returns instead of living through surgery.
    phase = cp.get('phase', 'unknown')
    return step, best_loss, phase


def _verify_forward_pass(model, vocab_size, device):
    """Run one no-grad forward pass on random token ids; return True on success."""
    model.eval()
    with torch.no_grad():
        test_ids = torch.randint(0, vocab_size, (1, 64), device=device)
        try:
            result = model(test_ids)
            logits = result['logits']
            print(f" Output shape: {logits.shape}")
            print(f" Logits range: [{logits.min().item():.4f}, {logits.max().item():.4f}]")
            # Check depth diagnostics
            diag = get_synthase_diagnostics(model)
            for k, v in sorted(diag.items()):
                if 'scale' in k:
                    print(f" {k}: {v:.4f}")
            # Check balance_loss returned
            if 'balance_loss' in result:
                print(f" balance_loss: {result['balance_loss'].item():.6f}")
            if 'router_indices' in result and result['router_indices'] is not None:
                print(f" router_indices: {result['router_indices']}")
            print(" Forward pass: OK")
        except Exception as e:
            # Top-level boundary: report the failure with a traceback and let
            # the caller turn it into a non-zero exit status.
            print(f" Forward pass FAILED: {e}")
            import traceback
            traceback.print_exc()
            return False
    return True


def main():
    """Upgrade an Omega checkpoint to Synthase and save the result.

    Builds the kernel, loads the checkpoint weights, performs the Synthase
    surgery, verifies the upgraded model with a forward pass, and writes the
    upgraded checkpoint (model weights and metadata only).

    Returns:
        0 on success, 1 if the verification forward pass fails (in which case
        nothing is saved).
    """
    args = _build_arg_parser().parse_args()
    device = torch.device(args.device)
    print(f"\n{'='*60}")
    print(" GLADIUS SYNTHASE DEPLOYMENT")
    print(" ATP Synthase Depth Attention → Omega Kernel")
    print(f"{'='*60}\n")

    # 1. Build config
    omega_cfg = omega_rtx2050_config()
    kernel_config = build_kernel_config(omega_cfg)

    # 2. Build fresh model
    print("[1/5] Building Omega kernel...")
    model = GladiusKernel(kernel_config).to(device)
    total_before = sum(p.numel() for p in model.parameters())
    print(f" Params (pre-synthase): {total_before:,}")

    # 3. Load checkpoint
    print(f"\n[2/5] Loading checkpoint: {args.checkpoint}")
    step, best_loss, phase = _load_checkpoint_weights(model, args.checkpoint)

    # 4. Upgrade to Synthase
    print("\n[3/5] Performing Synthase surgery...")
    model = upgrade_kernel_to_synthase(
        model,
        num_depth_kv_heads=args.depth_kv_heads,
        depth_k=args.depth_k,
        use_bottleneck=args.bottleneck,
        init_from_backbone=True,
    )
    total_after = sum(p.numel() for p in model.parameters())
    delta = total_after - total_before
    # Guard the percentage against a parameter-less model.
    overhead = delta / total_before * 100 if total_before else 0.0
    print(f" Params added: {delta:,} ({overhead:.1f}% overhead)")

    # 5. Verify with forward pass
    print("\n[4/5] Verification forward pass...")
    if not _verify_forward_pass(model, omega_cfg.new_vocab_size, device):
        return 1

    # 6. Save upgraded checkpoint
    output_path = args.output
    if output_path is None:
        base = Path(args.checkpoint)
        output_path = str(base.parent / f"synthase_{base.name}")
    print(f"\n[5/5] Saving upgraded checkpoint: {output_path}")
    save_dict = {
        'model_state_dict': model.state_dict(),
        'step': step,
        'best_loss': best_loss,
        'phase': phase,
        'synthase': True,
        'synthase_config': {
            'depth_k': args.depth_k,
            'num_depth_kv_heads': args.depth_kv_heads,
            'use_bottleneck': args.bottleneck,
            'params_added': delta,
        },
    }
    # NOTE: We intentionally do NOT copy optimizer state.
    # The optimizer param groups will change (new depth params need their own group).
    # train_omega.py will create a fresh optimizer with the right groups.
    # The LR warmup will re-ramp from the current step.
    # Create the destination directory first so a missing folder cannot
    # discard the surgery result at the very last step.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    torch.save(save_dict, output_path)
    size_mb = os.path.getsize(output_path) / 1024 / 1024
    print(f" Size: {size_mb:.1f} MB")
    print(f"\n{'='*60}")
    print(" DEPLOYMENT COMPLETE")
    print(" ")
    print(" Next: Resume training with:")
    print(f" python train_omega.py --resume {output_path}")
    print(" ")
    print(" The optimizer will be rebuilt with depth params in a new group.")
    print(f" LR scheduler will continue from step {step}.")
    print(f"{'='*60}\n")
    return 0
# Script entry point: run the deployment and use main()'s return value as the
# process exit status.
if __name__ == '__main__':
    raise SystemExit(main())