# mhc-stability/mhc/simulation.py
"""
Simulation engine for deep network signal propagation.
This module simulates how signals propagate through deep residual networks
with different residual mixing strategies:
- baseline: Identity matrices (no mixing, standard residual connections)
- hc: Random unconstrained matrices (Hyper-Connections)
- mhc: Sinkhorn-projected doubly stochastic matrices (Manifold-Constrained HC)
Key insight from the mHC paper:
The COMPOSITE mapping (product of all layer matrices H_L @ H_{L-1} @ ... @ H_0)
is what matters for signal propagation:
- For HC: composite gains explode exponentially (3000x+ at depth 64)
- For mHC: composite gains stay bounded (~1.6x at depth 64)
This happens because doubly stochastic matrices are closed under multiplication.
Author: Subhadip Mitra <contact@subhadipmitra.com>
Based on DeepSeek's mHC paper: https://arxiv.org/abs/2512.24880
"""
import numpy as np
from typing import Dict, Literal, Optional
from .sinkhorn import sinkhorn_knopp
from .metrics import compute_all_metrics
def generate_residual_matrix(
    n: int,
    method: Literal['baseline', 'hc', 'mhc'],
    sinkhorn_iters: int = 20,
    rng: Optional[np.random.Generator] = None
) -> np.ndarray:
    """
    Build a single residual mixing matrix for one layer.

    Args:
        n: Side length of the square matrix (number of residual streams).
        method: Mixing strategy:
            - 'baseline': identity (plain residual connection, no mixing)
            - 'hc': unconstrained Gaussian draw, entries ~ N(0, 1)
            - 'mhc': Gaussian draw projected onto the doubly stochastic
              manifold via Sinkhorn-Knopp
        sinkhorn_iters: Sinkhorn iteration count for 'mhc'. A value of 0
            deliberately skips the projection so the raw (HC-like) matrix
            is returned, which lets callers visualize the k=0 -> k>0
            transition from explosive to stable composite gains.
        rng: Optional generator for reproducible draws; a fresh default
            generator is created when omitted.

    Returns:
        An (n, n) ndarray.

    Raises:
        ValueError: If ``method`` is not one of the three known strategies.

    Example:
        >>> rng = np.random.default_rng(42)
        >>> M = generate_residual_matrix(4, 'mhc', sinkhorn_iters=20, rng=rng)
        >>> M.shape
        (4, 4)
    """
    generator = np.random.default_rng() if rng is None else rng

    # Baseline short-circuits before any random draw, so it never
    # advances the generator state.
    if method == 'baseline':
        return np.eye(n)

    # Both 'hc' and 'mhc' (and the error path, matching original draw
    # order) consume one (n, n) Gaussian sample from the generator.
    raw = generator.standard_normal((n, n))

    if method == 'hc':
        return raw
    if method == 'mhc':
        # k=0: hand back the unconstrained matrix to show HC-like
        # explosion; k>0: project toward doubly stochastic.
        return raw if sinkhorn_iters == 0 else sinkhorn_knopp(raw, iterations=sinkhorn_iters)

    raise ValueError(f"Unknown method: {method}. Expected 'baseline', 'hc', or 'mhc'.")
def simulate_depth(
    depth: int,
    n: int,
    method: Literal['baseline', 'hc', 'mhc'],
    sinkhorn_iters: int = 20,
    seed: int = 42
) -> Dict:
    """
    Propagate a signal through ``depth`` stacked residual mixing layers.

    At each layer l the running composite is updated by left
    multiplication:

        Composite(l) = H_l @ H_{l-1} @ ... @ H_1 @ H_0

    i.e. the total linear map applied to the input signal up to layer l.
    Metrics are recorded both for each individual layer matrix and for
    the cumulative composite at every depth.

    Args:
        depth: Number of layers to generate.
        n: Matrix size (number of residual streams).
        method: Mixing strategy ('baseline', 'hc', or 'mhc').
        sinkhorn_iters: Sinkhorn iteration count forwarded to the
            matrix generator (only meaningful for 'mhc').
        seed: Seed for the random generator (full reproducibility).

    Returns:
        Dict with the run configuration ('method', 'depth', 'n',
        'sinkhorn_iters', 'seed') plus:
            - 'per_layer': list of per-layer metric dicts (key 'layer')
            - 'composite': list of cumulative metric dicts (key 'upto_layer')

    Example:
        >>> result = simulate_depth(64, 4, 'mhc', seed=42)
        >>> result['composite'][-1]['forward_gain'] < 5
        True
    """
    generator = np.random.default_rng(seed)

    layer_records = []
    composite_records = []
    running_product = np.eye(n)  # depth-0 composite is the identity

    for idx in range(depth):
        H = generate_residual_matrix(n, method, sinkhorn_iters, generator)

        # Metrics of this layer's matrix in isolation.
        record = {'layer': idx}
        record.update(compute_all_metrics(H))
        layer_records.append(record)

        # Fold this layer into the composite (left multiplication).
        running_product = H @ running_product
        cumulative = {'upto_layer': idx}
        cumulative.update(compute_all_metrics(running_product))
        composite_records.append(cumulative)

    return {
        'method': method,
        'depth': depth,
        'n': n,
        'sinkhorn_iters': sinkhorn_iters,
        'seed': seed,
        'per_layer': layer_records,
        'composite': composite_records,
    }
def run_comparison(
    depth: int = 64,
    n: int = 4,
    sinkhorn_iters: int = 20,
    seed: int = 42
) -> Dict:
    """
    Run the depth simulation for all three mixing strategies.

    Main entry point for producing comparison data: each method is run
    with identical parameters (including the seed) so the resulting
    curves are directly comparable.

    Args:
        depth: Number of layers per simulation.
        n: Matrix size (number of residual streams).
        sinkhorn_iters: Sinkhorn iterations used by the 'mhc' run.
        seed: Shared random seed for all three runs.

    Returns:
        Dict keyed by 'baseline', 'hc', 'mhc', each holding the
        corresponding simulate_depth result.

    Example:
        >>> results = run_comparison(depth=64, n=4, seed=42)
        >>> # Baseline should stay at 1
        >>> results['baseline']['composite'][-1]['forward_gain']
        1.0
        >>> # HC should explode
        >>> results['hc']['composite'][-1]['forward_gain'] > 10
        True
        >>> # mHC should stay bounded
        >>> results['mhc']['composite'][-1]['forward_gain'] < 5
        True
    """
    return {
        strategy: simulate_depth(depth, n, strategy, sinkhorn_iters, seed)
        for strategy in ('baseline', 'hc', 'mhc')
    }
if __name__ == "__main__":
    # Quick demo when run directly: compare the final composite metrics
    # of all three strategies at depth 64.
    print("Running mHC simulation comparison...")
    print("=" * 50)
    demo = run_comparison(depth=64, n=4, seed=42)
    for strategy in ('baseline', 'hc', 'mhc'):
        final_composite = demo[strategy]['composite'][-1]
        print(f"\n{strategy.upper()}:")
        print(f" Final composite forward_gain: {final_composite['forward_gain']:.4f}")
        print(f" Final composite backward_gain: {final_composite['backward_gain']:.4f}")
        print(f" Final composite spectral_norm: {final_composite['spectral_norm']:.4f}")