Spaces:
Sleeping
Sleeping
"""
Simulation engine for deep network signal propagation.

This module simulates how signals propagate through deep residual networks
with different residual mixing strategies:

- baseline: Identity matrices (no mixing, standard residual connections)
- hc: Random unconstrained matrices (Hyper-Connections)
- mhc: Sinkhorn-projected doubly stochastic matrices (Manifold-Constrained HC)

Key insight from the mHC paper:
The COMPOSITE mapping (the product of all layer matrices,
H_L @ H_{L-1} @ ... @ H_0) is what matters for signal propagation:

- For HC: composite gains explode exponentially (3000x+ at depth 64)
- For mHC: composite gains stay bounded (~1.6x at depth 64)

This happens because doubly stochastic matrices are closed under
multiplication.

Author: Subhadip Mitra <contact@subhadipmitra.com>
Based on DeepSeek's mHC paper: https://arxiv.org/abs/2512.24880
"""
| import numpy as np | |
| from typing import Dict, Literal, Optional | |
| from .sinkhorn import sinkhorn_knopp | |
| from .metrics import compute_all_metrics | |
def generate_residual_matrix(
    n: int,
    method: Literal['baseline', 'hc', 'mhc'],
    sinkhorn_iters: int = 20,
    rng: Optional[np.random.Generator] = None
) -> np.ndarray:
    """
    Generate a single residual mixing matrix.

    Args:
        n: Size of the square matrix (number of streams).
        method: One of:
            - 'baseline': identity matrix (no mixing)
            - 'hc': random matrix with N(0, 1) entries
            - 'mhc': the same kind of random matrix, passed through
              ``sinkhorn_knopp`` (Sinkhorn projection towards a doubly
              stochastic matrix)
        sinkhorn_iters: Iteration count forwarded to ``sinkhorn_knopp`` for
            the 'mhc' method. A value of 0 skips the projection and returns
            the raw random matrix, i.e. the same output as 'hc'.
        rng: Random number generator for reproducibility. A fresh,
            unseeded generator is created when omitted.

    Returns:
        Residual mixing matrix of shape (n, n).

    Raises:
        ValueError: If ``method`` is not 'baseline', 'hc', or 'mhc'. The
            check happens before any random numbers are drawn, so a failed
            call leaves the caller's ``rng`` state untouched.

    Example:
        >>> rng = np.random.default_rng(42)
        >>> M = generate_residual_matrix(4, 'mhc', sinkhorn_iters=20, rng=rng)
        >>> M.shape
        (4, 4)
    """
    # Validate first: rejecting a bad method must not advance the caller's
    # generator, otherwise a caught error silently shifts later draws.
    if method not in ('baseline', 'hc', 'mhc'):
        raise ValueError(f"Unknown method: {method}. Expected 'baseline', 'hc', or 'mhc'.")

    if method == 'baseline':
        return np.eye(n)

    if rng is None:
        rng = np.random.default_rng()

    # HC and mHC both start from the same kind of raw Gaussian matrix.
    M = rng.standard_normal((n, n))

    # With zero Sinkhorn iterations mHC intentionally degenerates to HC, so
    # the transition from explosive to stable behavior can be shown.
    if method == 'hc' or sinkhorn_iters == 0:
        return M

    return sinkhorn_knopp(M, iterations=sinkhorn_iters)
def simulate_depth(
    depth: int,
    n: int,
    method: Literal['baseline', 'hc', 'mhc'],
    sinkhorn_iters: int = 20,
    seed: int = 42
) -> Dict:
    """
    Simulate signal propagation through a stack of residual mixing matrices.

    One matrix is generated per layer with ``generate_residual_matrix``,
    all drawn from a single generator seeded with ``seed``. After each
    layer the running product

        Composite(l) = H_l @ H_{l-1} @ ... @ H_1 @ H_0

    is updated, and ``compute_all_metrics`` is evaluated on both the layer
    matrix and the running product.

    Args:
        depth: Number of layers to simulate.
        n: Matrix size (number of streams in the multi-stream residual).
        method: Residual mixing strategy ('baseline', 'hc', or 'mhc').
        sinkhorn_iters: Number of Sinkhorn iterations used for mHC.
        seed: Seed for the random number generator.

    Returns:
        Dict with the keys:
            - 'method', 'depth', 'n', 'sinkhorn_iters', 'seed': the
              arguments, echoed back unchanged
            - 'per_layer': one dict per layer holding 'layer' (the index)
              plus the metrics of that layer's own matrix
            - 'composite': one dict per layer holding 'upto_layer' (the
              index) plus the metrics of the product up to that layer

    Example:
        >>> result = simulate_depth(64, 4, 'mhc', seed=42)
        >>> result['composite'][-1]['forward_gain'] < 5
        True
    """
    generator = np.random.default_rng(seed)
    running_product = np.eye(n)  # the empty product is the identity
    layer_records = []
    cumulative_records = []

    for idx in range(depth):
        layer_matrix = generate_residual_matrix(n, method, sinkhorn_iters, generator)

        # Metrics of this layer's matrix taken on its own.
        layer_entry = {'layer': idx}
        layer_entry.update(compute_all_metrics(layer_matrix))
        layer_records.append(layer_entry)

        # Newer layers multiply from the left: H_idx @ (H_{idx-1} @ ... @ H_0).
        running_product = layer_matrix @ running_product

        # Metrics of everything applied so far, input through layer idx.
        cumulative_entry = {'upto_layer': idx}
        cumulative_entry.update(compute_all_metrics(running_product))
        cumulative_records.append(cumulative_entry)

    return dict(
        method=method,
        depth=depth,
        n=n,
        sinkhorn_iters=sinkhorn_iters,
        seed=seed,
        per_layer=layer_records,
        composite=cumulative_records,
    )
def run_comparison(
    depth: int = 64,
    n: int = 4,
    sinkhorn_iters: int = 20,
    seed: int = 42
) -> Dict:
    """
    Run the simulation once per mixing strategy and collect the results.

    This is the main entry point for generating comparison data. It calls
    ``simulate_depth`` for 'baseline', 'hc', and 'mhc' (in that order),
    passing identical parameters to each so the runs are directly
    comparable.

    Args:
        depth: Number of layers to simulate.
        n: Matrix size (number of streams).
        sinkhorn_iters: Number of Sinkhorn iterations for mHC.
        seed: Random seed, shared by all three runs.

    Returns:
        Dict keyed by 'baseline', 'hc', and 'mhc'; each value is the
        result dict returned by ``simulate_depth`` for that method.

    Example:
        >>> results = run_comparison(depth=64, n=4, seed=42)
        >>> # HC should explode
        >>> results['hc']['composite'][-1]['forward_gain'] > 10
        True
        >>> # mHC should stay bounded
        >>> results['mhc']['composite'][-1]['forward_gain'] < 5
        True
    """
    strategies = ('baseline', 'hc', 'mhc')
    return {
        name: simulate_depth(depth, n, name, sinkhorn_iters, seed)
        for name in strategies
    }
if __name__ == "__main__":
    # Quick demo when run directly: print the final composite metrics of
    # each strategy after a 64-layer, 4-stream run.
    print("Running mHC simulation comparison...")
    print("=" * 50)

    results = run_comparison(depth=64, n=4, seed=42)

    reported_metrics = ('forward_gain', 'backward_gain', 'spectral_norm')
    for method in ('baseline', 'hc', 'mhc'):
        # The last 'composite' entry covers the product of every layer.
        last_entry = results[method]['composite'][-1]
        print(f"\n{method.upper()}:")
        for metric in reported_metrics:
            print(f" Final composite {metric}: {last_entry[metric]:.4f}")