"""Numerical study of the gradient ratio F'(z)/F(z) for F(z) = sum z^n / n^n.

The script prints three reports, in order:

1. |F'/F| and its phase on a grid of complex points.
2. The coefficient alpha in the fit F'/F ~ 1/e + alpha/z for growing real z.
3. A 50-layer product of per-layer gradient factors, comparing the exact
   ratio, the 1/e + 1/(2|z|) approximation, ReLU and Swish.
"""
import numpy as np
from mpmath import mp, mpf, mpc, power, fabs, nstr, e as euler_e, arg as mparg

# All mpmath arithmetic below is carried out with 30 significant digits.
mp.dps = 30


def _truncated_series(z, N, shift):
    """Sum z**(n - shift) / n**(n - shift) for n = 1..N with early exit.

    The accumulator is complex when ``z`` is an ``mpc``/``complex`` and real
    otherwise.  Summation stops as soon as a term's magnitude drops below
    1e-30 (an absolute tolerance); that term is still included in the sum.
    If the tolerance is never reached, exactly ``N`` terms are summed.
    """
    total = mpc(0) if isinstance(z, (mpc, complex)) else mpf(0)
    tolerance = mpf('1e-30')
    for n in range(1, N + 1):
        exponent = n - shift
        term = power(z, exponent) / power(n, exponent)
        total += term
        if fabs(term) < tolerance:
            break
    return total


def F_mp(z, N=200):
    """Return the partial sum of F(z) = sum_{n>=1} z**n / n**n.

    Args:
        z: Evaluation point (real or complex, Python or mpmath number).
        N: Maximum number of terms to sum.

    Returns:
        An ``mpc`` if ``z`` is ``mpc``/``complex``, otherwise an ``mpf``.
    """
    return _truncated_series(z, N, 0)


def Fprime_mp(z, N=200):
    """Return the partial sum of F'(z) = sum_{n>=1} z**(n-1) / n**(n-1).

    This is the term-by-term derivative of the series summed by ``F_mp``.

    Args:
        z: Evaluation point (real or complex, Python or mpmath number).
        N: Maximum number of terms to sum.

    Returns:
        An ``mpc`` if ``z`` is ``mpc``/``complex``, otherwise an ``mpf``.
    """
    return _truncated_series(z, N, 1)


def _gradient_ratio(z):
    """Return F'(z)/F(z), or None when |F(z)| is not above 1e-10.

    The threshold keeps the division away from points where F is
    (numerically) zero and the ratio would blow up.
    """
    f_val = F_mp(z)
    if fabs(f_val) > mpf('1e-10'):
        return Fprime_mp(z) / f_val
    return None


def _classify(magnitude):
    """Label a per-layer gradient magnitude using a +/-10% band around 1."""
    if magnitude > 1.1:
        return "EXPLODING"
    if magnitude < 0.9:
        return "VANISHING"
    return "~STABLE"


def report_complex_gradient_grid():
    """Print |F'/F|, its phase and a stability label on a complex grid.

    Grid points where |F| is not above 1e-10 are skipped (no row printed).
    """
    print("--- Complex gradient |F'/F| on grid near roots ---")
    print("{:>6s} {:>6s} {:>10s} {:>10s} {:>12s}".format(
        "Re", "Im", "|F'/F|", "arg", "Behavior"))
    for re_part in (-3.0, -4.0, -5.0, -6.0, -7.0):
        for im_part in (0, 15, 30, 50, 70, 85):
            ratio = _gradient_ratio(mpc(re_part, im_part))
            if ratio is None:
                continue
            mag = float(fabs(ratio))
            phase = float(mparg(ratio))
            behavior = _classify(mag)
            print(f"{re_part:6.1f} {im_part:6.0f} {mag:10.4f} "
                  f"{phase:10.4f} {behavior:>12s}")


def report_asymptotic_alpha():
    """Print F'/F and alpha = z * (F'/F - 1/e) for increasing real z."""
    print("\n--- Asymptotic alpha in F'/F ~ 1/e + alpha/z ---")
    inv_e = mpf(1) / euler_e
    for z_val in (5, 10, 20, 50, 100, 200):
        zv = mpf(z_val)
        ratio = Fprime_mp(zv) / F_mp(zv)
        alpha = zv * (ratio - inv_e)
        print(f" z={z_val:4d}: F'/F = {nstr(ratio, 10):>14s}, "
              f"alpha ~ {nstr(alpha, 6):>10s}")


def _layer_factors(z):
    """Return the (exact, approx, relu, swish) gradient factors at ``z``.

    * exact  -- |F'(z)/F(z)|, or 1.0 when |F(z)| is not above 1e-10.
    * approx -- |1/e + 1/(2*max(|z|, 0.1))|; the clamp avoids dividing by
      values close to zero.
    * relu   -- 1.0 for z > 0, else 0.0.
    * swish  -- |s + z*s*(1 - s)| with s = sigmoid(z).
    """
    ratio = _gradient_ratio(mpf(z))
    exact = float(fabs(ratio)) if ratio is not None else 1.0
    approx = abs(1 / float(euler_e) + 1 / (2 * max(abs(z), 0.1)))
    relu = 1.0 if z > 0 else 0.0
    sig = 1 / (1 + np.exp(-z))
    swish = abs(sig + z * sig * (1 - sig))
    return exact, approx, relu, swish


def simulate_gradient_flow(layers=50):
    """Print the product of per-layer gradient factors over ``layers`` layers.

    Each scenario either reuses one fixed pre-activation ``z`` for every
    layer or, when its value is ``None``, draws a fresh standard-normal ``z``
    per layer.  The NumPy global RNG is seeded with 42 first.

    Args:
        layers: Number of layers multiplied together (50 in the original
            script).
    """
    print("\n" + "=" * 65)
    print(f"PROPER GRADIENT FLOW SIMULATION ({layers} layers)")
    print("=" * 65)
    np.random.seed(42)
    scenarios = (
        ("z=5.0 (document's choice)", 5.0),
        ("z=1.36 (critical point)", 1.360372),
        ("z=0.5 (small positive)", 0.5),
        ("z=-2.0 (negative)", -2.0),
        ("Normal(0,1) random", None),
    )
    for scenario_name, init_z in scenarios:
        # With a fixed z the factors are identical for every layer, so the
        # expensive 30-digit series are evaluated once instead of per layer.
        fixed = _layer_factors(init_z) if init_z is not None else None
        cumul_exact = cumul_approx = cumul_relu = cumul_swish = 1.0
        for _ in range(layers):
            if fixed is not None:
                factors = fixed
            else:
                factors = _layer_factors(np.random.randn())
            exact_ratio, approx_ratio, relu_grad, swish_grad = factors
            cumul_exact *= exact_ratio
            cumul_approx *= approx_ratio
            cumul_relu *= relu_grad
            cumul_swish *= swish_grad
        print(f"\n {scenario_name}:")
        print(f" Exact |F'/F|^{layers} = {cumul_exact:.4e}")
        print(f" Approx |F'/F|^{layers} = {cumul_approx:.4e}")
        print(f" ReLU prod(grad) = {cumul_relu:.4e}")
        print(f" Swish prod(grad) = {cumul_swish:.4e}")


# The reports run at module level, in the same order as the original script.
report_complex_gradient_grid()
report_asymptotic_alpha()
simulate_gradient_flow()