| import numpy as np |
| from mpmath import mp, mpf, mpc, power, fabs, nstr, e as euler_e, arg as mparg |
|
|
# Run every mpmath computation below with 30 significant decimal digits of
# working precision.
mp.dps = 30
|
|
def F_mp(z, N=200):
    """Return the partial sum of ``z**n / n**n`` over ``n = 1 .. N``.

    The accumulator starts as an ``mpc`` zero when ``z`` is an ``mpc`` or a
    Python ``complex`` and as an ``mpf`` zero otherwise.  Summation stops
    early once a term's magnitude falls below ``1e-30``; the term that
    triggers the stop has already been added to the sum.

    Args:
        z: Point at which the series is evaluated.
        N: Maximum number of terms to accumulate.

    Returns:
        The accumulated partial sum.
    """
    cutoff = mpf('1e-30')

    if isinstance(z, (mpc, complex)):
        total = mpc(0)
    else:
        total = mpf(0)

    for n in range(1, N + 1):
        contribution = power(z, n) / power(n, n)
        total += contribution
        # The small term is included before we stop, so the tail that is
        # dropped starts strictly after it.
        if fabs(contribution) < cutoff:
            break

    return total
|
|
def Fprime_mp(z, N=200):
    """Return the partial sum of ``z**(n-1) / n**(n-1)`` over ``n = 1 .. N``.

    Each term is the derivative with respect to ``z`` of ``z**n / n**n``,
    so this is the term-by-term derivative of the series summed by ``F_mp``.
    The accumulator starts as an ``mpc`` zero when ``z`` is an ``mpc`` or a
    Python ``complex`` and as an ``mpf`` zero otherwise.  Summation stops
    early once a term's magnitude falls below ``1e-30``; that term is still
    part of the returned sum.

    Args:
        z: Point at which the derivative series is evaluated.
        N: Maximum number of terms to accumulate.

    Returns:
        The accumulated partial sum.
    """
    cutoff = mpf('1e-30')

    if isinstance(z, (mpc, complex)):
        total = mpc(0)
    else:
        total = mpf(0)

    for n in range(1, N + 1):
        exponent = n - 1
        contribution = power(z, exponent) / power(n, exponent)
        total += contribution
        if fabs(contribution) < cutoff:
            break

    return total
|
|
# Tabulate the logarithmic derivative F'/F on a grid of complex points.
# For every point the table shows the magnitude and phase of F'/F and a
# coarse label: magnitude above 1.1 is "EXPLODING", below 0.9 is "VANISHING",
# anything in between is "~STABLE".
print("--- Complex gradient |F'/F| on grid near roots ---")
# The column label names the same quantity as the banner.  It is built with
# str.format because the apostrophe in |F'/F| cannot be written inside a
# single-quoted f-string field.
print("{:>6s} {:>6s} {:>10s} {:>10s} {:>12s}".format(
    "Re", "Im", "|F'/F|", "arg", "Behavior"))
for re_part in [-3.0, -4.0, -5.0, -6.0, -7.0]:
    for im_part in [0, 15, 30, 50, 70, 85]:
        zv = mpc(re_part, im_part)
        Fv = F_mp(zv)
        Fpv = Fprime_mp(zv)
        if fabs(Fv) > mpf('1e-10'):
            ratio = Fpv / Fv
            mag = float(fabs(ratio))
            phase = float(mparg(ratio))
            if mag > 1.1:
                behavior = "EXPLODING"
            elif mag < 0.9:
                behavior = "VANISHING"
            else:
                behavior = "~STABLE"
            print(f"{re_part:6.1f} {im_part:6.0f} {mag:10.4f} {phase:10.4f} {behavior:>12s}")
        else:
            # F is numerically zero at this point, so the quotient F'/F is
            # not meaningful.  Emit a placeholder row instead of silently
            # dropping the grid point from the table.
            print(f"{re_part:6.1f} {im_part:6.0f} {'n/a':>10s} {'n/a':>10s} {'F~0':>12s}")
|
|
# Estimate alpha in the ansatz F'/F ~ 1/e + alpha/z on the positive real
# axis by solving for it at each sample point: alpha = z * (F'/F - 1/e).
print("\n--- Asymptotic alpha in F'/F ~ 1/e + alpha/z ---")
inv_e = mpf(1)/euler_e  # constant part of the ansatz, computed once
for z_val in (5, 10, 20, 50, 100, 200):
    zv = mpf(z_val)
    ratio = Fprime_mp(zv) / F_mp(zv)
    alpha = zv * (ratio - inv_e)
    ratio_txt = nstr(ratio, 10)
    alpha_txt = nstr(alpha, 6)
    print(f" z={z_val:4d}: F'/F = {ratio_txt:>14s}, alpha ~ {alpha_txt:>10s}")
|
|
| |
# Number of stacked layers whose per-layer gradient factors are multiplied.
NUM_LAYERS = 50

print("\n" + "=" * 65)
print(f"PROPER GRADIENT FLOW SIMULATION ({NUM_LAYERS} layers)")
print("=" * 65)


# Fixed seed so the "Normal(0,1) random" scenario is reproducible.
np.random.seed(42)


def _layer_gradient_factors(z):
    """Return the per-layer factors ``(exact, approx, relu, swish)`` at ``z``.

    Args:
        z: Real pre-activation value for one layer.

    Returns:
        A 4-tuple of non-negative multipliers:
          * exact  -- ``|F'(z) / F(z)|`` from the mpmath series, or ``1.0``
            when ``|F(z)|`` is at most ``1e-10`` (quotient not usable).
          * approx -- ``|1/e + 1/(2*|z|)|`` with ``|z|`` clamped below at 0.1
            so the second term stays bounded near zero.
          * relu   -- ``1.0`` for ``z > 0`` and ``0.0`` otherwise.
          * swish  -- ``|s + z*s*(1-s)|`` with ``s`` the logistic sigmoid of
            ``z``, i.e. the magnitude of the derivative of ``z * s``.
    """
    zv = mpf(z)
    Fv = F_mp(zv)
    Fpv = Fprime_mp(zv)
    if fabs(Fv) > mpf('1e-10'):
        exact_ratio = float(fabs(Fpv / Fv))
    else:
        exact_ratio = 1.0

    approx_ratio = abs(1/float(euler_e) + 1/(2*max(abs(z), 0.1)))
    relu_grad = 1.0 if z > 0 else 0.0
    s = 1 / (1 + np.exp(-z))
    swish_grad = abs(s + z * s * (1-s))
    return exact_ratio, approx_ratio, relu_grad, swish_grad


for scenario_name, init_z in [
    ("z=5.0 (document's choice)", 5.0),
    ("z=1.36 (critical point)", 1.360372),
    ("z=0.5 (small positive)", 0.5),
    ("z=-2.0 (negative)", -2.0),
    ("Normal(0,1) random", None),
]:
    cumul_exact = 1.0
    cumul_approx = 1.0
    cumul_relu = 1.0
    cumul_swish = 1.0

    # With a fixed pre-activation every layer contributes the same factors,
    # so the (expensive) multi-precision series are summed only once.
    fixed_factors = None
    if init_z is not None:
        fixed_factors = _layer_gradient_factors(init_z)

    for layer in range(NUM_LAYERS):
        if fixed_factors is None:
            # Random scenario: draw a fresh standard-normal value per layer.
            factors = _layer_gradient_factors(np.random.randn())
        else:
            factors = fixed_factors
        exact_ratio, approx_ratio, relu_grad, swish_grad = factors

        # Multiply layer by layer (rather than raising to a power) so the
        # floating-point rounding matches a straightforward accumulation.
        cumul_exact *= exact_ratio
        cumul_approx *= approx_ratio
        cumul_relu *= relu_grad
        cumul_swish *= swish_grad

    print(f"\n {scenario_name}:")
    print(f" Exact |F'/F|^{NUM_LAYERS} = {cumul_exact:.4e}")
    print(f" Approx |F'/F|^{NUM_LAYERS} = {cumul_approx:.4e}")
    print(f" ReLU prod(grad) = {cumul_relu:.4e}")
    print(f" Swish prod(grad) = {cumul_swish:.4e}")
|
|
|
|