File size: 2,997 Bytes
ca18949
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import numpy as np
from mpmath import mp, mpf, mpc, power, fabs, nstr, e as euler_e, arg as mparg

# Run every mpmath computation below at 30 significant decimal digits; the
# 1e-30 series cutoffs in F_mp / Fprime_mp are chosen to match this setting.
mp.dps = 30

def F_mp(z, N=200):
    """Evaluate the partial sum of z**n / n**n for n = 1..N with mpmath.

    Args:
        z: Evaluation point. For an ``mpc`` or built-in ``complex`` value the
            accumulator starts as ``mpc(0)``; for anything else it starts as
            ``mpf(0)``.
        N: Maximum number of series terms to add.

    Returns:
        The accumulated sum. Summation stops early right after adding the
        first term whose magnitude is below 1e-30.
    """
    if isinstance(z, (mpc, complex)):
        total = mpc(0)
    else:
        total = mpf(0)

    cutoff = mpf('1e-30')
    for index in range(1, N + 1):
        contribution = power(z, index) / power(index, index)
        total += contribution
        # The small term is still added before leaving the loop.
        if fabs(contribution) < cutoff:
            break
    return total

def Fprime_mp(z, N=200):
    """Evaluate the partial sum of z**(n-1) / n**(n-1) for n = 1..N.

    Each term is the derivative with respect to z of z**n / n**n, so this is
    the term-by-term derivative of the series summed by ``F_mp``.

    Args:
        z: Evaluation point. For an ``mpc`` or built-in ``complex`` value the
            accumulator starts as ``mpc(0)``; for anything else it starts as
            ``mpf(0)``.
        N: Maximum number of series terms to add.

    Returns:
        The accumulated sum. Summation stops early right after adding the
        first term whose magnitude is below 1e-30.
    """
    if isinstance(z, (mpc, complex)):
        acc = mpc(0)
    else:
        acc = mpf(0)

    threshold = mpf('1e-30')
    # Loop over the exponent k = n - 1, so the series index is n = k + 1.
    for k in range(N):
        piece = power(z, k) / power(k + 1, k)
        acc += piece
        if fabs(piece) < threshold:
            break
    return acc

# Tabulate the magnitude and phase of F'/F on a grid of complex points and
# label each magnitude as exploding (> 1.1), vanishing (< 0.9) or roughly stable.
print("--- Complex gradient |F'/F| on grid near roots ---")
# The label contains a quote character, so it is bound to a name rather than
# written inline inside the f-string expression.
ratio_label = "|F'/F|"
print(f"{'Re':>6s} {'Im':>6s}  {ratio_label:>10s}  {'arg':>10s}  {'Behavior':>12s}")
for re in [-3.0, -4.0, -5.0, -6.0, -7.0]:
    for im in [0, 15, 30, 50, 70, 85]:
        zv = mpc(re, im)
        Fv = F_mp(zv)
        Fpv = Fprime_mp(zv)
        if fabs(Fv) > mpf('1e-10'):
            ratio = Fpv / Fv
            mag = float(fabs(ratio))
            phase = float(mparg(ratio))
            if mag > 1.1:
                behavior = "EXPLODING"
            elif mag < 0.9:
                behavior = "VANISHING"
            else:
                behavior = "~STABLE"
            print(f"{re:6.1f} {im:6.0f}  {mag:10.4f}  {phase:10.4f}  {behavior:>12s}")
        else:
            # |F| is too small to divide by; report the point explicitly
            # instead of silently leaving a gap in the table.
            print(f"{re:6.1f} {im:6.0f}  {'n/a':>10s}  {'n/a':>10s}  {'|F|~0':>12s}")

# Estimate alpha in the model F'/F ~ 1/e + alpha/z at increasing real z by
# solving the model for alpha: alpha = z * (F'/F - 1/e).
print("\n--- Asymptotic alpha in F'/F ~ 1/e + alpha/z ---")
inv_e = mpf(1) / euler_e  # constant term of the model; identical for every z
for z_val in (5, 10, 20, 50, 100, 200):
    zv = mpf(z_val)
    ratio = Fprime_mp(zv) / F_mp(zv)
    alpha = (ratio - inv_e) * zv
    ratio_text = nstr(ratio, 10)
    alpha_text = nstr(alpha, 6)
    print(f"  z={z_val:4d}: F'/F = {ratio_text:>14s}, alpha ~ {alpha_text:>10s}")

# Gradient flow simulation
#
# For each scenario, multiply a per-layer gradient factor across NUM_LAYERS
# layers for four candidates: the exact |F'/F|, the closed-form approximation
# 1/e + 1/(2|z|), the ReLU derivative and the Swish derivative.
NUM_LAYERS = 50  # simulated depth; also interpolated into the printed labels

print("\n" + "=" * 65)
print(f"PROPER GRADIENT FLOW SIMULATION ({NUM_LAYERS} layers)")
print("=" * 65)

# Legacy global seeding is kept so the random scenario reproduces the same
# draws as before.
np.random.seed(42)

INV_E = 1 / float(euler_e)


def _layer_factors(z):
    """Return the per-layer factors (exact, approx, relu, swish) at input z.

    Args:
        z: Real pre-activation value (built-in float or NumPy float).

    Returns:
        A 4-tuple of non-negative numbers:
        exact  -- |F'(z) / F(z)|, or 1.0 when |F(z)| <= 1e-10 so that the
                  division is skipped and the running product is unchanged;
        approx -- 1/e + 1/(2 * max(|z|, 0.1)); |z| is clamped from below so
                  the term stays bounded near zero;
        relu   -- 1.0 for z > 0, otherwise 0.0;
        swish  -- |s + z * s * (1 - s)| with s = 1 / (1 + exp(-z)).
    """
    zv = mpf(z)
    Fv = F_mp(zv)
    if fabs(Fv) > mpf('1e-10'):
        exact = float(fabs(Fprime_mp(zv) / Fv))
    else:
        exact = 1.0

    # NOTE(review): the approximation only depends on |z|, so negative inputs
    # are treated like their positive mirror image -- confirm this is intended.
    approx = INV_E + 1 / (2 * max(abs(z), 0.1))
    relu = 1.0 if z > 0 else 0.0
    s = 1 / (1 + np.exp(-z))
    swish = abs(s + z * s * (1 - s))
    return exact, approx, relu, swish


for scenario_name, init_z in [
    ("z=5.0 (document's choice)", 5.0),
    ("z=1.36 (critical point)", 1.360372),
    ("z=0.5 (small positive)", 0.5),
    ("z=-2.0 (negative)", -2.0),
    ("Normal(0,1) random", None),
]:
    cumul_exact = 1.0
    cumul_approx = 1.0
    cumul_relu = 1.0
    cumul_swish = 1.0

    # With a fixed input every layer sees the same factors, so the mpmath
    # series are evaluated once instead of once per layer.
    fixed_factors = _layer_factors(init_z) if init_z is not None else None

    for _ in range(NUM_LAYERS):
        if fixed_factors is not None:
            factors = fixed_factors
        else:
            # Random scenario: one fresh standard-normal draw per layer.
            factors = _layer_factors(np.random.randn())
        exact_ratio, approx_ratio, relu_grad, swish_grad = factors

        cumul_exact *= exact_ratio
        cumul_approx *= approx_ratio
        cumul_relu *= relu_grad
        cumul_swish *= swish_grad

    print(f"\n  {scenario_name}:")
    print(f"    Exact |F'/F|^{NUM_LAYERS}  = {cumul_exact:.4e}")
    print(f"    Approx |F'/F|^{NUM_LAYERS} = {cumul_approx:.4e}")
    print(f"    ReLU prod(grad)  = {cumul_relu:.4e}")
    print(f"    Swish prod(grad) = {cumul_swish:.4e}")