# 5dimension's picture
# Upload scripts/exp2c.py with huggingface_hub
# ca18949 verified
import numpy as np
from mpmath import mp, mpf, mpc, power, fabs, nstr, e as euler_e, arg as mparg
# Set the global mpmath working precision to 30 significant decimal digits;
# every mpf/mpc operation below runs at this precision.
mp.dps = 30
def F_mp(z, N=200):
    """Evaluate a partial sum of the series sum_{n>=1} z**n / n**n with mpmath.

    Args:
        z: Evaluation point. For ``mpc``/``complex`` input the accumulator
            starts as ``mpc(0)``; for anything else it starts as ``mpf(0)``.
        N: Maximum number of terms to add (n = 1 .. N).

    Returns:
        The accumulated partial sum. Summation stops early immediately after
        adding the first term whose magnitude is below 1e-30.
    """
    if isinstance(z, (mpc, complex)):
        total = mpc(0)
    else:
        total = mpf(0)

    # Absolute cutoff on the size of an individual term.
    cutoff = mpf('1e-30')

    for k in range(1, N + 1):
        numerator = power(z, k)
        denominator = power(k, k)
        contribution = numerator / denominator
        total += contribution
        # The small term is included in the sum before the loop exits.
        if fabs(contribution) < cutoff:
            break

    return total
def Fprime_mp(z, N=200):
    """Evaluate a partial sum of sum_{n>=1} z**(n-1) / n**(n-1) with mpmath.

    This is the term-by-term derivative of sum z**n / n**n, since
    n * z**(n-1) / n**n == z**(n-1) / n**(n-1).

    Args:
        z: Evaluation point. For ``mpc``/``complex`` input the accumulator
            starts as ``mpc(0)``; for anything else it starts as ``mpf(0)``.
        N: Maximum number of terms to add (n = 1 .. N).

    Returns:
        The accumulated partial sum. Summation stops early immediately after
        adding the first term whose magnitude is below 1e-30.
    """
    if isinstance(z, (mpc, complex)):
        total = mpc(0)
    else:
        total = mpf(0)

    # Absolute cutoff on the size of an individual term.
    cutoff = mpf('1e-30')

    for k in range(1, N + 1):
        exponent = k - 1
        numerator = power(z, exponent)
        denominator = power(k, exponent)
        contribution = numerator / denominator
        total += contribution
        # The small term is included in the sum before the loop exits.
        if fabs(contribution) < cutoff:
            break

    return total
# Tabulate the magnitude and phase of the logarithmic derivative F'/F at grid
# points z = re + i*im, and label each point by how a gradient scaled by
# |F'/F| would behave: > 1.1 "EXPLODING", < 0.9 "VANISHING", otherwise "~STABLE".
# (The banner describes the grid as lying near roots of F; that is not
# verified here.)
print("--- Complex gradient |F'/F| on grid near roots ---")
# The label is kept in a variable because it contains an apostrophe, which
# cannot appear inside a replacement field of a double-quoted f-string on
# older Python versions.
ratio_label = "|F'/F|"
print(f"{'Re':>6s} {'Im':>6s} {ratio_label:>10s} {'arg':>10s} {'Behavior':>12s}")
for re_part in [-3.0, -4.0, -5.0, -6.0, -7.0]:
    for im_part in [0, 15, 30, 50, 70, 85]:
        zv = mpc(re_part, im_part)
        Fv = F_mp(zv)
        if fabs(Fv) <= mpf('1e-10'):
            # F is numerically zero here, so F'/F is not meaningful. Report
            # the point explicitly instead of silently dropping the row.
            print(f"{re_part:6.1f} {im_part:6.0f} {'n/a':>10s} {'n/a':>10s} {'F~0':>12s}")
            continue
        ratio = Fprime_mp(zv) / Fv
        mag = float(fabs(ratio))
        phase = float(mparg(ratio))
        if mag > 1.1:
            behavior = "EXPLODING"
        elif mag < 0.9:
            behavior = "VANISHING"
        else:
            behavior = "~STABLE"
        print(f"{re_part:6.1f} {im_part:6.0f} {mag:10.4f} {phase:10.4f} {behavior:>12s}")
# Estimate alpha in the asymptotic form F'/F ~ 1/e + alpha/z by solving for
# alpha at each sample point: alpha = z * (F'/F - 1/e).
print("\n--- Asymptotic alpha in F'/F ~ 1/e + alpha/z ---")
inv_e = mpf(1) / euler_e
for sample in (5, 10, 20, 50, 100, 200):
    point = mpf(sample)
    log_deriv = Fprime_mp(point) / F_mp(point)
    alpha_est = point * (log_deriv - inv_e)
    ratio_text = nstr(log_deriv, 10)
    alpha_text = nstr(alpha_est, 6)
    print(f" z={sample:4d}: F'/F = {ratio_text:>14s}, alpha ~ {alpha_text:>10s}")
# Gradient flow simulation: multiply a per-layer gradient factor over 50
# layers for several choices of the input z, comparing the exact |F'/F| with
# its 1/e + 1/(2|z|) approximation and with the ReLU and Swish derivatives.
print("\n" + "=" * 65)
print("PROPER GRADIENT FLOW SIMULATION (50 layers)")
print("=" * 65)
np.random.seed(42)  # fixed seed so the random scenario is reproducible


def _layer_factors(z):
    """Return the per-layer factors ``(exact, approx, relu, swish)`` at input z.

    exact:  |F'(z)/F(z)| from the mpmath series, or 1.0 when |F(z)| <= 1e-10
            so a near-zero denominator leaves the running product unchanged.
    approx: |1/e + 1/(2*max(|z|, 0.1))|; |z| is floored at 0.1 so the 1/z
            term stays bounded.
    relu:   1.0 for z > 0, otherwise 0.0.
    swish:  |s + z*s*(1 - s)| with s = 1/(1 + exp(-z)).
    """
    zv = mpf(z)
    Fv = F_mp(zv)
    if fabs(Fv) > mpf('1e-10'):
        exact = float(fabs(Fprime_mp(zv) / Fv))
    else:
        exact = 1.0
    approx = abs(1/float(euler_e) + 1/(2*max(abs(z), 0.1)))
    relu = 1.0 if z > 0 else 0.0
    s = 1 / (1 + np.exp(-z))
    swish = abs(s + z * s * (1-s))
    return exact, approx, relu, swish


for scenario_name, init_z in [
    ("z=5.0 (document's choice)", 5.0),
    ("z=1.36 (critical point)", 1.360372),
    ("z=0.5 (small positive)", 0.5),
    ("z=-2.0 (negative)", -2.0),
    ("Normal(0,1) random", None),
]:
    # A fixed z yields identical factors in every layer, so evaluate the
    # mpmath series once per scenario. Only the random scenario (init_z is
    # None) draws a fresh z, and therefore fresh factors, for each layer.
    fixed_factors = None if init_z is None else _layer_factors(init_z)

    cumul_exact = 1.0
    cumul_approx = 1.0
    cumul_relu = 1.0
    cumul_swish = 1.0
    for _ in range(50):
        if fixed_factors is None:
            factors = _layer_factors(np.random.randn())
        else:
            factors = fixed_factors
        exact_ratio, approx_ratio, relu_grad, swish_grad = factors
        # Keep the step-by-step multiplication so the floating-point result
        # matches a layer-by-layer accumulation exactly.
        cumul_exact *= exact_ratio
        cumul_approx *= approx_ratio
        cumul_relu *= relu_grad
        cumul_swish *= swish_grad

    print(f"\n {scenario_name}:")
    print(f" Exact |F'/F|^50 = {cumul_exact:.4e}")
    print(f" Approx |F'/F|^50 = {cumul_approx:.4e}")
    print(f" ReLU prod(grad) = {cumul_relu:.4e}")
    print(f" Swish prod(grad) = {cumul_swish:.4e}")