Add max_iter cap and non-finite checks to _optimal_quintic [skip-build]

Add a max_iter parameter (default 1000) that bounds the Remez iteration,
replacing the open-ended while loop. The function now raises ValueError
if the linear solve or the node update produces non-finite values (NaN
or inf), and RuntimeError if convergence is not reached within max_iter
iterations.

Files changed (1)

torch-ext/optimizer/newton_schulz.py +19 -3

torch-ext/optimizer/newton_schulz.py (changed)

@@ -10,7 +10,7 @@ COMM_DTYPE = torch.bfloat16
 DEFAULT_CHUNK_SIZE_RATIO = 4
-def _optimal_quintic(l, u):
     """
     Use the simplified Remez algorithm to find the optimal odd quintic approximant
     to the constant function x -> 1 over the interval [l, u].
@@ -19,14 +19,18 @@ def _optimal_quintic(l, u):
     approximation error max_{x in [l,u]} |p(x) - 1|. Iterates by updating the
     two interior equioscillation nodes q, r until convergence. Returns the
     closed-form equioscillating solution when l ≈ u.
     """
     assert 0 <= l <= u
     if 1 - 5e-6 <= l / u:
         return (15 / 8) / u, (-10 / 8) / (u**3), (3 / 8) / (u**5)
     q = (3 * l + u) / 4
     r = (l + 3 * u) / 4
-    E, old_E = inf, None
-    while not old_E or abs(old_E - E) > 1e-15:
         old_E = E
         LHS = np.array([
             [l, l**3, l**5, 1],
@@ -35,9 +39,21 @@ def _optimal_quintic(l, u):
             [u, u**3, u**5, -1],
         ])
         a, b, c, E = np.linalg.solve(LHS, np.ones(4))
         q, r = np.sqrt(
             (-3 * b + np.array([-1, 1]) * sqrt(9 * b**2 - 20 * a * c)) /
             (10 * c))
     return float(a), float(b), float(c)

 DEFAULT_CHUNK_SIZE_RATIO = 4
+def _optimal_quintic(l, u, max_iter=1000):
     """
     Use the simplified Remez algorithm to find the optimal odd quintic approximant
     to the constant function x -> 1 over the interval [l, u].
     approximation error max_{x in [l,u]} |p(x) - 1|. Iterates by updating the
     two interior equioscillation nodes q, r until convergence. Returns the
     closed-form equioscillating solution when l ≈ u.
+    Raises ValueError if any intermediate value (a, b, c, E, q, r) is non-finite
+    (NaN or inf). Raises RuntimeError if convergence is not reached within
+    max_iter iterations.
     """
     assert 0 <= l <= u
     if 1 - 5e-6 <= l / u:
         return (15 / 8) / u, (-10 / 8) / (u**3), (3 / 8) / (u**5)
     q = (3 * l + u) / 4
     r = (l + 3 * u) / 4
+    E = inf
+    for _ in range(max_iter):
         old_E = E
         LHS = np.array([
             [l, l**3, l**5, 1],
             [u, u**3, u**5, -1],
         ])
         a, b, c, E = np.linalg.solve(LHS, np.ones(4))
+        if not np.all(np.isfinite([a, b, c, E])):
+            raise ValueError(
+                f"_optimal_quintic: non-finite solve result "
+                f"a={a}, b={b}, c={c}, E={E}")
         q, r = np.sqrt(
             (-3 * b + np.array([-1, 1]) * sqrt(9 * b**2 - 20 * a * c)) /
             (10 * c))
+        if not np.all(np.isfinite([q, r])):
+            raise ValueError(
+                f"_optimal_quintic: non-finite node update q={q}, r={r}")
+        if abs(old_E - E) <= 1e-15:
+            break
+    else:
+        raise RuntimeError(
+            f"_optimal_quintic: did not converge after {max_iter} iterations")
     return float(a), float(b), float(c)