web_interface: improve onnxruntime load-failure recovery
- Surface the actual ImportError before any installer prompt.
- Detect VC++ runtime (vcruntime140/msvcp140) before launching the
UAC installer, so users with VC++ already installed aren't asked
to elevate for a fix that wouldn't help.
- Detect AVX2 via IsProcessorFeaturePresent and auto-downgrade to
onnxruntime==1.17.3 (last AVX-only release) on non-AVX2 CPUs.
- Track recovery attempts via env var to avoid infinite re-exec loops.
- web_interface/app.py +129 -21
web_interface/app.py
CHANGED
|
@@ -49,6 +49,12 @@ PYENV_MARKER = PYENV_DIR / ".bootstrapped"
|
|
| 49 |
# "DLL load failed while importing onnxruntime_pybind11_state".
|
| 50 |
VC_REDIST_URL = "https://aka.ms/vs/17/release/vc_redist.x64.exe"
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
# Default folder if it exists; otherwise the first auto-discovered folder
|
| 53 |
# next to this script is used. Override with --model-dir or the UI picker.
|
| 54 |
DEFAULT_MODEL_DIR = ROOT / "V1.1_onnx"
|
|
@@ -200,19 +206,68 @@ def _bootstrap_portable(force_reinstall: bool) -> None:
|
|
| 200 |
|
| 201 |
|
| 202 |
# ---------------------------------------------------------------------------
|
| 203 |
-
# onnxruntime / Visual C++ runtime recovery
|
| 204 |
# ---------------------------------------------------------------------------
|
| 205 |
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
if os.name != "nt":
|
| 211 |
return False
|
| 212 |
msg = str(err)
|
| 213 |
return "DLL load failed" in msg and "onnxruntime_pybind11_state" in msg
|
| 214 |
|
| 215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
def _install_vcredist_with_uac() -> bool:
|
| 217 |
"""Download vc_redist.x64.exe and run it elevated. Returns True on
|
| 218 |
apparent success (exit 0 or 3010 = success-pending-reboot)."""
|
|
@@ -263,34 +318,87 @@ def _install_vcredist_with_uac() -> bool:
|
|
| 263 |
return False
|
| 264 |
|
| 265 |
|
| 266 |
-
def
|
| 267 |
-
"""
|
| 268 |
-
|
| 269 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 270 |
print()
|
| 271 |
print("=" * 60)
|
| 272 |
print(" onnxruntime failed to load")
|
| 273 |
print("=" * 60)
|
| 274 |
-
print("
|
| 275 |
-
print("
|
| 276 |
-
print("
|
|
|
|
| 277 |
print()
|
| 278 |
|
| 279 |
-
if not
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
print()
|
| 281 |
print("Manual fix:")
|
| 282 |
print(f" 1. Download {VC_REDIST_URL}")
|
| 283 |
print(" 2. Run it (accept the UAC prompt)")
|
| 284 |
print(" 3. Re-run this script")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 285 |
print()
|
| 286 |
-
print("
|
| 287 |
-
print(
|
|
|
|
|
|
|
|
|
|
| 288 |
sys.exit(1)
|
| 289 |
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
|
| 295 |
|
| 296 |
# ---------------------------------------------------------------------------
|
|
@@ -466,8 +574,8 @@ def _run_app() -> None:
|
|
| 466 |
try:
|
| 467 |
import onnxruntime as ort
|
| 468 |
except ImportError as e:
|
| 469 |
-
if
|
| 470 |
-
|
| 471 |
sys.exit(1) # only reached if recover failed without exiting
|
| 472 |
raise
|
| 473 |
import gradio as gr
|
|
|
|
| 49 |
# "DLL load failed while importing onnxruntime_pybind11_state".
|
| 50 |
VC_REDIST_URL = "https://aka.ms/vs/17/release/vc_redist.x64.exe"
|
| 51 |
|
| 52 |
+
# Last onnxruntime release that ships AVX-only (no-AVX2) wheels. ORT >= 1.18
# requires AVX2; on older CPUs the import fails with the same generic
# "DLL load failed" message as a missing VC++ runtime. We pin to this version
# as an automatic fallback when AVX2 is not detected.
# The value is interpolated into the pip requirement "onnxruntime==<version>"
# both by the automatic downgrade and by the printed manual-fix command.
# NOTE(review): the 1.17.3 / 1.18 cutoff is asserted here, not checked at
# runtime -- confirm wheels for this version exist for every supported Python.
ORT_NO_AVX2_FALLBACK_VERSION = "1.17.3"
|
| 57 |
+
|
| 58 |
# Default folder if it exists; otherwise the first auto-discovered folder
|
| 59 |
# next to this script is used. Override with --model-dir or the UI picker.
|
| 60 |
DEFAULT_MODEL_DIR = ROOT / "V1.1_onnx"
|
|
|
|
| 206 |
|
| 207 |
|
| 208 |
# ---------------------------------------------------------------------------
|
| 209 |
+
# onnxruntime / Visual C++ runtime / AVX2 recovery
|
| 210 |
# ---------------------------------------------------------------------------
|
| 211 |
|
| 212 |
+
# Name of the environment variable that tracks which automatic recoveries
# have already run. It is propagated to re-execs via the environment so we
# don't loop forever if a fix doesn't actually fix things (e.g. UAC accepted
# but the install silently failed).
# The value is a comma-separated list of recovery names; the names used in
# this file are "vcredist" and "avx2_downgrade".
_RECOVERY_ATTEMPTS_VAR = "_OPPAI_RECOVERY_ATTEMPTS"
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
def _looks_like_native_load_failure(err: BaseException) -> bool:
|
| 219 |
+
"""Heuristic: did onnxruntime fail at native-DLL load time? Both a
|
| 220 |
+
missing VC++ runtime and a missing AVX2 instruction set produce the same
|
| 221 |
+
"DLL load failed while importing onnxruntime_pybind11_state" message, so
|
| 222 |
+
we leave it to _recover_after_onnxruntime_load_failure to tell them apart."""
|
| 223 |
if os.name != "nt":
|
| 224 |
return False
|
| 225 |
msg = str(err)
|
| 226 |
return "DLL load failed" in msg and "onnxruntime_pybind11_state" in msg
|
| 227 |
|
| 228 |
|
| 229 |
+
def _has_vcruntime() -> bool:
|
| 230 |
+
"""True if the VC++ 2015-2022 runtime DLLs are present in System32. We
|
| 231 |
+
look at vcruntime140.dll and msvcp140.dll because onnxruntime's native
|
| 232 |
+
code links against both."""
|
| 233 |
+
if os.name != "nt":
|
| 234 |
+
return True
|
| 235 |
+
sys32 = Path(os.environ.get("WINDIR", r"C:\Windows")) / "System32"
|
| 236 |
+
return (sys32 / "vcruntime140.dll").exists() and (sys32 / "msvcp140.dll").exists()
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
def _has_avx2() -> bool:
|
| 240 |
+
"""True if the OS reports AVX2 support via IsProcessorFeaturePresent
|
| 241 |
+
(PF_AVX2_INSTRUCTIONS_AVAILABLE = 40). Conservative on error: returns
|
| 242 |
+
True if the API can't be called, so we don't downgrade ORT spuriously."""
|
| 243 |
+
if os.name != "nt":
|
| 244 |
+
return True
|
| 245 |
+
try:
|
| 246 |
+
import ctypes
|
| 247 |
+
return bool(ctypes.windll.kernel32.IsProcessorFeaturePresent(40))
|
| 248 |
+
except OSError:
|
| 249 |
+
return True
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def _onnxruntime_version() -> str | None:
|
| 253 |
+
try:
|
| 254 |
+
import importlib.metadata as md
|
| 255 |
+
return md.version("onnxruntime")
|
| 256 |
+
except Exception: # noqa: BLE001
|
| 257 |
+
return None
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def _recovery_attempted(name: str) -> bool:
    """Return True if recovery *name* is recorded in the attempts env var.

    The variable named by ``_RECOVERY_ATTEMPTS_VAR`` holds a comma-separated
    list of recovery names; an unset variable is treated as an empty string.
    """
    recorded = os.environ.get(_RECOVERY_ATTEMPTS_VAR, "")
    already_run = recorded.split(",")
    return name in already_run
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
def _mark_recovery_attempted(name: str) -> None:
    """Record recovery *name* in the attempts env var of this process.

    The comma-separated list stored under ``_RECOVERY_ATTEMPTS_VAR`` is read,
    empty entries are dropped, *name* is appended if it is not already
    listed, and the list is written back to ``os.environ``. The variable is
    rewritten even when *name* was already present.
    """
    raw = os.environ.get(_RECOVERY_ATTEMPTS_VAR, "")
    names = list(filter(None, raw.split(",")))
    if name not in names:
        names = [*names, name]
    os.environ[_RECOVERY_ATTEMPTS_VAR] = ",".join(names)
|
| 269 |
+
|
| 270 |
+
|
| 271 |
def _install_vcredist_with_uac() -> bool:
|
| 272 |
"""Download vc_redist.x64.exe and run it elevated. Returns True on
|
| 273 |
apparent success (exit 0 or 3010 = success-pending-reboot)."""
|
|
|
|
| 318 |
return False
|
| 319 |
|
| 320 |
|
| 321 |
+
def _downgrade_onnxruntime_for_no_avx2() -> bool:
    """Force-reinstall onnxruntime at the pinned no-AVX2 fallback version.

    Runs ``pip install --force-reinstall onnxruntime==<version>`` with the
    current interpreter (``sys.executable``), where the version comes from
    ``ORT_NO_AVX2_FALLBACK_VERSION``.

    Returns:
        True when pip exits with status 0. False when pip exits non-zero or
        when the pip process cannot be started at all, so the caller can fall
        back to printing manual instructions instead of crashing.
    """
    target = f"onnxruntime=={ORT_NO_AVX2_FALLBACK_VERSION}"
    print(f"[runtime] Reinstalling {target} (this CPU lacks AVX2) ...")
    command = [sys.executable, "-m", "pip", "install", "--force-reinstall", target]
    try:
        subprocess.check_call(command)
    except (subprocess.CalledProcessError, OSError) as e:
        # CalledProcessError: pip ran and failed. OSError: the interpreter
        # could not be spawned (missing, blocked, permission denied).
        print(f"[runtime] pip install failed: {e}")
        return False
    return True
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
def _relaunch_app() -> None:
    """Run this script again in a child process and exit with its status.

    A fresh process gives onnxruntime a new DLL-load attempt; per the
    original author's note, Windows caches failed loads within a process, so
    re-importing in place would not pick up a newly installed runtime or
    wheel.

    The original command-line arguments are forwarded, except that any
    ``--reinstall`` flag is dropped. No ``env=`` is passed to the child, so
    it is started with this process's environment. This function does not
    return: it always ends in ``sys.exit`` with the child's return code.
    """
    forwarded = [arg for arg in sys.argv[1:] if arg != "--reinstall"]
    print("\n[runtime] Re-launching the app ...\n")
    script = str(Path(__file__).resolve())
    sys.exit(subprocess.call([sys.executable, script, *forwarded]))
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
def _recover_after_onnxruntime_load_failure(err: BaseException) -> None:
    """Diagnose a failed onnxruntime native load and try one automatic fix.

    Prints a diagnostic report (the underlying error, the installed
    onnxruntime version, VC++ runtime presence, AVX2 support), then applies
    the first recovery that matches and has not been attempted before:

    1. VC++ runtime missing: install it through the UAC installer.
    2. CPU lacks AVX2: reinstall onnxruntime at the pinned fallback version.

    A successful recovery re-launches the app. A failed one prints manual
    instructions and exits with status 1. If neither recovery applies, a
    list of likely causes is printed and the process exits with status 1.
    Each recovery is marked as attempted before it runs, so a re-launched
    process will not repeat it.

    Args:
        err: The exception raised by ``import onnxruntime``.
    """
    vc_present = _has_vcruntime()
    avx2_present = _has_avx2()
    installed_ort = _onnxruntime_version()

    rule = "=" * 60
    vc_state = "present" if vc_present else "missing"
    avx2_state = "yes" if avx2_present else "no"
    report = (
        "",
        rule,
        " onnxruntime failed to load",
        rule,
        f"Underlying error: {err}",
        f" installed onnxruntime: {installed_ort or 'unknown'}",
        f" VC++ 2015-2022 runtime: {vc_state}",
        f" CPU AVX2 support: {avx2_state}",
        "",
    )
    for line in report:
        print(line)

    # Recovery 1: missing VC++ runtime (tried at most once across re-launches).
    if not (vc_present or _recovery_attempted("vcredist")):
        print("VC++ runtime is missing — installing it now.")
        print("Accept the UAC prompt that appears.")
        _mark_recovery_attempted("vcredist")
        if _install_vcredist_with_uac():
            _relaunch_app()
            return
        manual_vc_steps = (
            "",
            "Manual fix:",
            f" 1. Download {VC_REDIST_URL}",
            " 2. Run it (accept the UAC prompt)",
            " 3. Re-run this script",
        )
        for line in manual_vc_steps:
            print(line)
        sys.exit(1)

    # Recovery 2: CPU without AVX2 (also tried at most once).
    if not (avx2_present or _recovery_attempted("avx2_downgrade")):
        print("CPU lacks AVX2 — modern onnxruntime wheels require it.")
        _mark_recovery_attempted("avx2_downgrade")
        if _downgrade_onnxruntime_for_no_avx2():
            _relaunch_app()
            return
        manual_pip_command = (
            f" {sys.executable} -m pip install --force-reinstall "
            f"onnxruntime=={ORT_NO_AVX2_FALLBACK_VERSION}"
        )
        print()
        print("Manual fix:")
        print(manual_pip_command)
        sys.exit(1)

    # Either every known recovery has already been tried, or VC++ is present
    # and AVX2 is reported — the cause is something we can't fix automatically.
    reinstall_hint = (
        f" - Corrupt install: {sys.executable} -m pip install "
        "--force-reinstall onnxruntime"
    )
    likely_culprits = (
        "No automatic recovery applies. Likely culprits:",
        " - Antivirus quarantining onnxruntime_pybind11_state.pyd",
        reinstall_hint,
        " - 32-bit Python loading 64-bit wheels (or vice versa)",
    )
    for line in likely_culprits:
        print(line)
    sys.exit(1)
|
| 402 |
|
| 403 |
|
| 404 |
# ---------------------------------------------------------------------------
|
|
|
|
| 574 |
try:
|
| 575 |
import onnxruntime as ort
|
| 576 |
except ImportError as e:
|
| 577 |
+
if _looks_like_native_load_failure(e):
|
| 578 |
+
_recover_after_onnxruntime_load_failure(e)
|
| 579 |
sys.exit(1) # only reached if recover failed without exiting
|
| 580 |
raise
|
| 581 |
import gradio as gr
|