analyst-buddy / app.py
hjerpe's picture
F006/F008: serve Qwen models + model switcher (vanilla-first)
44ce4f7 verified
Raw
History Blame Contribute Delete
2.6 kB
"""analyst-buddy Gradio app — thin entrypoint shim (F004).
The implementation lives in the single canonical module
``sql_env.server.app_ui`` so the process has exactly ONE ``Step``/class identity
(no dual-import footgun where ``app`` and ``sql_env.app`` would create two
distinct ``Step`` types — an F006 ``isinstance`` hazard). This file only
re-exports ``build_demo`` (the public surface HF Spaces / ``uv run python app.py``
and the test suite construct) and launches it when run directly.
Run locally: ``uv run python app.py``.
"""
from __future__ import annotations
# ZeroGPU: the `spaces` package MUST be imported before ANYTHING initializes CUDA
# (i.e. before preload/torch below), or it raises "CUDA has been initialized before
# importing the `spaces` package". So import it FIRST. The package may be absent
# off-Space (local/tests), where the `@spaces.GPU` decorator is a documented no-op
# anyway, so a failed import is simply ignored.
try:
import spaces # noqa: F401
except ImportError: # pragma: no cover - not on a ZeroGPU Space (local / tests)
pass
# Prefer the ABSOLUTE packaged module so that `import app` (flat, via the test
# pythonpath) and `import sql_env.app` BOTH delegate to the exact same
# `sql_env.server.app_ui` — a single `Step`/class identity (R1). The flat
# fallback (`server.app_ui`) is only for a bare layout with no installed package;
# under this repo `sql_env` always resolves, so the canonical path wins.
try:
from sql_env.server.app_ui import _launch_demo, build_demo
except ImportError: # pragma: no cover - bare flat-layout / direct-run fallback
from server.app_ui import _launch_demo, build_demo # type: ignore[no-redef]
__all__ = ["build_demo"]
if __name__ == "__main__":
    # F006/F008 — entrypoint for the HF ZeroGPU Space. The heavy serving imports are
    # deliberately deferred to this guard (NOT done at module import) so that a plain
    # `import app` remains dep-light for the tests.
    try:
        from sql_env.server.serving import preload_available
    except ImportError:  # pragma: no cover - bare flat-layout / direct-run fallback
        from server.serving import preload_available  # type: ignore[no-redef]

    # Preload the available models here, in the PARENT process, so that each per-ask
    # @spaces.GPU fork inherits the loaded weights (ZeroGPU does not persist state
    # created inside the GPU call). Per the original note, a failed preload is logged
    # and the model then loads lazily on first ask; that handling is not in this file.
    preload_available()

    # Build the UI with the model switcher enabled, then hand it to _launch_demo,
    # which applies the theme + CSS that build_demo stashed (gradio 6.x reads
    # theme/css at .launch() time, not on the Blocks constructor).
    demo = build_demo(enable_model_selector=True)
    _launch_demo(demo)