small-cuts / app.py
macayaven's picture
Upload folder using huggingface_hub
9455e9e verified
Raw
History Blame Contribute Delete
3.38 kB
"""Hugging Face Space entrypoint for Small Cuts.
Local dev keeps the lazy/mock defaults. On a Space this module refuses unsafe
CPU local inference and never lets startup failures crash-loop the container:
- ``import spaces`` happens before anything touches torch (ZeroGPU hijack).
- The narrator loads lazily inside the ``@spaces.GPU`` event handler.
- TTS runs inside ``@spaces.GPU`` workers too (kokoro's torch use poisons
  worker forks if it ever runs in the main process).
"""
import os
import sys
import warnings
from pathlib import Path
import gradio as gr
from starlette.exceptions import StarletteDeprecationWarning
# Put the sibling ``src`` directory at the front of the import path (once) so the
# ``small_cuts`` package imported below resolves from this checkout.
ROOT = Path(__file__).resolve().parent
SRC = ROOT.joinpath("src")
if sys.path.count(str(SRC)) == 0:
    sys.path.insert(0, str(SRC))
# Silence the Starlette deprecation warning whose text mentions both
# HTTP_422_UNPROCESSABLE_ENTITY and HTTP_422_UNPROCESSABLE_CONTENT.
warnings.filterwarnings(
    action="ignore",
    message=r".*HTTP_422_UNPROCESSABLE_ENTITY.*HTTP_422_UNPROCESSABLE_CONTENT.*",
    category=StarletteDeprecationWarning,
)
# True when SPACE_ID is present in the environment with a non-empty value.
ON_SPACE = os.environ.get("SPACE_ID", "") != ""
# True when SMALL_CUTS_ENGINE_URL holds something other than whitespace.
ENGINE_MODE = os.environ.get("SMALL_CUTS_ENGINE_URL", "").strip() != ""
from small_cuts.hf_relay import RELAY_BUCKET_ENV # noqa: E402
# A non-blank relay bucket or Modal API URL switches on the matching remote mode.
RELAY_MODE = os.environ.get(RELAY_BUCKET_ENV, "").strip() != ""
MODAL_UPLOAD_MODE = os.environ.get("SMALL_CUTS_MODAL_API_URL", "").strip() != ""
# Any remote mode makes this process viewer-only; local inference is needed otherwise.
VIEWER_ONLY_MODE = any((ENGINE_MODE, RELAY_MODE, MODAL_UPLOAD_MODE))
NEEDS_LOCAL_INFERENCE = not VIEWER_ONLY_MODE
# ``spaces`` has to be imported before anything pulls in torch (see the module
# docstring); the import is optional so the module still loads without it.
try:
    import spaces # noqa: F401 (must precede torch imports for ZeroGPU)
except ImportError: # local dev / CI: no ZeroGPU
    # Keep a None sentinel so later checks can tell that ZeroGPU is unavailable.
    spaces = None
# Default backend selection. ``os.environ.setdefault`` only writes a key that is
# absent, so values already exported in the environment take precedence.
# NOTE(review): confirm whether the kokoro TTS default is meant to apply on a
# Space as well as off it.
if NEEDS_LOCAL_INFERENCE:
if ON_SPACE:
os.environ.setdefault("SMALL_CUTS_BACKEND", "transformers")
else:
os.environ.setdefault("SMALL_CUTS_BACKEND", "llama_cpp")
os.environ.setdefault("SMALL_CUTS_TTS_BACKEND", "kokoro")
from small_cuts.observability import capture_exception, init_sentry # noqa: E402
from small_cuts.space_hooks import install_relay_hooks # noqa: E402
from small_cuts.viewer import THEME, build_viewer_app # noqa: E402
# Runs once at import time, before the app is built.
# NOTE(review): presumably configures Sentry error reporting — confirm in
# small_cuts.observability.
init_sentry()
# Text of the startup failure, or None; assigned when building the app raises.
STARTUP_ERROR: str | None = None
def _allow_cpu_inference() -> bool:
    """Report whether CPU inference has been explicitly opted into.

    Reads ``SMALL_CUTS_ALLOW_CPU_INFERENCE``. Surrounding whitespace and letter
    case are ignored; only ``1``, ``true`` and ``yes`` count as opting in.
    """
    raw_flag = os.environ.get("SMALL_CUTS_ALLOW_CPU_INFERENCE", "")
    normalized = raw_flag.strip().lower()
    return normalized in {"1", "true", "yes"}
def _validate_startup_mode() -> None:
    """Reject a Space deployment that would run local inference without ZeroGPU.

    Raises:
        RuntimeError: when running on a Space, local inference is required, the
            ``spaces`` package could not be imported, and CPU inference has not
            been explicitly allowed.
    """
    # Off-Space runs and viewer-only modes have nothing to validate.
    if not (ON_SPACE and NEEDS_LOCAL_INFERENCE):
        return
    # ZeroGPU is importable, or the operator opted into CPU inference.
    if spaces is not None or _allow_cpu_inference():
        return
    raise RuntimeError(
        "refusing local inference on a Space without ZeroGPU; configure relay, engine, "
        "or Modal upload mode, or set SMALL_CUTS_ALLOW_CPU_INFERENCE=1 explicitly"
    )
def _degraded_app(message: str) -> gr.Blocks:
    """Build a placeholder UI that reports a startup failure.

    Args:
        message: Failure description, interpolated verbatim into an inline
            code span of the notice.

    Returns:
        A ``gr.Blocks`` app whose only component is the Markdown notice.
    """
    notice = f"# Small Cuts is temporarily unavailable\n\nStartup configuration failed: `{message}`"
    with gr.Blocks(title="Small Cuts") as degraded:
        gr.Markdown(notice)
    return degraded
def _build_demo() -> gr.Blocks:
    """Validate the startup mode, then assemble the viewer app with relay hooks.

    In engine/relay/upload modes the Space is a public reader and upload front
    door, so it must not warm local model weights. In local-inference mode,
    ZeroGPU loads lazily inside the Gradio handler decorated with
    ``@spaces.GPU``.

    Returns:
        The viewer app, after relay hooks have been installed on its ``app``
        attribute.

    Raises:
        RuntimeError: propagated from ``_validate_startup_mode``.
    """
    # Validation runs first so an unsafe configuration fails before the app is built.
    _validate_startup_mode()
    viewer = build_viewer_app()
    install_relay_hooks(viewer.app)
    return viewer
# Build the app at import time, but never let a startup failure escape: per the
# module docstring a failure here must not crash-loop the container, so a degraded
# page is served instead.
try:
    demo = _build_demo()
except Exception as exc:
    capture_exception(exc)
    # Exceptions raised without arguments stringify to "" (e.g. ``RuntimeError()``).
    # Fall back to the repr so STARTUP_ERROR stays truthy and the degraded page
    # names the failure instead of showing an empty code span.
    STARTUP_ERROR = str(exc) or repr(exc)
    demo = _degraded_app(STARTUP_ERROR)

if __name__ == "__main__":
    # Direct execution: launch whichever app (real or degraded) was built above.
    demo.launch(theme=THEME)