"""Hugging Face Space entrypoint for Small Cuts.

Local dev keeps the lazy/mock defaults. On a Space this module refuses unsafe
CPU local inference and never lets startup failures crash-loop the container:

- ``import spaces`` happens before anything touches torch (ZeroGPU hijack).
- The narrator loads lazily inside the ``@spaces.GPU`` event handler.
- TTS runs inside ``@spaces.GPU`` workers too (kokoro's torch use poisons
  worker forks if it ever runs in the main process).
"""
|
|
| import os |
| import sys |
| import warnings |
| from pathlib import Path |
|
|
| import gradio as gr |
| from starlette.exceptions import StarletteDeprecationWarning |
|
|
# Directory containing this entrypoint, and its ``src`` subdirectory. ``src``
# is put at the front of ``sys.path`` (once) before the ``small_cuts`` imports
# further down run -- presumably that is where the package lives; confirm
# against the repository layout.
ROOT = Path(__file__).resolve().parent
SRC = ROOT / "src"
if os.fspath(SRC) not in sys.path:
    sys.path.insert(0, os.fspath(SRC))
|
|
# Suppress the one Starlette deprecation warning whose message names both
# HTTP_422_UNPROCESSABLE_ENTITY and HTTP_422_UNPROCESSABLE_CONTENT. Other
# warnings of the same category still surface.
warnings.filterwarnings(
    "ignore",
    r".*HTTP_422_UNPROCESSABLE_ENTITY.*HTTP_422_UNPROCESSABLE_CONTENT.*",
    StarletteDeprecationWarning,
)
|
|
# ON_SPACE: SPACE_ID is set to a non-empty value (used throughout this module
# as the marker for running on a Space).
# ENGINE_MODE: SMALL_CUTS_ENGINE_URL holds something other than whitespace.
ON_SPACE = os.environ.get("SPACE_ID", "") != ""
ENGINE_MODE = os.environ.get("SMALL_CUTS_ENGINE_URL", "").strip() != ""
|
|
| from small_cuts.hf_relay import RELAY_BUCKET_ENV |
|
|
# Each flag is true when its environment variable holds a non-blank value.
RELAY_MODE = os.environ.get(RELAY_BUCKET_ENV, "").strip() != ""
MODAL_UPLOAD_MODE = os.environ.get("SMALL_CUTS_MODAL_API_URL", "").strip() != ""

# Any one of engine / relay / Modal-upload mode makes this process viewer-only;
# local inference is needed only when none of them is configured.
VIEWER_ONLY_MODE = any((ENGINE_MODE, RELAY_MODE, MODAL_UPLOAD_MODE))
NEEDS_LOCAL_INFERENCE = not VIEWER_ONLY_MODE
|
|
# ``spaces`` is optional. The name stays bound to None when the package cannot
# be imported, which is what the startup validation below checks for. Per the
# module docstring, this import has to run before anything touches torch.
spaces = None
try:
    import spaces
except ImportError:
    # Not installed: keep the None placeholder.
    pass
|
|
# Backend defaults apply only when this process runs inference itself.
# ``setdefault`` never overrides a value that is already present in the
# environment, so explicit configuration always wins.
if NEEDS_LOCAL_INFERENCE:
    os.environ.setdefault(
        "SMALL_CUTS_BACKEND",
        "transformers" if ON_SPACE else "llama_cpp",
    )
    os.environ.setdefault("SMALL_CUTS_TTS_BACKEND", "kokoro")
|
|
| from small_cuts.observability import capture_exception, init_sentry |
| from small_cuts.space_hooks import install_relay_hooks |
| from small_cuts.viewer import THEME, build_viewer_app |
|
|
# Runs at import time, ahead of the guarded app build at the bottom of the
# module (which reports failures through ``capture_exception``).
init_sentry()


# Message of the exception that aborted the app build; remains None when the
# build succeeds.
STARTUP_ERROR: str | None = None
|
|
|
|
def _allow_cpu_inference() -> bool:
    """Report whether CPU inference has been explicitly opted into.

    ``SMALL_CUTS_ALLOW_CPU_INFERENCE`` is read on every call. After trimming
    surrounding whitespace and lowercasing, only ``1``, ``true`` and ``yes``
    count as enabled; an unset, empty, or any other value means disabled.
    """
    raw = os.environ.get("SMALL_CUTS_ALLOW_CPU_INFERENCE", "")
    normalized = raw.strip().lower()
    return normalized in {"1", "true", "yes"}
|
|
|
|
def _validate_startup_mode() -> None:
    """Reject local inference on a Space when ``spaces`` is unavailable.

    Returns silently unless all of the following hold: the process is on a
    Space, local inference is needed, the ``spaces`` import failed, and CPU
    inference has not been opted into.

    Raises:
        RuntimeError: when every condition above is met.
    """
    # Off-Space, or in a viewer-only mode: nothing to validate.
    if not (ON_SPACE and NEEDS_LOCAL_INFERENCE):
        return
    # ``spaces`` imported fine, or the operator explicitly accepted CPU.
    if spaces is not None or _allow_cpu_inference():
        return
    raise RuntimeError(
        "refusing local inference on a Space without ZeroGPU; configure relay, engine, "
        "or Modal upload mode, or set SMALL_CUTS_ALLOW_CPU_INFERENCE=1 explicitly"
    )
|
|
|
|
def _markdown_code_span(text: str) -> str:
    """Return *text* wrapped in a Markdown inline code span it cannot escape.

    Whitespace runs (including newlines) are collapsed to single spaces so a
    blank line cannot end the paragraph mid-span. The backtick fence is made
    one longer than the longest backtick run in the text, and the content is
    space-padded when it starts or ends with a backtick. Text with no
    backticks and no unusual whitespace comes back as a plain single-backtick
    span.
    """
    flat = " ".join(text.split())
    longest = current = 0
    for char in flat:
        current = current + 1 if char == "`" else 0
        longest = max(longest, current)
    fence = "`" * (longest + 1)
    # A span whose content touches the fence with a backtick would otherwise
    # be read as a longer fence; one space on each side keeps them apart.
    pad = " " if flat.startswith("`") or flat.endswith("`") else ""
    return f"{fence}{pad}{flat}{pad}{fence}"


def _degraded_app(message: str) -> gr.Blocks:
    """Build the fallback page shown when normal startup fails.

    Args:
        message: Reason for the failure, rendered as inline code on the page.
            NOTE(review): the caller passes exception text, and this page is
            visible to anyone who opens the app -- confirm it can never carry
            secrets.

    Returns:
        A ``gr.Blocks`` titled "Small Cuts" holding one Markdown notice.
    """
    # Exception text may contain backticks or line breaks; quote it safely so
    # it always renders as literal code rather than as Markdown.
    reason = _markdown_code_span(message)
    with gr.Blocks(title="Small Cuts") as degraded:
        gr.Markdown(
            f"# Small Cuts is temporarily unavailable\n\nStartup configuration failed: {reason}"
        )
    return degraded
|
|
|
|
def _build_demo() -> gr.Blocks:
    """Validate the startup mode, then assemble the viewer application.

    Validation runs first, so an unsupported configuration raises before any
    UI is built. Relay hooks are installed on the viewer's ``app`` attribute
    and the viewer itself is returned.

    Raises:
        RuntimeError: propagated from ``_validate_startup_mode``. Anything
            raised by ``build_viewer_app`` or ``install_relay_hooks`` also
            propagates unchanged.
    """
    _validate_startup_mode()
    viewer = build_viewer_app()
    install_relay_hooks(viewer.app)
    return viewer
|
|
|
|
# Build the app at import time. Any failure is reported, recorded in
# STARTUP_ERROR, and replaced by the degraded page instead of propagating out
# of the module.
try:
    demo = _build_demo()
except Exception as exc:
    # Deliberately broad: this is the top-level startup boundary.
    # NOTE(review): capture_exception is the only reporting done here --
    # confirm it also reaches the container logs when Sentry is not configured.
    capture_exception(exc)
    # Some exceptions stringify to "" (e.g. ``RuntimeError()``). Fall back to
    # the class name so STARTUP_ERROR is never an empty, falsy string and the
    # degraded page always states a reason.
    STARTUP_ERROR = str(exc) or type(exc).__name__
    demo = _degraded_app(STARTUP_ERROR)


if __name__ == "__main__":
    demo.launch(theme=THEME)
|
|