Spaces:

build-small-hackathon
/

small-cuts

Running

App Files Files Community

small-cuts / app.py

macayaven

Upload folder using huggingface_hub

9455e9e verified 18 days ago

Raw

History Blame Contribute Delete

3.38 kB

	"""Hugging Face Space entrypoint for Small Cuts.

	Local dev keeps the lazy/mock defaults. On a Space this module refuses unsafe
	CPU local inference and never lets startup failures crash-loop the container:

	- ``import spaces`` happens before anything touches torch (ZeroGPU hijack).
	- The narrator loads lazily inside the ``@spaces.GPU`` event handler.
	- TTS runs inside @spaces.GPU workers too (kokoro's torch use poisons
	worker forks if it ever runs in the main process).
	"""

	import os
	import sys
	import warnings
	from pathlib import Path

	import gradio as gr
	from starlette.exceptions import StarletteDeprecationWarning

	# Directory that contains this file, and the "src" directory beside it.
	ROOT = Path(__file__).resolve().parent
	SRC = ROOT / "src"
	# Put src/ at the front of sys.path exactly once (presumably so the small_cuts
	# imports further down resolve without installing the package -- confirm).
	if str(SRC) not in sys.path:
	    sys.path.insert(0, str(SRC))

	# Ignore the Starlette deprecation warning whose text mentions both HTTP_422
	# constant names. ``message`` is a regular expression matched against the start
	# of the warning text, so ``.*`` is needed before and between the two names;
	# single-character ``.`` separators would only match if the names were adjacent.
	warnings.filterwarnings(
	    "ignore",
	    message=r".*HTTP_422_UNPROCESSABLE_ENTITY.*HTTP_422_UNPROCESSABLE_CONTENT.*",
	    category=StarletteDeprecationWarning,
	)

	# Deployment-mode switches, all derived from environment variables.
	# ON_SPACE is true for any non-empty SPACE_ID; the remaining flags treat empty
	# or whitespace-only values as unset.
	ON_SPACE = bool(os.environ.get("SPACE_ID"))
	ENGINE_MODE = os.environ.get("SMALL_CUTS_ENGINE_URL", "").strip() != ""

	# Late import (hence E402): presumably needs src/ on sys.path first -- confirm.
	from small_cuts.hf_relay import RELAY_BUCKET_ENV  # noqa: E402

	RELAY_MODE = os.environ.get(RELAY_BUCKET_ENV, "").strip() != ""
	MODAL_UPLOAD_MODE = os.environ.get("SMALL_CUTS_MODAL_API_URL", "").strip() != ""
	# Any one of the three remote modes makes this process viewer-only; local
	# inference is needed only when none of them is configured.
	VIEWER_ONLY_MODE = any((ENGINE_MODE, RELAY_MODE, MODAL_UPLOAD_MODE))
	NEEDS_LOCAL_INFERENCE = not VIEWER_ONLY_MODE

	# The ``spaces`` package is optional. When it cannot be imported the name is
	# bound to None so later code can test ``spaces is None``.
	try:
	    import spaces  # noqa: F401 (must precede torch imports for ZeroGPU)
	except ImportError:  # local dev / CI: no ZeroGPU
	    spaces = None

	# Choose default backends through os.environ.setdefault, so any value already
	# present in the environment is kept.
	# NOTE(review): indentation was lost in this copy, so the nesting below is
	# ambiguous. The likeliest reading is that the ON_SPACE if/else sits inside the
	# NEEDS_LOCAL_INFERENCE branch (transformers on a Space, llama_cpp elsewhere);
	# whether the TTS default belongs inside that branch or at module level cannot
	# be determined from here -- confirm against the repository before re-indenting.
	if NEEDS_LOCAL_INFERENCE:
	if ON_SPACE:
	os.environ.setdefault("SMALL_CUTS_BACKEND", "transformers")
	else:
	os.environ.setdefault("SMALL_CUTS_BACKEND", "llama_cpp")
	os.environ.setdefault("SMALL_CUTS_TTS_BACKEND", "kokoro")

	from small_cuts.observability import capture_exception, init_sentry # noqa: E402
	from small_cuts.space_hooks import install_relay_hooks # noqa: E402
	from small_cuts.viewer import THEME, build_viewer_app # noqa: E402

	# Run the observability initialiser before the app is built (presumably this
	# configures Sentry; the startup fallback at the bottom of this module reports
	# failures through capture_exception).
	init_sentry()

	# Text of the exception that forced the degraded UI; stays None after a clean start.
	STARTUP_ERROR: str | None = None


	def _allow_cpu_inference() -> bool:
	    """Return True when CPU-only local inference was explicitly opted into.

	    Reads the ``SMALL_CUTS_ALLOW_CPU_INFERENCE`` environment variable on every
	    call. Surrounding whitespace and letter case are ignored; only ``1``,
	    ``true`` and ``yes`` count as opting in. An unset or empty variable, or any
	    other value, yields False.
	    """
	    return os.environ.get("SMALL_CUTS_ALLOW_CPU_INFERENCE", "").strip().lower() in (
	        "1",
	        "true",
	        "yes",
	    )


	def _validate_startup_mode() -> None:
	    """Refuse to start local inference on a Space that has no ZeroGPU support.

	    The check fails only when all of the following hold: the process runs on a
	    Space (``ON_SPACE``), no viewer-only mode is configured
	    (``NEEDS_LOCAL_INFERENCE``), the ``spaces`` package could not be imported
	    (``spaces is None``), and CPU inference was not explicitly allowed.

	    Raises:
	        RuntimeError: if the combination above is detected.
	    """
	    if ON_SPACE and NEEDS_LOCAL_INFERENCE and spaces is None and not _allow_cpu_inference():
	        raise RuntimeError(
	            "refusing local inference on a Space without ZeroGPU; configure relay, engine, "
	            "or Modal upload mode, or set SMALL_CUTS_ALLOW_CPU_INFERENCE=1 explicitly"
	        )


	def _degraded_app(message: str) -> gr.Blocks:
	    """Build a minimal placeholder UI that reports a startup failure.

	    Args:
	        message: Failure description; it is embedded in the page as inline code.

	    Returns:
	        A ``gr.Blocks`` titled "Small Cuts" containing a single Markdown notice.
	    """
	    with gr.Blocks(title="Small Cuts") as degraded:
	        gr.Markdown(
	            f"# Small Cuts is temporarily unavailable\n\nStartup configuration failed: `{message}`"
	        )
	    return degraded


	def _build_demo() -> gr.Blocks:
	    """Validate the startup mode, then build the viewer app and attach relay hooks.

	    Returns:
	        The object returned by ``build_viewer_app()``.

	    Raises:
	        RuntimeError: propagated from ``_validate_startup_mode`` when local
	            inference would run on a Space without ZeroGPU.
	    """
	    _validate_startup_mode()
	    # In engine/relay/upload modes the Space is a public reader and upload front door, so it
	    # must not warm local model weights. In local-inference mode, ZeroGPU loads lazily inside
	    # the Gradio handler decorated with @spaces.GPU.
	    app = build_viewer_app()
	    # Hooks are installed on the app's ``.app`` attribute, not on the Blocks object itself.
	    install_relay_hooks(app.app)
	    return app


	# Build the real app at import time. Any failure is reported, remembered in
	# STARTUP_ERROR and replaced by a placeholder UI, so importing this module never
	# raises because of a startup problem.
	try:
	    demo = _build_demo()
	except Exception as exc:
	    capture_exception(exc)
	    STARTUP_ERROR = str(exc)
	    demo = _degraded_app(STARTUP_ERROR)

	if __name__ == "__main__":
	    demo.launch(theme=THEME)