"""Gradio UI for Small Cuts.""" from __future__ import annotations import gradio as gr import numpy as np from PIL import Image from .frames import pick_key_frame, sample_frames from .narrator import get_backend, narrate from .styles import DEFAULT_STYLE_KEY, style_choices from .theme import build_theme from .title_card import derive_title, render_title_card from .tts import speak TITLE = "🎬 Small Cuts" TAGLINE = ( "Your life, narrated. Drop in a moment — from your phone, webcam, or " "smart-glasses footage — pick a director, and hear what scene you're really in. " "Every model under 32B. Everything runs in this Space." ) # Off-Brand cinematic theme for the M2 custom UI quest. THEME = build_theme() def _gpu(duration: int = 90): """Mark an event handler for ZeroGPU. No-op off-Space. ZeroGPU's startup scan looks for the GPU mark on the functions Gradio binds — decorating an inner helper instead leaves requests unscheduled (worker dies with "No CUDA GPUs are available"). TTS is marked too: any torch forward in the main process poisons later worker forks. """ def deco(fn): try: import spaces except ImportError: return fn return spaces.GPU(duration=duration)(fn) return deco def _narrate_core( image: Image.Image | None, style_key: str, scene_hint: str, empty_text: str ) -> tuple[Image.Image, str]: if image is None: text = empty_text else: result = narrate(image, style_key=style_key, scene_hint=scene_hint or "") text = result.text return render_title_card(derive_title(text), style_key), text @_gpu() def _narrate_handler( image: Image.Image | None, style_key: str, scene_hint: str ) -> tuple[Image.Image, str]: return _narrate_core( image, style_key, scene_hint, "The narrator clears his throat, looks at the empty screen, and waits. " "Some scenes, after all, require a scene.", ) @_gpu() def _narrate_video_handler( video_path: str | None, style_key: str, scene_hint: str ) -> tuple[Image.Image, str]: frame = pick_key_frame(sample_frames(video_path)) if video_path else None return _narrate_core( frame, style_key, scene_hint, "The narrator squints at the projector. Nothing. He has narrated " "blank screens before, but never by choice.", ) @_gpu(duration=30) def _speak_handler(text: str) -> tuple[int, np.ndarray] | None: if not text.strip(): return None speech = speak(text) return speech.sample_rate, speech.audio def build_app() -> gr.Blocks: backend = get_backend() with gr.Blocks(title=TITLE) as demo: gr.Markdown(f"# {TITLE}\n{TAGLINE}") with gr.Row(): with gr.Column(scale=1): image = gr.Image(label="Your moment", type="pil", sources=["upload", "webcam"]) video = gr.Video( label="…or a clip (glasses or phone, narrates the middle of the scene)", sources=["upload"], ) style = gr.Dropdown( choices=style_choices(), value=DEFAULT_STYLE_KEY, label="Director's cut", ) hint = gr.Textbox( label="Anything the narrator should know? (optional)", placeholder="e.g. this is my third coffee today", ) go = gr.Button("🎬 Roll narration", variant="primary") with gr.Column(scale=1): card = gr.Image(label="Title card", interactive=False) narration = gr.Textbox(label="The narrator says…", lines=8) speak_btn = gr.Button("🔊 Read it to me", variant="secondary") audio = gr.Audio(label="The narrator speaks…", interactive=False) gr.Markdown( f"backend: `{backend.name}` · model: `{backend.model_id}` · " "no cloud APIs — Off the Grid 🏕️" ) go.click(_narrate_handler, inputs=[image, style, hint], outputs=[card, narration]) image.change(_narrate_handler, inputs=[image, style, hint], outputs=[card, narration]) video.change(_narrate_video_handler, inputs=[video, style, hint], outputs=[card, narration]) speak_btn.click(_speak_handler, inputs=[narration], outputs=[audio]) return demo