| """Gradio UI for Small Cuts.""" |
|
|
| from __future__ import annotations |
|
|
| import gradio as gr |
| import numpy as np |
| from PIL import Image |
|
|
| from .frames import pick_key_frame, sample_frames |
| from .narrator import get_backend, narrate |
| from .styles import DEFAULT_STYLE_KEY, style_choices |
| from .theme import build_theme |
| from .title_card import derive_title, render_title_card |
| from .tts import speak |
|
|
# Page heading; build_app also passes it to gr.Blocks as the page title.
TITLE = "🎬 Small Cuts"

# Pitch paragraph that build_app renders directly beneath the heading.
TAGLINE = (
    "Your life, narrated. "
    "Drop in a moment — from your phone, webcam, or smart-glasses footage — "
    "pick a director, and hear what scene you're really in. "
    "Every model under 32B. "
    "Everything runs in this Space."
)

# Built once at import time. Nothing in the visible part of this module reads
# it, so it is presumably consumed by whatever launches the app — TODO confirm.
THEME = build_theme()
|
|
|
|
| def _gpu(duration: int = 90): |
| """Mark an event handler for ZeroGPU. No-op off-Space. |
| |
| ZeroGPU's startup scan looks for the GPU mark on the functions Gradio |
| binds — decorating an inner helper instead leaves requests unscheduled |
| (worker dies with "No CUDA GPUs are available"). TTS is marked too: |
| any torch forward in the main process poisons later worker forks. |
| """ |
|
|
| def deco(fn): |
| try: |
| import spaces |
| except ImportError: |
| return fn |
| return spaces.GPU(duration=duration)(fn) |
|
|
| return deco |
|
|
|
|
def _narrate_core(
    image: Image.Image | None, style_key: str, scene_hint: str, empty_text: str
) -> tuple[Image.Image, str]:
    """Build the ``(title card, narration text)`` pair for one request.

    If ``image`` is ``None`` the narrator is not invoked and ``empty_text``
    becomes the narration verbatim. Otherwise the text is taken from the
    result of ``narrate``; a falsy ``scene_hint`` is forwarded as ``""``.
    In both cases the card is rendered from a title derived from that text
    and the given ``style_key``.
    """
    script = (
        empty_text
        if image is None
        else narrate(image, style_key=style_key, scene_hint=scene_hint or "").text
    )
    headline = derive_title(script)
    title_card = render_title_card(headline, style_key)
    return title_card, script
|
|
|
|
@_gpu()
def _narrate_handler(
    image: Image.Image | None, style_key: str, scene_hint: str
) -> tuple[Image.Image, str]:
    """Handle a still-image request and return ``(title card, narration)``.

    The GPU mark sits on this function because it is the one bound to the
    UI events. When no image is supplied, a stock "empty screen" line is
    narrated instead.
    """
    no_image_line = (
        "The narrator clears his throat, looks at the empty screen, and waits. "
        "Some scenes, after all, require a scene."
    )
    return _narrate_core(image, style_key, scene_hint, no_image_line)
|
|
|
|
@_gpu()
def _narrate_video_handler(
    video_path: str | None, style_key: str, scene_hint: str
) -> tuple[Image.Image, str]:
    """Handle a video-clip request and return ``(title card, narration)``.

    A truthy ``video_path`` is sampled into frames and one key frame is
    picked for narration. A missing or empty path skips sampling entirely
    and yields a stock "blank projector" line instead.
    """
    if video_path:
        key_frame = pick_key_frame(sample_frames(video_path))
    else:
        key_frame = None

    blank_reel_line = (
        "The narrator squints at the projector. Nothing. He has narrated "
        "blank screens before, but never by choice."
    )
    return _narrate_core(key_frame, style_key, scene_hint, blank_reel_line)
|
|
|
|
@_gpu(duration=30)
def _speak_handler(text: str | None) -> tuple[int, np.ndarray] | None:
    """Synthesize speech for the narration text.

    Args:
        text: The contents of the narration textbox. May be ``None``.

    Returns:
        ``(sample_rate, audio)`` taken from the result of ``speak``, or
        ``None`` when there is nothing to read (``None``, empty, or
        whitespace-only text), which leaves the audio output empty.
    """
    # Gradio's Textbox hands None through to the handler unchanged (e.g. from
    # an API client), and None has no .strip(); guard before touching it.
    if text is None or not text.strip():
        return None
    speech = speak(text)
    return speech.sample_rate, speech.audio
|
|
|
|
def build_app() -> gr.Blocks:
    """Assemble the Small Cuts interface and return it without launching.

    The page is a heading plus tagline, then one row of two equal-scale
    columns. The first column holds the inputs (image, clip, director
    style, optional hint, trigger button); the second holds the outputs
    (title card, narration text, read-aloud button, audio player). A footer
    names the active narrator backend and model.

    Wiring, in registration order:

    * trigger-button click and image change -> ``_narrate_handler``
    * video change -> ``_narrate_video_handler``
    * read-aloud click -> ``_speak_handler`` on the narration text
    """
    active_backend = get_backend()

    with gr.Blocks(title=TITLE) as demo:
        gr.Markdown(f"# {TITLE}\n{TAGLINE}")

        with gr.Row():
            # Input column.
            with gr.Column(scale=1):
                image_in = gr.Image(
                    label="Your moment",
                    type="pil",
                    sources=["upload", "webcam"],
                )
                video_in = gr.Video(
                    label="…or a clip (glasses or phone, narrates the middle of the scene)",
                    sources=["upload"],
                )
                style_in = gr.Dropdown(
                    choices=style_choices(),
                    value=DEFAULT_STYLE_KEY,
                    label="Director's cut",
                )
                hint_in = gr.Textbox(
                    label="Anything the narrator should know? (optional)",
                    placeholder="e.g. this is my third coffee today",
                )
                roll_btn = gr.Button("🎬 Roll narration", variant="primary")

            # Output column.
            with gr.Column(scale=1):
                card_out = gr.Image(label="Title card", interactive=False)
                narration_box = gr.Textbox(label="The narrator says…", lines=8)
                read_btn = gr.Button("🔊 Read it to me", variant="secondary")
                audio_out = gr.Audio(label="The narrator speaks…", interactive=False)

        gr.Markdown(
            f"<sub>backend: `{active_backend.name}` · model: `{active_backend.model_id}` · "
            "no cloud APIs — Off the Grid 🏕️</sub>"
        )

        # The button and a fresh image both narrate the still; each binding
        # gets its own input/output lists.
        for bind in (roll_btn.click, image_in.change):
            bind(
                _narrate_handler,
                inputs=[image_in, style_in, hint_in],
                outputs=[card_out, narration_box],
            )
        video_in.change(
            _narrate_video_handler,
            inputs=[video_in, style_in, hint_in],
            outputs=[card_out, narration_box],
        )
        read_btn.click(_speak_handler, inputs=[narration_box], outputs=[audio_out])

    return demo
|
|