rupkotha / app.py
Deb
Prepare for HF Space: disable mock, module-level demo, README
8aa73fd
Raw
History Blame Contribute Delete
9.95 kB
# app.py — Gradio Blocks entry point. UI + wiring only. ZERO model references.
"""Rupkotha (ΰ¦°ΰ§‚ΰ¦ͺকΰ¦₯ΰ¦Ύ) β€” a bedtime-story app for kids.
This file orchestrates the UI and chains core functions:
transcribe() β†’ generate_story() β†’ speak()
It must contain no model names, paths, or model logic β€” those live only in core/.
Layout: a two-panel "studio" β€” a Create panel (language/style, pictures, ask) and a
Story panel (text + audio + save) β€” over a night-sky theme. Session memory uses
gr.State, never browser storage (CLAUDE.md Β§11).
"""
from pathlib import Path
import gradio as gr
from core.vision_story import generate_story
from core.stt import transcribe
from core.tts import speak
from core.prompts import STYLES
# Language radio: (display label, internal code) pairs; the internal code is
# what gets passed to the core functions.
_LANGUAGES = [("English", "en"), ("বাংলা", "bn")]
# Style names offered per language code, taken from the keys of STYLES[lang].
_STYLE_CHOICES = {lang: list(styles.keys()) for lang, styles in STYLES.items()}
# Stylesheet expected under assets/ next to this file.
_CSS_PATH = Path(__file__).parent / "assets" / "styles.css"
# How many recent stories to keep (CLAUDE.md §11: last 3).
HISTORY_SIZE = 3
def _styles_for(language: str):
    """Return a style-dropdown update for the chosen language.

    Looks up the style names for ``language`` in ``_STYLE_CHOICES``, falling
    back to the English list when the code is unknown, and preselects the
    first entry.

    If the resolved list is empty the dropdown is cleared (``value=None``)
    rather than raising ``IndexError`` inside the change handler.
    """
    choices = _STYLE_CHOICES.get(language, _STYLE_CHOICES["en"])
    selected = choices[0] if choices else None
    return gr.update(choices=choices, value=selected)
def _preview(files):
    """Mirror the uploaded files into the preview gallery.

    The gallery is visible only while at least one file is present; a missing
    or empty upload produces an empty, hidden gallery.
    """
    uploaded = files if files else []
    has_files = bool(uploaded)
    return gr.update(value=uploaded, visible=has_files)
def _voice_to_text(audio_path, language):
    """Transcribe a mic recording so it can fill the instruction box.

    Returns the transcript when ``transcribe`` yields a non-empty result.
    Otherwise returns a bare ``gr.update()`` so that whatever the child has
    already typed is left untouched.
    """
    transcript = transcribe(audio_path, language)
    if not transcript:
        # Empty or failed transcription: send an update that sets nothing.
        return gr.update()
    return transcript
def _tell_a_story(images, instruction, language, style, child_name):
    """Chain: images + instruction → story text → motherly-voice audio.

    The core calls are expected to degrade gracefully (never raise), so the UI
    always shows a story even if Modal is unreachable or audio synthesis fails.

    Args:
        images: Uploaded picture paths, or ``None`` when nothing was uploaded.
        instruction: The typed (or transcribed) request; ``None`` becomes "".
        language: Internal language code from the language radio.
        style: Story style chosen in the dropdown.
        child_name: Optional name to weave into the story; ``None`` becomes "".

    Returns:
        ``(story, wav_path, badge, current)``, where ``current`` is a dict with
        the keys ``"story"``, ``"audio"`` and ``"badge"`` so the Save button can
        capture the exact result shown.
    """
    image_paths = list(images or [])
    story, model_label = generate_story(
        image_paths=image_paths,
        instruction=instruction or "",
        language=language,
        style=style,
        child_name=child_name or "",
    )
    wav_path, tts_label = speak(story, language)
    # \u3000 is an ideographic (full-width) space on either side of the dot.
    badge = f"📖 {model_label}\u3000·\u3000🔊 {tts_label}"
    current = {"story": story, "audio": wav_path, "badge": badge}
    return story, wav_path, badge, current
def _history_updates(history):
    """Flatten `history` into per-slot updates: (group, markdown, audio) × N.

    Always returns ``3 * HISTORY_SIZE`` updates. Slots backed by a saved entry
    are made visible and filled; the remaining slots are hidden and cleared.
    """
    flat = []
    for slot in range(HISTORY_SIZE):
        if slot >= len(history):
            # Unused slot: hide the card and clear its text and audio.
            flat.extend(
                (
                    gr.update(visible=False),
                    gr.update(value=""),
                    gr.update(value=None),
                )
            )
            continue

        saved = history[slot]
        text = f"{saved['story']}\n\n<span class='saved-badge'>{saved['badge']}</span>"
        flat.extend(
            (
                gr.update(visible=True),
                gr.update(value=text),
                gr.update(value=saved.get("audio")),
            )
        )
    return flat
def _save_story(current, history):
    """Put the current story at the front of the session history.

    The history is newest-first and trimmed to ``HISTORY_SIZE`` entries after
    an insert. Nothing is inserted when there is no current result or its
    ``"story"`` is empty. Returns the new history followed by the flattened
    per-slot updates for the saved-story cards.
    """
    saved = list(history) if history else []
    if current and current.get("story"):
        saved.insert(0, current)
        del saved[HISTORY_SIZE:]
    return [saved] + _history_updates(saved)
def build_ui() -> gr.Blocks:
    """Build the Gradio Blocks app: hero, Create panel, Story panel, saved stories.

    Creates every component and wires the event handlers; no handler runs
    here. The stylesheet at ``_CSS_PATH`` is attached only if the file exists.

    Returns:
        The assembled ``gr.Blocks`` instance (not yet queued or launched).
    """
    theme = gr.themes.Soft(
        primary_hue="amber",
        secondary_hue="orange",
        neutral_hue="slate",
        radius_size="lg",
        font=[gr.themes.GoogleFont("Nunito"), "ui-sans-serif", "sans-serif"],
    )
    # Pass css_paths only when the stylesheet is present.
    css_kw = {"css_paths": [str(_CSS_PATH)]} if _CSS_PATH.exists() else {}

    with gr.Blocks(title="রূপকথা · Rupkotha", theme=theme, fill_width=True, **css_kw) as demo:
        # ── Hero ─────────────────────────────────────────────────────────
        gr.HTML(
            """
            <div id="hero">
            <div class="hero-moon">🌙</div>
            <h1>রূপকথা · Rupkotha</h1>
            <p>Show a picture, ask for a story — and hear it told in a warm
            motherly voice.</p>
            </div>
            """
        )

        with gr.Row(elem_id="studio", equal_height=False):
            # ── Create panel ─────────────────────────────────────────────
            with gr.Column(scale=5, elem_classes="panel"):
                gr.HTML('<div class="panel-head"><span class="step">1</span>Choose</div>')
                with gr.Row():
                    language = gr.Radio(
                        choices=_LANGUAGES, value="en",
                        label="Language · ভাষা", elem_classes="seg",
                    )
                    style = gr.Dropdown(
                        choices=_STYLE_CHOICES["en"], value=_STYLE_CHOICES["en"][0],
                        label="Story style",
                    )

                gr.HTML('<div class="panel-head"><span class="step">2</span>Show your pictures</div>')
                images = gr.File(
                    file_count="multiple",
                    type="filepath",
                    file_types=["image"],
                    label="Drawings or toys — 1 to 4 pictures",
                    elem_classes="upload-box",
                )
                preview = gr.Gallery(
                    label="Your pictures",
                    columns=4,
                    height="auto",
                    object_fit="contain",  # show the whole image, don't crop/trim
                    show_label=True,
                    visible=False,
                    elem_classes="preview",
                )

                gr.HTML('<div class="panel-head"><span class="step">3</span>Ask for a story</div>')
                mic = gr.Audio(
                    sources=["microphone"],
                    type="filepath",
                    label="🎤 Speak your request (optional) — it fills the box below",
                )
                instruction = gr.Textbox(
                    label="What story do you want?",
                    placeholder="tell me a story about my cat…",
                    lines=2,
                )
                child_name = gr.Textbox(
                    label="Your name (optional)",
                    placeholder="e.g. Rupa — woven into the story",
                    lines=1,
                )
                generate_btn = gr.Button(
                    "✨ Tell me a story", variant="primary", size="lg",
                    elem_id="generate-btn",
                )

            # ── Story panel ──────────────────────────────────────────────
            with gr.Column(scale=6, elem_classes="panel story-panel"):
                gr.HTML('<div class="panel-head">📖 Your story</div>')
                story_out = gr.Textbox(
                    show_label=False,
                    lines=8,
                    max_lines=40,  # grow to fit the whole story (no inner scrollbar)
                    autoscroll=False,
                    placeholder="Your bedtime story will appear here… ✨",
                    elem_classes="story-text",
                    container=False,
                )
                audio_out = gr.Audio(label="🔊 Listen (press play to replay)", type="filepath")
                badge_out = gr.Markdown(elem_classes="model-badge")
                save_btn = gr.Button("💾 Save this story", elem_id="save-btn")

        # ── Saved stories: last 3, each replayable (gr.State session memory) ─
        current = gr.State(None)
        history = gr.State([])
        gr.HTML('<div class="section-title">🌟 Your saved stories</div>')
        slots = []
        with gr.Row(elem_id="history-row", equal_height=False):
            for _ in range(HISTORY_SIZE):
                with gr.Column(scale=1, min_width=240):
                    # Each card starts hidden; _save_story reveals the filled ones.
                    with gr.Group(visible=False, elem_classes="saved-card") as slot_group:
                        slot_md = gr.Markdown(elem_classes="saved-text")
                        slot_audio = gr.Audio(type="filepath", label="Replay")
                slots.append((slot_group, slot_md, slot_audio))

        # ── Wiring ───────────────────────────────────────────────────────
        language.change(_styles_for, inputs=language, outputs=style)
        # Show thumbnails of the uploaded pictures.
        images.change(_preview, inputs=images, outputs=preview)
        # Voice is a bonus: it fills the typed box, which stays primary (§2, §14).
        mic.stop_recording(_voice_to_text, inputs=[mic, language], outputs=instruction)
        generate_btn.click(
            _tell_a_story,
            inputs=[images, instruction, language, style, child_name],
            outputs=[story_out, audio_out, badge_out, current],
        )
        # Flatten slots for the Save outputs: history + (group, md, audio) × N.
        # The order must match the list returned by _save_story.
        slot_outputs = [comp for slot in slots for comp in slot]
        save_btn.click(
            _save_story,
            inputs=[current, history],
            outputs=[history, *slot_outputs],
        )
    return demo
# Module-level `demo` so Hugging Face Spaces (gradio SDK) can discover it.
# The UI is built at import time and .queue() is applied to the result.
demo = build_ui().queue()

# Launch the server only when this file is run directly as a script.
if __name__ == "__main__":
    demo.launch()