# app.py — Gradio Blocks entry point. UI + wiring only. ZERO model references.
"""Rupkotha (রূপকথা) — a bedtime-story app for kids.

This file orchestrates the UI and chains core functions:

    transcribe() → generate_story() → speak()

It must contain no model names, paths, or model logic — those live only in core/.

Layout: a two-panel "studio" — a Create panel (language/style, pictures, ask)
and a Story panel (text + audio + save) — over a night-sky theme. Session memory
uses gr.State, never browser storage (CLAUDE.md §11).
"""
from pathlib import Path

import gradio as gr

from core.vision_story import generate_story
from core.stt import transcribe
from core.tts import speak
from core.prompts import STYLES

# Language radio: display label → internal code passed to core functions.
_LANGUAGES = [("English", "en"), ("বাংলা", "bn")]
# Per-language list of style names, in the order STYLES declares them.
_STYLE_CHOICES = {lang: list(styles) for lang, styles in STYLES.items()}
_CSS_PATH = Path(__file__).parent / "assets" / "styles.css"
HISTORY_SIZE = 3  # how many recent stories to keep (CLAUDE.md §11: last 3)


def _styles_for(language: str):
    """Return a style-dropdown update for the chosen language.

    Unknown language codes fall back to the English style list. If the
    selected list is empty the dropdown value is cleared instead of raising
    ``IndexError``.
    """
    choices = _STYLE_CHOICES.get(language, _STYLE_CHOICES["en"])
    default = choices[0] if choices else None
    return gr.update(choices=choices, value=default)


def _preview(files):
    """Show uploaded images in the preview gallery; hide it when empty."""
    shown = files or []
    return gr.update(value=shown, visible=bool(shown))


def _voice_to_text(audio_path, language):
    """Transcribe a mic recording into the instruction box.

    On a missing recording or an empty/failed transcription, return a no-op
    update so whatever the child already typed is left untouched.
    """
    if not audio_path:
        # Nothing was recorded: skip the core call entirely.
        return gr.update()
    text = transcribe(audio_path, language)
    return text or gr.update()


def _tell_a_story(images, instruction, language, style, child_name):
    """Chain: images + instruction → story text → motherly-voice audio.

    Each core call degrades gracefully (never raises), so the UI always shows
    a story even if Modal is unreachable or audio synthesis fails.

    Returns a 4-tuple ``(story, wav_path, badge, current)``. ``current`` is a
    dict with keys ``"story"``, ``"audio"`` and ``"badge"`` so the Save button
    can capture the exact result shown.
    """
    story, model_label = generate_story(
        image_paths=list(images or []),  # None (no upload) becomes an empty list
        instruction=instruction or "",
        language=language,
        style=style,
        child_name=child_name or "",
    )
    wav_path, tts_label = speak(story, language)
    badge = f"📖 {model_label} · 🔊 {tts_label}"
    current = {"story": story, "audio": wav_path, "badge": badge}
    return story, wav_path, badge, current


def _history_updates(history):
    """Flatten `history` into per-slot updates: (group, markdown, audio) × N.

    Always returns ``3 * HISTORY_SIZE`` updates. Slots beyond the end of
    `history` are hidden and cleared. ``None`` is treated as an empty history.
    """
    saved = list(history or [])
    updates = []
    for slot in range(HISTORY_SIZE):
        if slot < len(saved):
            entry = saved[slot]
            body = f"{entry['story']}\n\n{entry['badge']}"
            updates += [
                gr.update(visible=True),
                gr.update(value=body),
                gr.update(value=entry.get("audio")),
            ]
        else:
            updates += [
                gr.update(visible=False),
                gr.update(value=""),
                gr.update(value=None),
            ]
    return updates


def _save_story(current, history):
    """Prepend the current story to the session history (newest first, max N).

    Nothing is saved when there is no current result or its story is empty.
    Returns ``[history, *slot_updates]`` to match the Save button's outputs.
    """
    history = list(history or [])  # copy so the incoming state list is not mutated
    if current and current.get("story"):
        history = ([current] + history)[:HISTORY_SIZE]
    return [history, *_history_updates(history)]


def build_ui() -> gr.Blocks:
    """Build the Blocks app: hero, Create panel, Story panel, saved stories, wiring.

    NOTE(review): this layout was re-indented from a flattened copy of the
    file. The container nesting below (in particular whether the saved-stories
    row sits under the studio row or inside the Story panel) should be
    confirmed against the intended design.
    """
    theme = gr.themes.Soft(
        primary_hue="amber",
        secondary_hue="orange",
        neutral_hue="slate",
        radius_size="lg",
        font=[gr.themes.GoogleFont("Nunito"), "ui-sans-serif", "sans-serif"],
    )
    # Only pass the stylesheet when it exists on disk.
    css_kw = {"css_paths": [str(_CSS_PATH)]} if _CSS_PATH.exists() else {}

    with gr.Blocks(title="রূপকথা · Rupkotha", theme=theme, fill_width=True, **css_kw) as demo:
        # ── Hero ─────────────────────────────────────────────────────────
        # NOTE(review): the gr.HTML literals in this function contain text
        # only; any wrapping markup appears to have been lost — confirm and
        # restore the intended tags.
        gr.HTML(
            "\n🌙\n\nরূপকথা · Rupkotha\n\n"
            "Show a picture, ask for a story — and hear it told in a warm motherly voice.\n\n"
        )

        with gr.Row(elem_id="studio", equal_height=False):
            # ── Create panel ─────────────────────────────────────────────
            with gr.Column(scale=5, elem_classes="panel"):
                gr.HTML('\n1Choose\n')
                with gr.Row():
                    language = gr.Radio(
                        choices=_LANGUAGES,
                        value="en",
                        label="Language · ভাষা",
                        elem_classes="seg",
                    )
                    style = gr.Dropdown(
                        choices=_STYLE_CHOICES["en"],
                        value=_STYLE_CHOICES["en"][0],
                        label="Story style",
                    )

                gr.HTML('\n2Show your pictures\n')
                images = gr.File(
                    file_count="multiple",
                    type="filepath",
                    file_types=["image"],
                    label="Drawings or toys — 1 to 4 pictures",
                    elem_classes="upload-box",
                )
                preview = gr.Gallery(
                    label="Your pictures",
                    columns=4,
                    height="auto",
                    object_fit="contain",  # show the whole image, don't crop/trim
                    show_label=True,
                    visible=False,
                    elem_classes="preview",
                )

                gr.HTML('\n3Ask for a story\n')
                mic = gr.Audio(
                    sources=["microphone"],
                    type="filepath",
                    label="🎤 Speak your request (optional) — it fills the box below",
                )
                instruction = gr.Textbox(
                    label="What story do you want?",
                    placeholder="tell me a story about my cat…",
                    lines=2,
                )
                child_name = gr.Textbox(
                    label="Your name (optional)",
                    placeholder="e.g. Rupa — woven into the story",
                    lines=1,
                )
                generate_btn = gr.Button(
                    "✨ Tell me a story",
                    variant="primary",
                    size="lg",
                    elem_id="generate-btn",
                )

            # ── Story panel ──────────────────────────────────────────────
            with gr.Column(scale=6, elem_classes="panel story-panel"):
                gr.HTML('\n📖 Your story\n')
                story_out = gr.Textbox(
                    show_label=False,
                    lines=8,
                    max_lines=40,  # grow to fit the whole story (no inner scrollbar)
                    autoscroll=False,
                    placeholder="Your bedtime story will appear here… ✨",
                    elem_classes="story-text",
                    container=False,
                )
                audio_out = gr.Audio(label="🔊 Listen (press play to replay)", type="filepath")
                badge_out = gr.Markdown(elem_classes="model-badge")
                save_btn = gr.Button("💾 Save this story", elem_id="save-btn")

        # ── Saved stories: last 3, each replayable (gr.State session memory) ─
        current = gr.State(None)
        history = gr.State([])
        gr.HTML('\n🌟 Your saved stories\n')
        slots = []
        with gr.Row(elem_id="history-row", equal_height=False):
            for _ in range(HISTORY_SIZE):
                with gr.Column(scale=1, min_width=240):
                    with gr.Group(visible=False, elem_classes="saved-card") as slot_group:
                        slot_md = gr.Markdown(elem_classes="saved-text")
                        slot_audio = gr.Audio(type="filepath", label="Replay")
                slots.append((slot_group, slot_md, slot_audio))

        # ── Wiring ───────────────────────────────────────────────────────
        language.change(_styles_for, inputs=language, outputs=style)

        # Show thumbnails of the uploaded pictures.
        images.change(_preview, inputs=images, outputs=preview)

        # Voice is a bonus: it fills the typed box, which stays primary (§2, §14).
        mic.stop_recording(_voice_to_text, inputs=[mic, language], outputs=instruction)

        generate_btn.click(
            _tell_a_story,
            inputs=[images, instruction, language, style, child_name],
            outputs=[story_out, audio_out, badge_out, current],
        )

        # Flatten slots for the Save outputs: history + (group, md, audio) × N.
        slot_outputs = [comp for slot in slots for comp in slot]
        save_btn.click(
            _save_story,
            inputs=[current, history],
            outputs=[history, *slot_outputs],
        )

    return demo


# Module-level `demo` so Hugging Face Spaces (gradio SDK) can discover it.
demo = build_ui().queue()

if __name__ == "__main__":
    demo.launch()