Spaces:
Running
Running
| # app.py — Gradio Blocks entry point. UI + wiring only. ZERO model references. | |
| """Rupkotha (রূপকথা) — a bedtime-story app for kids. | |
| This file orchestrates the UI and chains core functions: | |
| transcribe() → generate_story() → speak() | |
| It must contain no model names, paths, or model logic — those live only in core/. | |
| Layout: a two-panel "studio" — a Create panel (language/style, pictures, ask) and a | |
| Story panel (text + audio + save) — over a night-sky theme. Session memory uses | |
| gr.State, never browser storage (CLAUDE.md §11). | |
| """ | |
| from pathlib import Path | |
| import gradio as gr | |
| from core.vision_story import generate_story | |
| from core.stt import transcribe | |
| from core.tts import speak | |
| from core.prompts import STYLES | |
# Language radio choices as (display label, internal code) pairs; the code is
# the value handed to the core functions.
# NOTE(review): the second label looks mis-encoded in this copy (probably the
# Bengali word for "Bangla") — restore it from the UTF-8 original.
_LANGUAGES = [("English", "en"), ("বাΰ¦ΰ¦²ΰ¦Ύ", "bn")]

# Style names available for each language code, in the order STYLES lists them.
_STYLE_CHOICES = {
    code: list(by_name.keys()) for code, by_name in STYLES.items()
}

# Optional stylesheet that sits next to this file under assets/.
_CSS_PATH = Path(__file__).parent.joinpath("assets", "styles.css")

# Number of saved-story slots kept per session (CLAUDE.md §11: last 3).
HISTORY_SIZE = 3
def _styles_for(language: str):
    """Build the style-dropdown update for the selected language.

    Language codes missing from ``_STYLE_CHOICES`` fall back to the English
    list. The first style of the resulting list becomes the selected value.
    """
    fallback = _STYLE_CHOICES["en"]
    available = _STYLE_CHOICES.get(language, fallback)
    return gr.update(choices=available, value=available[0])
def _preview(files):
    """Feed the uploaded images to the preview gallery.

    The gallery is made visible only when at least one file is present; a
    falsy ``files`` value (``None`` or an empty list) clears and hides it.
    """
    shown = files if files else []
    return gr.update(value=shown, visible=bool(shown))
def _voice_to_text(audio_path, language):
    """Turn a microphone recording into text for the instruction box.

    When ``transcribe`` yields nothing (empty or otherwise falsy), a no-op
    ``gr.update()`` is returned so whatever is already typed stays untouched.
    """
    transcript = transcribe(audio_path, language)
    if not transcript:
        return gr.update()
    return transcript
def _tell_a_story(images, instruction, language, style, child_name):
    """Run the generation chain: pictures + request -> story text -> audio.

    Missing inputs are normalised first (no images -> empty list, missing
    instruction or child name -> empty string). ``generate_story`` returns the
    story plus a model label; ``speak`` returns an audio path plus a TTS label.

    Returns:
        ``(story, wav_path, badge, current)`` where ``badge`` combines both
        labels and ``current`` is a dict with keys ``"story"``, ``"audio"``
        and ``"badge"`` that the Save button stores verbatim.

    NOTE(review): the original author states the core calls degrade
    gracefully instead of raising. Nothing in this function catches
    exceptions, so that guarantee has to hold inside core/ — confirm there.
    """
    story_request = {
        "image_paths": list(images or []),
        "instruction": instruction or "",
        "language": language,
        "style": style,
        "child_name": child_name or "",
    }
    story, model_label = generate_story(**story_request)
    wav_path, tts_label = speak(story, language)

    # NOTE(review): the icon/separator characters in this badge look
    # mis-encoded in this copy; kept verbatim pending the UTF-8 original.
    badge = f"π {model_label}γΒ·γπ {tts_label}"
    current = {"story": story, "audio": wav_path, "badge": badge}
    return story, wav_path, badge, current
def _history_updates(history):
    """Flatten ``history`` into per-slot updates: (group, markdown, audio) × N.

    Exactly ``HISTORY_SIZE`` triples are produced. A slot backed by a saved
    entry is shown with the story text followed by its badge and the entry's
    audio (``None`` when the entry has no ``"audio"`` key). Slots without an
    entry are hidden and cleared.
    """
    flat = []
    for slot in range(HISTORY_SIZE):
        if slot >= len(history):
            # No saved story for this slot: hide the card and blank it.
            flat.extend(
                [
                    gr.update(visible=False),
                    gr.update(value=""),
                    gr.update(value=None),
                ]
            )
            continue

        saved = history[slot]
        card_text = f"{saved['story']}\n\n<span class='saved-badge'>{saved['badge']}</span>"
        flat.extend(
            [
                gr.update(visible=True),
                gr.update(value=card_text),
                gr.update(value=saved.get("audio")),
            ]
        )
    return flat
def _save_story(current, history):
    """Prepend the current story to the session history (newest first, max N).

    Args:
        current: The dict captured from the last generation (keys ``"story"``,
            ``"audio"``, ``"badge"``), or ``None`` if nothing was generated.
        history: Previously saved entries, newest first. ``None`` is treated
            as an empty history.

    Returns:
        A list whose first item is the new history, followed by the flattened
        per-slot updates from ``_history_updates``.
    """
    # Work on a copy so the incoming state list is never mutated in place.
    saved = list(history or [])
    if current and current.get("story"):
        # Drop any entry equal to `current` before prepending. Without this,
        # pressing Save repeatedly on the same result fills every slot with
        # the same story and evicts the other saved ones.
        remaining = [entry for entry in saved if entry != current]
        saved = [current, *remaining][:HISTORY_SIZE]
    return [saved, *_history_updates(saved)]
def build_ui() -> gr.Blocks:
    """Assemble the Gradio Blocks app and wire its events.

    Layout, top to bottom: a hero banner, a "studio" row holding the Create
    panel (language/style, picture upload + preview, mic, instruction, child
    name, generate button) and the Story panel (story text, audio, badge,
    save button), then a row of ``HISTORY_SIZE`` initially hidden saved-story
    cards. ``current`` and ``history`` are ``gr.State`` values.

    The stylesheet at ``_CSS_PATH`` is passed to ``gr.Blocks`` only if the
    file exists.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet queued or launched).

    NOTE(review): the nesting below was reconstructed from a copy that had
    lost its indentation, and several user-facing strings look mis-encoded
    (mojibake). Both should be checked against the UTF-8 original; the
    strings are kept verbatim here.
    """
    theme = gr.themes.Soft(
        primary_hue="amber",
        secondary_hue="orange",
        neutral_hue="slate",
        radius_size="lg",
        font=[gr.themes.GoogleFont("Nunito"), "ui-sans-serif", "sans-serif"],
    )
    # Only pass css_paths when the stylesheet is actually present on disk.
    css_kw = {"css_paths": [str(_CSS_PATH)]} if _CSS_PATH.exists() else {}
    with gr.Blocks(title="ΰ¦°ΰ§ΰ¦ͺΰ¦ΰ¦₯ΰ¦Ύ Β· Rupkotha", theme=theme, fill_width=True, **css_kw) as demo:
        # -- Hero ------------------------------------------------------------
        gr.HTML(
            """
            <div id="hero">
            <div class="hero-moon">π</div>
            <h1>ΰ¦°ΰ§ΰ¦ͺΰ¦ΰ¦₯ΰ¦Ύ Β· Rupkotha</h1>
            <p>Show a picture, ask for a story β and hear it told in a warm
            motherly voice.</p>
            </div>
            """
        )
        with gr.Row(elem_id="studio", equal_height=False):
            # -- Create panel --------------------------------------------------
            with gr.Column(scale=5, elem_classes="panel"):
                gr.HTML('<div class="panel-head"><span class="step">1</span>Choose</div>')
                with gr.Row():
                    language = gr.Radio(
                        choices=_LANGUAGES, value="en",
                        label="Language Β· ΰ¦ΰ¦Ύΰ¦·ΰ¦Ύ", elem_classes="seg",
                    )
                    style = gr.Dropdown(
                        choices=_STYLE_CHOICES["en"], value=_STYLE_CHOICES["en"][0],
                        label="Story style",
                    )
                gr.HTML('<div class="panel-head"><span class="step">2</span>Show your pictures</div>')
                images = gr.File(
                    file_count="multiple",
                    type="filepath",
                    file_types=["image"],
                    label="Drawings or toys β 1 to 4 pictures",
                    elem_classes="upload-box",
                )
                # Hidden until at least one picture is uploaded (see _preview).
                preview = gr.Gallery(
                    label="Your pictures",
                    columns=4,
                    height="auto",
                    object_fit="contain",  # show the whole image, don't crop/trim
                    show_label=True,
                    visible=False,
                    elem_classes="preview",
                )
                gr.HTML('<div class="panel-head"><span class="step">3</span>Ask for a story</div>')
                mic = gr.Audio(
                    sources=["microphone"],
                    type="filepath",
                    label="π€ Speak your request (optional) β it fills the box below",
                )
                instruction = gr.Textbox(
                    label="What story do you want?",
                    placeholder="tell me a story about my catβ¦",
                    lines=2,
                )
                child_name = gr.Textbox(
                    label="Your name (optional)",
                    placeholder="e.g. Rupa β woven into the story",
                    lines=1,
                )
                generate_btn = gr.Button(
                    "β¨ Tell me a story", variant="primary", size="lg",
                    elem_id="generate-btn",
                )
            # -- Story panel ---------------------------------------------------
            with gr.Column(scale=6, elem_classes="panel story-panel"):
                gr.HTML('<div class="panel-head">π Your story</div>')
                story_out = gr.Textbox(
                    show_label=False,
                    lines=8,
                    max_lines=40,  # grow to fit the whole story (no inner scrollbar)
                    autoscroll=False,
                    placeholder="Your bedtime story will appear hereβ¦ β¨",
                    elem_classes="story-text",
                    container=False,
                )
                audio_out = gr.Audio(label="π Listen (press play to replay)", type="filepath")
                badge_out = gr.Markdown(elem_classes="model-badge")
                save_btn = gr.Button("πΎ Save this story", elem_id="save-btn")
        # -- Saved stories: last 3, each replayable (gr.State session memory) --
        # `current` holds the dict for the story on screen; `history` holds the
        # saved entries, newest first.
        current = gr.State(None)
        history = gr.State([])
        gr.HTML('<div class="section-title">π Your saved stories</div>')
        # One (group, markdown, audio) triple per saved-story slot.
        slots = []
        with gr.Row(elem_id="history-row", equal_height=False):
            for _ in range(HISTORY_SIZE):
                with gr.Column(scale=1, min_width=240):
                    with gr.Group(visible=False, elem_classes="saved-card") as slot_group:
                        slot_md = gr.Markdown(elem_classes="saved-text")
                        slot_audio = gr.Audio(type="filepath", label="Replay")
                    slots.append((slot_group, slot_md, slot_audio))
        # -- Wiring ------------------------------------------------------------
        # Switching language swaps the style choices for that language.
        language.change(_styles_for, inputs=language, outputs=style)
        # Show thumbnails of the uploaded pictures.
        images.change(_preview, inputs=images, outputs=preview)
        # Voice is a bonus: it fills the typed box, which stays primary (§2, §14).
        mic.stop_recording(_voice_to_text, inputs=[mic, language], outputs=instruction)
        generate_btn.click(
            _tell_a_story,
            inputs=[images, instruction, language, style, child_name],
            outputs=[story_out, audio_out, badge_out, current],
        )
        # Flatten slots for the Save outputs: history + (group, md, audio) × N.
        # The order must match what _save_story returns.
        slot_outputs = [comp for slot in slots for comp in slot]
        save_btn.click(
            _save_story,
            inputs=[current, history],
            outputs=[history, *slot_outputs],
        )
    return demo
# Module-level `demo` so Hugging Face Spaces (gradio SDK) can discover it.
# The UI is built and its queue enabled at import time.
demo = build_ui().queue()

# Launch a local server only when this file is run directly as a script.
if __name__ == "__main__":
    demo.launch()