# app.py — Gradio Blocks entry point. UI + wiring only. ZERO model references.
"""Rupkotha (রূপকথা) — a bedtime-story app for kids.
This file orchestrates the UI and chains core functions:
    transcribe() → generate_story() → speak()
It must contain no model names, paths, or model logic — those live only in core/.
Layout: a two-panel "studio" — a Create panel (language/style, pictures, ask) and a
Story panel (text + audio + save) — over a night-sky theme. Session memory uses
gr.State, never browser storage (CLAUDE.md §11).
"""
from pathlib import Path
import gradio as gr
from core.vision_story import generate_story
from core.stt import transcribe
from core.tts import speak
from core.prompts import STYLES
# Language radio: (display label, internal code) pairs; the code is the value
# handed to the core functions.
_LANGUAGES = [("English", "en"), ("বাংলা", "bn")]

# Style names offered for each language code, derived from the STYLES table.
_STYLE_CHOICES = {lang: list(styles.keys()) for lang, styles in STYLES.items()}

# Stylesheet shipped next to this file; build_ui() only passes it to Gradio
# when the file actually exists.
_CSS_PATH = Path(__file__).parent / "assets" / "styles.css"

HISTORY_SIZE = 3  # how many recent stories to keep (CLAUDE.md §11: last 3)
def _styles_for(language: str):
    """Build the style-dropdown update for the selected language.

    Language codes missing from `_STYLE_CHOICES` fall back to the English
    list. The first entry of the resulting list becomes the selected value.
    """
    fallback = _STYLE_CHOICES["en"]
    available = _STYLE_CHOICES.get(language, fallback)
    first = available[0]
    return gr.update(choices=available, value=first)
def _preview(files):
    """Mirror the uploaded files into the preview gallery.

    With no files (None or empty) the gallery is emptied and hidden;
    otherwise it shows the files and becomes visible.
    """
    if not files:
        return gr.update(value=[], visible=False)
    return gr.update(value=files, visible=True)
def _voice_to_text(audio_path, language):
    """Turn a microphone recording into text for the instruction box.

    Returns the transcript when there is one. A falsy transcript (empty or
    failed transcription) yields a no-op `gr.update()`, so whatever is already
    typed in the box stays as it is.
    """
    transcript = transcribe(audio_path, language)
    if not transcript:
        return gr.update()
    return transcript
def _tell_a_story(images, instruction, language, style, child_name):
    """Run the chain: images + instruction → story text → spoken audio.

    Missing inputs are normalised first (no images → empty list, no
    instruction or name → empty string). The story comes from
    `generate_story`, the audio from `speak`; both also return a label that
    is combined into the badge text.

    NOTE(review): the core functions are expected to degrade gracefully
    rather than raise — that contract lives in core/ and is not visible here.

    Returns:
        (story, wav_path, badge, current) where `current` is a dict with the
        keys "story", "audio" and "badge", kept so the Save button can capture
        exactly what is on screen.
    """
    story_kwargs = {
        "image_paths": list(images or []),
        "instruction": instruction or "",
        "language": language,
        "style": style,
        "child_name": child_name or "",
    }
    story, model_label = generate_story(**story_kwargs)
    wav_path, tts_label = speak(story, language)

    badge = f"π {model_label}γΒ·γπ {tts_label}"
    snapshot = {"story": story, "audio": wav_path, "badge": badge}
    return story, wav_path, badge, snapshot
def _history_updates(history):
    """Expand `history` into a flat list of slot updates.

    Produces three updates per slot — (group, markdown, audio) × HISTORY_SIZE —
    in slot order. Slots that have a saved entry are made visible and filled
    with the story text, its badge and its audio; the remaining slots are
    hidden and cleared.
    """
    flat = []
    for slot in range(HISTORY_SIZE):
        if slot >= len(history):
            # Nothing saved for this slot: hide the card and blank its content.
            flat.extend(
                (
                    gr.update(visible=False),
                    gr.update(value=""),
                    gr.update(value=None),
                )
            )
            continue

        item = history[slot]
        text = f"{item['story']}\n\n<span class='saved-badge'>{item['badge']}</span>"
        flat.extend(
            (
                gr.update(visible=True),
                gr.update(value=text),
                gr.update(value=item.get("audio")),
            )
        )
    return flat
def _save_story(current, history):
    """Put the current story at the front of the session history.

    Args:
        current: The dict produced by the last generation (keys "story",
            "audio", "badge"), or None when nothing has been generated yet.
        history: Previously saved entries, newest first; None is treated as
            an empty list.

    Returns:
        A list whose first item is the new history (newest first, at most
        HISTORY_SIZE entries), followed by the flattened per-slot updates
        from `_history_updates`.
    """
    saved = list(history or [])  # copy: never mutate the incoming state list
    if current and current.get("story"):
        # Saving the same result again must not create duplicates that push
        # older stories out of the limited history, so drop any identical
        # entry before prepending (this also moves a re-saved story to the top).
        others = [entry for entry in saved if entry != current]
        saved = [current, *others][:HISTORY_SIZE]
    return [saved, *_history_updates(saved)]
def build_ui() -> gr.Blocks:
    """Assemble the Gradio Blocks app and return it without launching it.

    The page is built top to bottom: a hero banner, a two-column "studio" row
    (Create panel on the left, Story panel on the right), a row of
    HISTORY_SIZE saved-story cards, and finally the event wiring that connects
    the components to the handler functions defined above.
    """
    theme = gr.themes.Soft(
        primary_hue="amber",
        secondary_hue="orange",
        neutral_hue="slate",
        radius_size="lg",
        font=[gr.themes.GoogleFont("Nunito"), "ui-sans-serif", "sans-serif"],
    )
    # Pass the stylesheet only when the file exists; otherwise no css kwarg at all.
    css_kw = {"css_paths": [str(_CSS_PATH)]} if _CSS_PATH.exists() else {}
    with gr.Blocks(title="ΰ¦°ΰ§ΰ¦ͺΰ¦ΰ¦₯ΰ¦Ύ Β· Rupkotha", theme=theme, fill_width=True, **css_kw) as demo:
        # ── Hero ─────────────────────────────────────────────────────────
        gr.HTML(
            """
<div id="hero">
<div class="hero-moon">π</div>
<h1>ΰ¦°ΰ§ΰ¦ͺΰ¦ΰ¦₯ΰ¦Ύ Β· Rupkotha</h1>
<p>Show a picture, ask for a story β and hear it told in a warm
motherly voice.</p>
</div>
"""
        )
        with gr.Row(elem_id="studio", equal_height=False):
            # ── Create panel ─────────────────────────────────────────────
            with gr.Column(scale=5, elem_classes="panel"):
                gr.HTML('<div class="panel-head"><span class="step">1</span>Choose</div>')
                with gr.Row():
                    language = gr.Radio(
                        choices=_LANGUAGES, value="en",
                        label="Language Β· ΰ¦ΰ¦Ύΰ¦·ΰ¦Ύ", elem_classes="seg",
                    )
                    # Starts with the English styles; swapped by _styles_for
                    # whenever the language radio changes.
                    style = gr.Dropdown(
                        choices=_STYLE_CHOICES["en"], value=_STYLE_CHOICES["en"][0],
                        label="Story style",
                    )
                gr.HTML('<div class="panel-head"><span class="step">2</span>Show your pictures</div>')
                images = gr.File(
                    file_count="multiple",
                    type="filepath",
                    file_types=["image"],
                    label="Drawings or toys β 1 to 4 pictures",
                    elem_classes="upload-box",
                )
                # Hidden until _preview receives at least one uploaded file.
                preview = gr.Gallery(
                    label="Your pictures",
                    columns=4,
                    height="auto",
                    object_fit="contain",  # show the whole image, don't crop/trim
                    show_label=True,
                    visible=False,
                    elem_classes="preview",
                )
                gr.HTML('<div class="panel-head"><span class="step">3</span>Ask for a story</div>')
                mic = gr.Audio(
                    sources=["microphone"],
                    type="filepath",
                    label="π€ Speak your request (optional) β it fills the box below",
                )
                instruction = gr.Textbox(
                    label="What story do you want?",
                    placeholder="tell me a story about my catβ¦",
                    lines=2,
                )
                child_name = gr.Textbox(
                    label="Your name (optional)",
                    placeholder="e.g. Rupa β woven into the story",
                    lines=1,
                )
                generate_btn = gr.Button(
                    "β¨ Tell me a story", variant="primary", size="lg",
                    elem_id="generate-btn",
                )
            # ── Story panel ──────────────────────────────────────────────
            with gr.Column(scale=6, elem_classes="panel story-panel"):
                gr.HTML('<div class="panel-head">π Your story</div>')
                story_out = gr.Textbox(
                    show_label=False,
                    lines=8,
                    max_lines=40,  # grow to fit the whole story (no inner scrollbar)
                    autoscroll=False,
                    placeholder="Your bedtime story will appear hereβ¦ β¨",
                    elem_classes="story-text",
                    container=False,
                )
                audio_out = gr.Audio(label="π Listen (press play to replay)", type="filepath")
                badge_out = gr.Markdown(elem_classes="model-badge")
                save_btn = gr.Button("πΎ Save this story", elem_id="save-btn")
        # ── Saved stories: last 3, each replayable (gr.State session memory) ──
        # `current` holds the dict returned by the last _tell_a_story call;
        # `history` holds the list of saved dicts, newest first.
        current = gr.State(None)
        history = gr.State([])
        gr.HTML('<div class="section-title">π Your saved stories</div>')
        # One (group, markdown, audio) triple per history slot, created hidden;
        # _history_updates later shows/fills them in this same order.
        slots = []
        with gr.Row(elem_id="history-row", equal_height=False):
            for _ in range(HISTORY_SIZE):
                with gr.Column(scale=1, min_width=240):
                    with gr.Group(visible=False, elem_classes="saved-card") as slot_group:
                        slot_md = gr.Markdown(elem_classes="saved-text")
                        slot_audio = gr.Audio(type="filepath", label="Replay")
                slots.append((slot_group, slot_md, slot_audio))
        # ── Wiring ───────────────────────────────────────────────────────
        # Changing the language swaps the style choices.
        language.change(_styles_for, inputs=language, outputs=style)
        # Show thumbnails of the uploaded pictures.
        images.change(_preview, inputs=images, outputs=preview)
        # Voice is a bonus: it fills the typed box, which stays primary (§2, §14).
        mic.stop_recording(_voice_to_text, inputs=[mic, language], outputs=instruction)
        # The four outputs match the 4-tuple returned by _tell_a_story.
        generate_btn.click(
            _tell_a_story,
            inputs=[images, instruction, language, style, child_name],
            outputs=[story_out, audio_out, badge_out, current],
        )
        # Flatten slots for the Save outputs: history + (group, md, audio) × N.
        slot_outputs = [comp for slot in slots for comp in slot]
        save_btn.click(
            _save_story,
            inputs=[current, history],
            outputs=[history, *slot_outputs],
        )
    return demo
# Module-level `demo` so Hugging Face Spaces (gradio SDK) can discover it.
# `.queue()` is applied here once, so the same queued app object is used
# whether this module is imported or run directly.
demo = build_ui().queue()

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|