Spaces:

build-small-hackathon
/

rupkotha

Running

Deb

Prepare for HF Space: disable mock, module-level demo, README

8aa73fd 14 days ago

9.95 kB

	# app.py — Gradio Blocks entry point. UI + wiring only. ZERO model references.
	"""Rupkotha (রূপকথা) — a bedtime-story app for kids.

	This file orchestrates the UI and chains core functions:
	transcribe() → generate_story() → speak()
	It must contain no model names, paths, or model logic — those live only in core/.

	Layout: a two-panel "studio" — a Create panel (language/style, pictures, ask) and a
	Story panel (text + audio + save) — over a night-sky theme. Session memory uses
	gr.State, never browser storage (CLAUDE.md §11).
	"""

	from pathlib import Path

	import gradio as gr

	from core.vision_story import generate_story
	from core.stt import transcribe
	from core.tts import speak
	from core.prompts import STYLES

	# Radio options as (label shown to the user, language code handed to core/).
	_LANGUAGES = [("English", "en"), ("বাংলা", "bn")]
	# Style names available for each language code, taken from STYLES.
	_STYLE_CHOICES = {code: list(options) for code, options in STYLES.items()}

	# Stylesheet expected in assets/ next to this file (only used if it exists).
	_CSS_PATH = Path(__file__).parent.joinpath("assets", "styles.css")

	# Number of saved stories kept per session (CLAUDE.md §11: last 3).
	HISTORY_SIZE = 3


	def _styles_for(language: str):
	    """Build the update that repopulates the style dropdown for `language`.

	    A language code with no entry in `_STYLE_CHOICES` gets the English list.
	    The first style of the chosen list becomes the selected value.
	    """
	    english = _STYLE_CHOICES["en"]
	    options = _STYLE_CHOICES[language] if language in _STYLE_CHOICES else english
	    return gr.update(value=options[0], choices=options)


	def _preview(files):
	    """Mirror the uploaded pictures into the preview gallery.

	    With no uploads (None or an empty list) the gallery is emptied and
	    hidden; otherwise it is shown with the uploaded files.
	    """
	    if not files:
	        return gr.update(value=[], visible=False)
	    return gr.update(value=files, visible=True)


	def _voice_to_text(audio_path, language):
	    """Turn a microphone recording into text for the instruction box.

	    Returns the transcription when there is one. When `transcribe` yields
	    nothing, returns a no-op update so the text already in the box is kept.
	    """
	    heard = transcribe(audio_path, language)
	    if not heard:
	        return gr.update()
	    return heard


	def _tell_a_story(images, instruction, language, style, child_name):
	    """Run the pipeline: pictures + request → story text → narrated audio.

	    Missing inputs are normalised first (no images → empty list, no
	    instruction or name → empty string). The story is then generated and
	    handed to `speak` for narration.

	    Returns a 4-tuple: the story text, the audio path, a badge naming the
	    story and voice sources, and a dict with those three values ("story",
	    "audio", "badge") that the Save button stores as the current result.
	    """
	    pictures = list(images or [])
	    request = instruction if instruction else ""
	    name = child_name if child_name else ""

	    story, model_label = generate_story(
	        image_paths=pictures,
	        instruction=request,
	        language=language,
	        style=style,
	        child_name=name,
	    )
	    wav_path, tts_label = speak(story, language)

	    badge = f"📖 {model_label}　·　🔊 {tts_label}"
	    snapshot = dict(story=story, audio=wav_path, badge=badge)
	    return story, wav_path, badge, snapshot


	def _history_updates(history):
	    """Produce the component updates that redraw every saved-story slot.

	    The result is one flat list of HISTORY_SIZE × 3 updates, ordered
	    (group, markdown, audio) for each slot. Slots that have an entry in
	    `history` are made visible and filled with the story text, its badge
	    and its audio; the remaining slots are hidden and emptied.
	    """
	    shown = history[:HISTORY_SIZE]
	    flat = []

	    # Slots that have a saved story behind them.
	    for saved in shown:
	        text = f"{saved['story']}\n\n<span class='saved-badge'>{saved['badge']}</span>"
	        flat.extend(
	            (
	                gr.update(visible=True),
	                gr.update(value=text),
	                gr.update(value=saved.get("audio")),
	            )
	        )

	    # Whatever slots are left over get hidden and cleared.
	    for _ in range(HISTORY_SIZE - len(shown)):
	        flat.extend(
	            (
	                gr.update(visible=False),
	                gr.update(value=""),
	                gr.update(value=None),
	            )
	        )
	    return flat


	def _save_story(current, history):
	    """Prepend the current story to the session history (newest first, max N).

	    Args:
	        current: The dict stored by the generate handler (keys "story",
	            "audio", "badge"), or None when nothing has been generated yet.
	        history: Previously saved entries, newest first. None is treated
	            as an empty history.

	    Returns:
	        A list whose first item is the new history, followed by the
	        flattened per-slot updates from `_history_updates`. When `current`
	        is missing or has no story text, the history is returned unchanged.
	    """
	    saved = list(history or [])
	    if current and current.get("story"):
	        # Pressing Save more than once on the same result must not fill every
	        # slot with copies and push older stories out: drop any identical
	        # entry first, then put this one at the front.
	        saved = [current] + [entry for entry in saved if entry != current]
	        saved = saved[:HISTORY_SIZE]
	    return [saved, *_history_updates(saved)]


	def build_ui() -> gr.Blocks:
	    """Assemble the Gradio Blocks app and return it without launching.

	    Builds the theme, the hero banner, the Create and Story panels, the
	    HISTORY_SIZE saved-story slots, and wires the event handlers defined in
	    this module to those components.

	    Returns:
	        The constructed `gr.Blocks` instance.
	    """
	    theme = gr.themes.Soft(
	        primary_hue="amber",
	        secondary_hue="orange",
	        neutral_hue="slate",
	        radius_size="lg",
	        font=[gr.themes.GoogleFont("Nunito"), "ui-sans-serif", "sans-serif"],
	    )
	    # Pass the stylesheet only when the file is present, so a missing
	    # assets/styles.css does not become an argument to gr.Blocks.
	    css_kw = {"css_paths": [str(_CSS_PATH)]} if _CSS_PATH.exists() else {}
	    with gr.Blocks(title="রূপকথা · Rupkotha", theme=theme, fill_width=True, **css_kw) as demo:
	        # ── Hero ─────────────────────────────────────────────────────────
	        gr.HTML(
	            """
	<div id="hero">
	<div class="hero-moon">🌙</div>
	<h1>রূপকথা · Rupkotha</h1>
	<p>Show a picture, ask for a story — and hear it told in a warm
	motherly voice.</p>
	</div>
	"""
	        )

	        with gr.Row(elem_id="studio", equal_height=False):
	            # ── Create panel ─────────────────────────────────────────────
	            with gr.Column(scale=5, elem_classes="panel"):
	                gr.HTML('<div class="panel-head"><span class="step">1</span>Choose</div>')
	                with gr.Row():
	                    language = gr.Radio(
	                        choices=_LANGUAGES, value="en",
	                        label="Language · ভাষা", elem_classes="seg",
	                    )
	                    # Starts on the English styles; language.change below
	                    # swaps the choices when the language changes.
	                    style = gr.Dropdown(
	                        choices=_STYLE_CHOICES["en"], value=_STYLE_CHOICES["en"][0],
	                        label="Story style",
	                    )

	                gr.HTML('<div class="panel-head"><span class="step">2</span>Show your pictures</div>')
	                images = gr.File(
	                    file_count="multiple",
	                    type="filepath",
	                    file_types=["image"],
	                    label="Drawings or toys — 1 to 4 pictures",
	                    elem_classes="upload-box",
	                )
	                # Hidden until images.change (wired below) reports uploads.
	                preview = gr.Gallery(
	                    label="Your pictures",
	                    columns=4,
	                    height="auto",
	                    object_fit="contain",  # show the whole image, don't crop/trim
	                    show_label=True,
	                    visible=False,
	                    elem_classes="preview",
	                )

	                gr.HTML('<div class="panel-head"><span class="step">3</span>Ask for a story</div>')
	                mic = gr.Audio(
	                    sources=["microphone"],
	                    type="filepath",
	                    label="🎤 Speak your request (optional) — it fills the box below",
	                )
	                instruction = gr.Textbox(
	                    label="What story do you want?",
	                    placeholder="tell me a story about my cat…",
	                    lines=2,
	                )
	                child_name = gr.Textbox(
	                    label="Your name (optional)",
	                    placeholder="e.g. Rupa — woven into the story",
	                    lines=1,
	                )
	                generate_btn = gr.Button(
	                    "✨ Tell me a story", variant="primary", size="lg",
	                    elem_id="generate-btn",
	                )

	            # ── Story panel ──────────────────────────────────────────────
	            with gr.Column(scale=6, elem_classes="panel story-panel"):
	                gr.HTML('<div class="panel-head">📖 Your story</div>')
	                story_out = gr.Textbox(
	                    show_label=False,
	                    lines=8,
	                    max_lines=40,  # grow to fit the whole story (no inner scrollbar)
	                    autoscroll=False,
	                    placeholder="Your bedtime story will appear here… ✨",
	                    elem_classes="story-text",
	                    container=False,
	                )
	                audio_out = gr.Audio(label="🔊 Listen (press play to replay)", type="filepath")
	                badge_out = gr.Markdown(elem_classes="model-badge")
	                save_btn = gr.Button("💾 Save this story", elem_id="save-btn")

	        # ── Saved stories: last 3, each replayable (gr.State session memory) ─
	        # `current` holds the dict returned by _tell_a_story; `history` holds
	        # the list of saved dicts that _save_story maintains.
	        current = gr.State(None)
	        history = gr.State([])
	        gr.HTML('<div class="section-title">🌟 Your saved stories</div>')
	        # One (group, markdown, audio) triple per slot, in display order.
	        slots = []
	        with gr.Row(elem_id="history-row", equal_height=False):
	            for _ in range(HISTORY_SIZE):
	                with gr.Column(scale=1, min_width=240):
	                    # Each card starts hidden; _history_updates toggles it.
	                    with gr.Group(visible=False, elem_classes="saved-card") as slot_group:
	                        slot_md = gr.Markdown(elem_classes="saved-text")
	                        slot_audio = gr.Audio(type="filepath", label="Replay")
	                slots.append((slot_group, slot_md, slot_audio))

	        # ── Wiring ───────────────────────────────────────────────────────
	        language.change(_styles_for, inputs=language, outputs=style)

	        # Show thumbnails of the uploaded pictures.
	        images.change(_preview, inputs=images, outputs=preview)

	        # Voice is a bonus: it fills the typed box, which stays primary (§2, §14).
	        mic.stop_recording(_voice_to_text, inputs=[mic, language], outputs=instruction)

	        # Output order must match the 4-tuple returned by _tell_a_story.
	        generate_btn.click(
	            _tell_a_story,
	            inputs=[images, instruction, language, style, child_name],
	            outputs=[story_out, audio_out, badge_out, current],
	        )

	        # Flatten slots for the Save outputs: history + (group, md, audio) × N.
	        slot_outputs = [comp for slot in slots for comp in slot]
	        save_btn.click(
	            _save_story,
	            inputs=[current, history],
	            outputs=[history, *slot_outputs],
	        )
	    return demo


	# Module-level `demo` so Hugging Face Spaces (gradio SDK) can discover it.
	# The UI is therefore built at import time, with `.queue()` applied to it.
	demo = build_ui().queue()

	# Launch only when this file is run directly as a script.
	if __name__ == "__main__":
	    demo.launch()