Spaces:

build-small-hackathon
/

memory-bridge

Runtime error

Sheikh Mohammad Rakib

feat: enable public sharing in demo launch configuration

0a5baed 13 days ago

15.2 kB

	import gradio as gr
	import requests
	import uuid
	import base64
	import json
	from pathlib import Path

	# ── CONFIG ────────────────────────────────────────────────────────────────────
	BUILD_PERSONA_URL = "https://sheikhmdrakib-career--build-persona.modal.run"
	CHAT_URL = "https://sheikhmdrakib-career--chat.modal.run"
	TRANSCRIBE_URL = "https://sheikhmdrakib-career--transcribe.modal.run"
	VISION_URL = "https://sheikhmdrakib-career--describe-photo.modal.run"
	OCR_URL = "https://sheikhmdrakib-career--ocr-document.modal.run"
	TTS_URL = "https://sheikhmdrakib-career--text-to-speech.modal.run"
	LIST_PERSONAS_URL = "https://sheikhmdrakib-career--list-personas.modal.run"
	# ─────────────────────────────────────────────────────────────────────────────


	def encode_file(path) -> str:
	    """Return the contents of the file at ``path`` as base64 text.

	    Args:
	        path: Filesystem path of the file to read; it is opened in binary mode.

	    Returns:
	        The file's bytes, base64-encoded and decoded to ``str`` so the value
	        can be placed directly into a JSON request body.

	    Raises:
	        OSError: If the file cannot be opened or read.
	    """
	    with open(path, "rb") as f:
	        # base64 output is pure ASCII, so the decode can never fail.
	        return base64.b64encode(f.read()).decode("ascii")


	def build_persona(name, relationship, text_input, photo_captions, voice_file, photo_files, scanned_files):
	    """Run every provided input through the AI endpoints and build a persona.

	    The voice note is sent to TRANSCRIBE_URL, each photo to VISION_URL and each
	    scan to OCR_URL. Whatever text comes back is combined with the manually
	    entered writings and captions and posted to BUILD_PERSONA_URL. A failure in
	    one step is recorded in the status log and does not stop the other steps.

	    Args:
	        name: Name of the person being preserved; required.
	        relationship: The user's relationship to that person.
	        text_input: Free text; multiple entries are separated by ``---``.
	        photo_captions: Manual photo captions, one per line.
	        voice_file: Path of a voice recording, or ``None``.
	        photo_files: Iterable of photo file paths, or ``None``/empty.
	        scanned_files: Iterable of scanned-document paths, or ``None``/empty.

	    Returns:
	        A 3-tuple ``(status_text, persona_id, hidden_update)``. On success
	        ``persona_id`` is the new 8-character ID and ``hidden_update`` is a
	        ``gr.update`` carrying it; otherwise ``persona_id`` is ``None`` and
	        ``hidden_update`` is a no-op ``gr.update()``.
	    """
	    # Treat None like an empty string so a cleared field cannot crash .strip().
	    name = (name or "").strip()
	    if not name:
	        return "❌ Please enter the person's name.", None, gr.update()

	    relationship = (relationship or "").strip()
	    texts = [t.strip() for t in (text_input or "").split("---") if t.strip()]
	    captions = [c.strip() for c in (photo_captions or "").split("\n") if c.strip()]
	    voice_transcripts = []

	    # Step-by-step log shown to the user: exactly what succeeded and what failed.
	    status_log = []

	    if not texts and not captions and voice_file is None and not photo_files and not scanned_files:
	        return "❌ Please provide at least one input.", None, gr.update()

	    # 1. Transcribe voice note (Cohere ASR)
	    if voice_file is not None:
	        try:
	            r = requests.post(TRANSCRIBE_URL, json={
	                "audio_b64": encode_file(voice_file),
	                "filename": Path(voice_file).name,
	            }, timeout=180)

	            if r.status_code == 200:
	                transcript = r.json().get("transcript", "")
	                if transcript:
	                    voice_transcripts.append(transcript)
	                    status_log.append("✅ Voice note transcribed successfully.")
	                else:
	                    status_log.append("⚠️ Voice note processed, but no text was found.")
	            else:
	                status_log.append(f"❌ Voice transcription failed (HTTP {r.status_code}): {r.text}")
	        except Exception as e:
	            # Best effort: a failed step is reported, the remaining steps still run.
	            status_log.append(f"❌ Voice transcription failed: {e}")

	    # 2. Describe uploaded photos (MiniCPM-V)
	    if photo_files:
	        success_count = 0
	        for number, photo in enumerate(photo_files, start=1):
	            try:
	                r = requests.post(VISION_URL, json={"image_b64": encode_file(photo)}, timeout=180)
	                if r.status_code == 200:
	                    desc = r.json().get("description", "")
	                    if desc:
	                        captions.append(desc)
	                        success_count += 1
	                    else:
	                        # Mirror the voice step: an empty answer is reported, not dropped.
	                        status_log.append(f"⚠️ Photo {number} processed, but no description was returned.")
	                else:
	                    status_log.append(f"❌ Photo {number} description failed (HTTP {r.status_code}).")
	            except Exception as e:
	                status_log.append(f"❌ Photo {number} description failed: {e}")
	        if success_count > 0:
	            status_log.append(f"✅ {success_count}/{len(photo_files)} photos described successfully.")

	    # 3. OCR scanned letters (Nemotron Parse)
	    if scanned_files:
	        success_count = 0
	        for number, scan in enumerate(scanned_files, start=1):
	            try:
	                r = requests.post(OCR_URL, json={"image_b64": encode_file(scan)}, timeout=180)
	                if r.status_code == 200:
	                    ocr_text = r.json().get("text", "")
	                    if ocr_text:
	                        texts.append(ocr_text)
	                        success_count += 1
	                    else:
	                        status_log.append(f"⚠️ Scan {number} processed, but no text was found.")
	                else:
	                    status_log.append(f"❌ Scan {number} OCR failed (HTTP {r.status_code}).")
	            except Exception as e:
	                status_log.append(f"❌ Scan {number} OCR failed: {e}")
	        if success_count > 0:
	            status_log.append(f"✅ {success_count}/{len(scanned_files)} scanned documents read successfully.")

	    # Check that we have AT LEAST SOME data to build the persona from.
	    if not texts and not captions and not voice_transcripts:
	        status_log.append("\n❌ ABORTED: All AI processing failed, and no manual text/captions were provided. Cannot build persona.")
	        return "\n\n".join(status_log), None, gr.update()

	    # 4. Build persona (Qwen 32B)
	    persona_id = str(uuid.uuid4())[:8]
	    try:
	        r = requests.post(BUILD_PERSONA_URL, json={
	            "persona_id": persona_id, "name": name,
	            "relationship": relationship,
	            "texts": texts, "photo_captions": captions,
	            "voice_transcripts": voice_transcripts,
	        }, timeout=1200)

	        if r.status_code == 200:
	            result = r.json()
	            if result.get("success"):
	                persona = result["persona"]
	                # str() guards the join against non-string traits, which would
	                # otherwise report a failure after the persona was already built.
	                traits = ', '.join(str(t) for t in persona.get('personality_traits', [])[:3])
	                summary = "\n".join([
	                    f"\n🎉 {name}'s memory has been successfully preserved!",
	                    "",
	                    f"Persona ID: `{persona_id}`",
	                    f"Personality: {traits}",
	                    f"Language: {persona.get('language', 'Auto')}",
	                    f"Memories captured: {len(persona.get('key_memories', []))}",
	                    "",
	                    "Go to the 💬 Talk tab and enter the Persona ID.",
	                ])
	                status_log.append(summary)
	                # Same separator as the failure paths so each entry is its own paragraph.
	                return "\n\n".join(status_log), persona_id, gr.update(value=persona_id)
	            else:
	                status_log.append(f"\n❌ Persona builder failed: {result}")
	        else:
	            status_log.append(f"\n❌ Persona builder failed (HTTP {r.status_code}): {r.text}")

	    except Exception as e:
	        status_log.append(f"\n❌ Persona builder failed: {e}")

	    # Fallback return if the final step failed.
	    return "\n\n".join(status_log), None, gr.update()


	def chat_with_persona(persona_id, message, history, language, enable_voice):
	    """Send one user message to a persona and return the updated conversation.

	    Args:
	        persona_id: ID of the persona to talk to; blank or ``None`` is rejected
	            with an assistant-side warning message.
	        message: The user's message; blank or ``None`` messages are ignored.
	        history: Earlier turns as a list of ``{"role", "content"}`` dicts, or
	            ``None`` for a fresh conversation. The list is not mutated.
	        language: Language choice, forwarded unchanged to the chat endpoint.
	        enable_voice: When truthy, also request a spoken version of the reply.

	    Returns:
	        A 3-tuple ``(textbox_value, history, audio_path)``: an empty string to
	        clear the input box, the history with any new turns appended, and the
	        path of a WAV file holding the spoken reply (``None`` when voice is
	        disabled or unavailable).
	    """
	    history = history or []
	    # Treat None like an empty string so a cleared field cannot crash .strip().
	    persona_id = (persona_id or "").strip()
	    message = message or ""

	    if not persona_id:
	        history = history + [{"role": "assistant", "content": "⚠️ Please enter a Persona ID first."}]
	        return "", history, None

	    if not message.strip():
	        return "", history, None

	    default_voice = "warm elderly voice"
	    voice_desc = default_voice
	    chat_ok = False
	    try:
	        r = requests.post(CHAT_URL, json={
	            "persona_id": persona_id,
	            "history": [{"role": m["role"], "content": m["content"]} for m in history],
	            "message": message.strip(),
	            "language": language,
	        }, timeout=180)
	        if r.status_code == 200:
	            result = r.json()
	            response_text = result.get("text", result.get("response", "..."))
	            voice_desc = result.get("voice_description", default_voice)
	            chat_ok = True
	        else:
	            response_text = f"⚠️ Error (HTTP {r.status_code}): {r.text}"
	    except Exception as e:
	        # UI boundary: show the problem in the chat instead of crashing the handler.
	        response_text = f"⚠️ Error: {e}"

	    history = history + [
	        {"role": "user", "content": message},
	        {"role": "assistant", "content": response_text},
	    ]

	    # Generate voice response (VoxCPM2). Skipped when the chat call failed so
	    # that an error message is never read aloud.
	    audio_path = None
	    if enable_voice and chat_ok:
	        try:
	            r = requests.post(TTS_URL, json={
	                "text": response_text,
	                "voice_description": voice_desc,
	            }, timeout=180)
	            if r.status_code == 200:
	                import tempfile
	                # delete=False: the file must outlive this function so the
	                # audio component can read it afterwards.
	                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
	                    f.write(r.content)
	                audio_path = f.name
	            else:
	                gr.Warning(f"Voice response unavailable (HTTP {r.status_code}).")
	        except Exception as e:
	            # Voice is optional: keep the text reply, but tell the user why
	            # there is no audio instead of failing silently.
	            gr.Warning(f"Voice response unavailable: {e}")

	    return "", history, audio_path


	def load_personas():
	    """Fetch the saved personas and format them as Markdown text.

	    The list endpoint is tried twice in a row; any failure on the first try
	    (network error, non-2xx status, unparsable body) triggers the second try.

	    Returns:
	        One ``name (relationship) — ID`` entry per persona separated by blank
	        lines, ``"No personas saved yet."`` when the list is empty, or a
	        "try again" notice when both attempts fail.
	    """
	    for _attempt in range(2):
	        try:
	            r = requests.get(LIST_PERSONAS_URL, timeout=90)
	            # An HTTP error with a JSON body must not be mistaken for an empty list.
	            r.raise_for_status()
	            personas = r.json().get("personas", [])
	            if not personas:
	                return "No personas saved yet."
	            # .get() keeps one incomplete record from hiding the whole list.
	            lines = [
	                f"{p.get('name', 'Unknown')} ({p.get('relationship', 'Unknown')}) — ID: `{p.get('id', '?')}`"
	                for p in personas
	            ]
	            return "\n\n".join(lines)
	        except Exception:
	            # Best effort at the UI boundary: fall through to the next attempt.
	            continue
	    return "⚠️ Modal is waking up, please try again in 30 seconds."


	# ── UI ────────────────────────────────────────────────────────────────────────

	# Custom stylesheet for the app, handed to demo.launch(css=css) at the bottom
	# of the file. It imports the Lora and Source Sans 3 web fonts, sets the dark
	# background and text colours, and styles the helper classes used by the
	# gr.HTML snippets below (header-title, header-sub, divider, model-badge).
	css = """
	@import url('https://fonts.googleapis.com/css2?family=Lora:ital,wght@0,400;0,600;1,400&family=Source+Sans+3:wght@300;400;600&display=swap');
	* { box-sizing: border-box; }
	body, .gradio-container { background: #0e0b08 !important; font-family: 'Source Sans 3', sans-serif !important; color: #e8dcc8 !important; }
	.gradio-container { max-width: 900px !important; margin: 0 auto !important; }
	h1, h2, h3 { font-family: 'Lora', serif !important; color: #d4a96a !important; }
	.header-title { text-align: center; font-family: 'Lora', serif; font-size: 2.4em; color: #d4a96a; margin: 24px 0 4px 0; }
	.header-sub { text-align: center; color: #8a7560; font-size: 1em; margin-bottom: 28px; font-style: italic; }
	.divider { border: none; border-top: 1px solid #2a2015; margin: 20px 0; }
	label { color: #8a7560 !important; font-size: 0.85em !important; letter-spacing: 0.08em !important; text-transform: uppercase !important; }
	textarea, input[type="text"] { background: #1a1510 !important; border: 1px solid #3a2e1e !important; color: #e8dcc8 !important; border-radius: 6px !important; }
	.model-badge { display: inline-block; background: #1f1710; border: 1px solid #3a2e1e; border-radius: 4px; padding: 2px 8px; font-size: 0.75em; color: #8a7560; margin: 2px; }
	"""

	# Three-tab interface: build a persona, chat with one, list the saved ones.
	with gr.Blocks(title="Memory Keeper") as demo:

	    gr.HTML("""
	<div class="header-title">🕯️ Memory Keeper</div>
	<div class="header-sub">Preserve the voice of someone you love. Talk to them again.</div>
	<hr class="divider">
	<div style="text-align:center; margin-bottom:16px;">
	<span class="model-badge">🧠 Qwen2.5-32B</span>
	<span class="model-badge">🎤 Cohere Transcribe</span>
	<span class="model-badge">👁️ MiniCPM-V 4.6</span>
	<span class="model-badge">📄 Nemotron Parse</span>
	<span class="model-badge">🔊 VoxCPM2</span>
	<span class="model-badge">🌍 Tiny Aya Fire</span>
	</div>
	""")

	    with gr.Tabs():

	        # ── TAB 1: PRESERVE ──
	        with gr.Tab("📜 Preserve a Memory"):
	            gr.HTML("<p style='color:#8a7560; font-style:italic; margin-bottom:16px;'>Upload letters, photos, voice notes, or scanned documents. Each is processed by a specialized AI model.</p>")

	            with gr.Row():
	                name_input = gr.Textbox(label="Their Name", placeholder="e.g. Dadu, Nana, Abba...")
	                relationship_input = gr.Textbox(label="Your Relationship", placeholder="e.g. Grandfather, Mother...")

	            text_input = gr.Textbox(
	                label="📝 Letters / Diary Entries / Writings",
	                placeholder="Paste their writings here. Separate multiple entries with ---",
	                lines=6,
	            )

	            with gr.Row():
	                photo_files = gr.File(
	                    label="🖼️ Photos (MiniCPM-V 4.6 will describe them)",
	                    file_count="multiple", file_types=["image"],
	                )
	                scanned_files = gr.File(
	                    label="📄 Scanned Letters/Docs (Nemotron Parse OCR)",
	                    file_count="multiple", file_types=["image"],
	                )

	            photo_captions = gr.Textbox(
	                label="🖼️ Manual Photo Captions (optional, one per line)",
	                placeholder="Or describe photos manually here...",
	                lines=3,
	            )

	            voice_input = gr.Audio(
	                label="🎤 Voice Note (Cohere Transcribe ASR)",
	                type="filepath", sources=["upload", "microphone"],
	            )

	            build_btn = gr.Button("✨ Preserve Their Memory", variant="primary")
	            build_output = gr.Markdown()
	            persona_id_state = gr.State()
	            persona_id_hidden = gr.Textbox(visible=False)

	            # build_persona returns (status text, persona id, update for the hidden box).
	            build_btn.click(
	                fn=build_persona,
	                inputs=[name_input, relationship_input, text_input, photo_captions,
	                        voice_input, photo_files, scanned_files],
	                outputs=[build_output, persona_id_state, persona_id_hidden],
	                show_progress="full",
	            )

	        # ── TAB 2: TALK ──
	        with gr.Tab("💬 Talk to Them"):
	            gr.HTML("<p style='color:#8a7560; font-style:italic; margin-bottom:16px;'>Enter the Persona ID and start a conversation. Enable voice to hear them speak.</p>")

	            with gr.Row():
	                persona_id_input = gr.Textbox(label="Persona ID", placeholder="e.g. a3f9c2b1")
	                language_select = gr.Dropdown(
	                    label="Language", choices=["auto", "English", "Bengali", "Hindi", "Chinese", "Japanese", "Korean", "Thai"], value="auto",
	                )
	                enable_voice = gr.Checkbox(label="🔊 Voice Response (VoxCPM2)", value=False)

	            chatbot = gr.Chatbot(label="", height=420, placeholder="Their words will appear here...")

	            with gr.Row():
	                msg_input = gr.Textbox(label="Your message", placeholder="What would you like to say?", lines=2, scale=4)
	                send_btn = gr.Button("Send →", variant="primary", scale=1)

	            voice_output = gr.Audio(label="🔊 Voice Response", visible=True, autoplay=True)
	            clear_btn = gr.Button("Clear conversation", variant="secondary", size="sm")
	            chat_history = gr.State([])

	            def _chat_and_remember(persona_id, message, history, language, voice_enabled):
	                """Run one chat turn and store the updated history in the state.

	                chat_with_persona returns (textbox value, updated history, audio
	                path). The history is routed to BOTH the chat_history state and
	                the chatbot; without the state write every turn would start from
	                an empty history and the chatbot would show only the last exchange.
	                """
	                cleared, updated, audio = chat_with_persona(
	                    persona_id, message, history, language, voice_enabled
	                )
	                return cleared, updated, updated, audio

	            # Button click and Enter in the textbox trigger the same handler.
	            chat_event = dict(
	                fn=_chat_and_remember,
	                inputs=[persona_id_input, msg_input, chat_history, language_select, enable_voice],
	                outputs=[msg_input, chat_history, chatbot, voice_output],
	            )
	            send_btn.click(**chat_event)
	            msg_input.submit(**chat_event)
	            clear_btn.click(lambda: ([], []), outputs=[chat_history, chatbot])

	        # ── TAB 3: SAVED ──
	        with gr.Tab("📁 Saved Memories"):
	            refresh_btn = gr.Button("🔄 Load Saved Memories", variant="secondary")
	            personas_output = gr.Markdown()
	            refresh_btn.click(fn=load_personas, outputs=personas_output)

	    gr.HTML("""
	<hr class="divider">
	<p style='text-align:center; color:#3a2e1e; font-size:0.8em; font-style:italic;'>
	Built for Build Small Hackathon · 6 AI Models · Hosted on Modal + Hugging Face
	</p>
	""")

	if __name__ == "__main__":
	    # Start the app with the custom stylesheet and a public share link.
	    # NOTE(review): `css=` appears to be accepted by launch() only in newer
	    # Gradio releases (older ones take it on gr.Blocks) — confirm against the
	    # pinned Gradio version.
	    # NOTE(review): share=True is reportedly not supported on Hugging Face
	    # Spaces — verify that it is wanted for this deployment.
	    demo.launch(css=css, share=True)