# memory-bridge / app.py
# Sheikh Mohammad Rakib
# feat: enable public sharing in demo launch configuration
# 0a5baed
# Raw
# History Blame Contribute Delete
# 15.2 kB
import gradio as gr
import requests
import uuid
import base64
import json
from pathlib import Path
# ── CONFIG ────────────────────────────────────────────────────────────────────
# Every backend capability lives behind its own Modal web endpoint. All of
# them share one host pattern and differ only in the endpoint slug.
_MODAL_ENDPOINT = "https://sheikhmdrakib-career--{slug}.modal.run"

BUILD_PERSONA_URL = _MODAL_ENDPOINT.format(slug="build-persona")  # persona builder
CHAT_URL = _MODAL_ENDPOINT.format(slug="chat")  # chat with a stored persona
TRANSCRIBE_URL = _MODAL_ENDPOINT.format(slug="transcribe")  # voice note -> transcript
VISION_URL = _MODAL_ENDPOINT.format(slug="describe-photo")  # photo -> description
OCR_URL = _MODAL_ENDPOINT.format(slug="ocr-document")  # scanned document -> text
TTS_URL = _MODAL_ENDPOINT.format(slug="text-to-speech")  # reply text -> audio
LIST_PERSONAS_URL = _MODAL_ENDPOINT.format(slug="list-personas")  # saved personas
# ─────────────────────────────────────────────────────────────────────────────
def encode_file(path):
    """Return the contents of the file at ``path`` as a base64 text string.

    The file is read in binary mode in a single call; the base64 bytes are
    then decoded to ``str`` so the result can be placed in a JSON payload.
    """
    with open(path, mode="rb") as source:
        raw_bytes = source.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()
def _extract_from_images(files, url, result_key, item, action, success_phrase, status_log):
    """POST each image in ``files`` to ``url`` and collect the non-empty results.

    Args:
        files: Paths of the uploaded images.
        url: Endpoint that receives ``{"image_b64": ...}`` for every image.
        result_key: Key of the JSON response that holds the extracted text.
        item: Label for one file in status messages (e.g. ``"Photo"``).
        action: Name of the operation in failure messages (e.g. ``"OCR"``).
        success_phrase: Wording of the final success line.
        status_log: List that receives one status line per problem, plus a
            summary line when at least one file succeeded. Mutated in place.

    Returns:
        The extracted strings, in upload order.
    """
    extracted = []
    for index, file_path in enumerate(files, start=1):
        try:
            r = requests.post(url, json={"image_b64": encode_file(file_path)}, timeout=180)
            if r.status_code != 200:
                status_log.append(f"❌ {item} {index} {action} failed (HTTP {r.status_code}).")
                continue
            value = r.json().get(result_key, "")
        except Exception as e:
            # Best effort: one broken file must not abort the remaining ones.
            status_log.append(f"❌ {item} {index} {action} failed: {e}")
            continue
        if value:
            extracted.append(value)
        else:
            # Mirror the voice-note branch instead of dropping the file silently.
            status_log.append(f"⚠️ {item} {index} processed, but no text was returned.")
    if extracted:
        status_log.append(f"✅ {len(extracted)}/{len(files)} {success_phrase} successfully.")
    return extracted


def build_persona(name, relationship, text_input, photo_captions, voice_file, photo_files, scanned_files):
    """Collect all provided material and ask the backend to build a persona.

    Steps: transcribe the optional voice note, describe uploaded photos, OCR
    scanned documents, then post everything to the persona builder. Every step
    appends a line to a status log so the user sees what succeeded or failed.

    Args:
        name: Name of the person being preserved (required).
        relationship: The user's relationship to that person.
        text_input: Free text; multiple entries are separated by ``---``.
        photo_captions: Manual captions, one per line.
        voice_file: Path of an audio file, or ``None``.
        photo_files: Paths of photos to describe, or empty/``None``.
        scanned_files: Paths of scanned documents to OCR, or empty/``None``.

    Returns:
        A 3-tuple ``(status_markdown, persona_id_or_None, gr.update(...))``.
        The update carries the new persona ID on success and is a no-op
        otherwise.
    """
    # Tolerate None so a missing value yields a validation message, not a crash.
    name = (name or "").strip()
    relationship = (relationship or "").strip()
    text_input = (text_input or "").strip()
    photo_captions = (photo_captions or "").strip()

    if not name:
        return "❌ Please enter the person's name.", None, gr.update()

    texts = [t.strip() for t in text_input.split("---") if t.strip()]
    captions = [c.strip() for c in photo_captions.split("\n") if c.strip()]
    voice_transcripts = []
    # Step-by-step log that shows the user exactly what succeeded/failed.
    status_log = []

    if not texts and not captions and voice_file is None and not photo_files and not scanned_files:
        return "❌ Please provide at least one input.", None, gr.update()

    # 1. Transcribe voice note (Cohere ASR)
    if voice_file is not None:
        try:
            r = requests.post(TRANSCRIBE_URL, json={
                "audio_b64": encode_file(voice_file),
                "filename": Path(voice_file).name,
            }, timeout=180)
            if r.status_code == 200:
                transcript = r.json().get("transcript", "")
                if transcript:
                    voice_transcripts.append(transcript)
                    status_log.append("✅ Voice note transcribed successfully.")
                else:
                    status_log.append("⚠️ Voice note processed, but no text was found.")
            else:
                status_log.append(f"❌ Voice transcription failed (HTTP {r.status_code}): {r.text}")
        except Exception as e:
            status_log.append(f"❌ Voice transcription failed: {e}")

    # 2. Describe uploaded photos (MiniCPM-V)
    if photo_files:
        captions.extend(_extract_from_images(
            photo_files, VISION_URL, "description",
            "Photo", "description", "photos described", status_log,
        ))

    # 3. OCR scanned letters (Nemotron Parse)
    if scanned_files:
        texts.extend(_extract_from_images(
            scanned_files, OCR_URL, "text",
            "Scan", "OCR", "scanned documents read", status_log,
        ))

    # Check that we have AT LEAST SOME data to build the persona from.
    if not texts and not captions and not voice_transcripts:
        status_log.append("\n❌ **ABORTED:** All AI processing failed, and no manual text/captions were provided. Cannot build persona.")
        return "\n\n".join(status_log), None, gr.update()

    # 4. Build persona (Qwen 32B)
    persona_id = str(uuid.uuid4())[:8]
    try:
        r = requests.post(BUILD_PERSONA_URL, json={
            "persona_id": persona_id, "name": name,
            "relationship": relationship,
            "texts": texts, "photo_captions": captions,
            "voice_transcripts": voice_transcripts,
        }, timeout=1200)
        if r.status_code != 200:
            status_log.append(f"\n❌ Persona builder failed (HTTP {r.status_code}): {r.text}")
            return "\n\n".join(status_log), None, gr.update()
        result = r.json()
    except Exception as e:
        status_log.append(f"\n❌ Persona builder failed: {e}")
        return "\n\n".join(status_log), None, gr.update()

    if not isinstance(result, dict) or not result.get("success"):
        status_log.append(f"\n❌ Persona builder failed: {result}")
        return "\n\n".join(status_log), None, gr.update()

    # The persona now exists on the backend. Format the summary defensively so
    # an unexpected payload shape cannot turn a success into a reported failure.
    persona = result.get("persona")
    if not isinstance(persona, dict):
        persona = {}
    traits = ", ".join(str(t) for t in (persona.get("personality_traits") or [])[:3])
    memory_count = len(persona.get("key_memories") or [])

    # Blank lines between entries: Markdown folds single newlines into one
    # paragraph, which would run all of these lines together.
    summary = "\n\n".join([
        f"🎉 **{name}'s memory has been successfully preserved!**",
        f"**Persona ID:** `{persona_id}`",
        f"**Personality:** {traits}",
        f"**Language:** {persona.get('language', 'Auto')}",
        f"**Memories captured:** {memory_count}",
        "Go to the **💬 Talk** tab and enter the Persona ID.",
    ])
    status_log.append(summary)
    return "\n\n".join(status_log), persona_id, gr.update(value=persona_id)
def chat_with_persona(persona_id, message, history, language, enable_voice):
    """Send one user message to the persona chat endpoint.

    Args:
        persona_id: ID of the persona to talk to.
        message: The user's message.
        history: Previous messages as ``{"role": ..., "content": ...}`` dicts,
            or ``None``. The list passed in is never mutated.
        language: Language selection forwarded to the backend unchanged.
        enable_voice: When true, also request a spoken version of the reply.

    Returns:
        A 3-tuple ``("", updated_history, audio_path_or_None)``. The empty
        string clears the message box. ``audio_path`` is a temporary ``.wav``
        file that is intentionally not deleted here, because the caller still
        needs to read it after this function returns.
    """
    history = list(history or [])
    persona_id = (persona_id or "").strip()
    message_text = (message or "").strip()

    if not persona_id:
        history = history + [{"role": "assistant", "content": "⚠️ Please enter a Persona ID first."}]
        return "", history, None
    if not message_text:
        return "", history, None

    chat_ok = False
    voice_desc = "warm elderly voice"
    try:
        r = requests.post(CHAT_URL, json={
            "persona_id": persona_id,
            "history": [{"role": m["role"], "content": m["content"]} for m in history],
            "message": message_text,
            "language": language,
        }, timeout=180)
        if r.status_code == 200:
            result = r.json()
            # `or` chaining also covers keys that are present but empty/None.
            response_text = result.get("text") or result.get("response") or "..."
            voice_desc = result.get("voice_description") or voice_desc
            chat_ok = True
        else:
            response_text = f"⚠️ Error: chat service returned HTTP {r.status_code}"
    except Exception as e:
        response_text = f"⚠️ Error: {e}"

    history = history + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": response_text},
    ]

    # Generate voice response (VoxCPM2). Skipped when the chat call failed so
    # that an error message is never read aloud.
    audio_path = None
    if enable_voice and chat_ok:
        import logging
        import tempfile

        log = logging.getLogger(__name__)
        try:
            r = requests.post(TTS_URL, json={
                "text": response_text,
                "voice_description": voice_desc,
            }, timeout=180)
            if r.status_code == 200:
                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                    f.write(r.content)
                    audio_path = f.name
            else:
                log.warning("Voice synthesis failed with HTTP %s", r.status_code)
        except Exception:
            # Voice is optional: keep the text reply, but record the failure.
            log.exception("Voice synthesis failed")

    return "", history, audio_path
def load_personas():
    """Fetch the saved personas and format them as a Markdown list.

    The request is attempted twice; the user-facing message attributes
    failures to the backend still waking up, so one immediate retry is made
    before giving up.

    Returns:
        One Markdown line per persona (name, relationship, ID) separated by
        blank lines, ``"No personas saved yet."`` when the list is empty, or a
        "try again" notice when both attempts fail.
    """
    for _attempt in range(2):
        try:
            r = requests.get(LIST_PERSONAS_URL, timeout=90)
            # Treat HTTP errors as failures instead of parsing an error body
            # and reporting an empty list.
            r.raise_for_status()
            payload = r.json()
        except (requests.RequestException, ValueError):
            continue

        personas = payload.get("personas", []) if isinstance(payload, dict) else []
        if not personas:
            return "No personas saved yet."

        lines = []
        for p in personas:
            if not isinstance(p, dict):
                continue
            # .get() keeps one incomplete record from hiding the whole list.
            person = p.get("name", "Unknown")
            relation = p.get("relationship", "")
            ident = p.get("id", "?")
            lines.append(f"**{person}** ({relation}) — ID: `{ident}`")
        return "\n\n".join(lines)

    return "⚠️ Modal is waking up, please try again in 30 seconds."
# ── UI ────────────────────────────────────────────────────────────────────────
# Custom stylesheet for the app; it is handed to `demo.launch(css=...)` at the
# bottom of the file. It imports the Lora and Source Sans 3 fonts from Google
# Fonts, applies a dark background with warm text colours, and defines the
# helper classes used by the raw HTML snippets in the UI (`header-title`,
# `header-sub`, `divider`, `model-badge`).
css = """
@import url('https://fonts.googleapis.com/css2?family=Lora:ital,wght@0,400;0,600;1,400&family=Source+Sans+3:wght@300;400;600&display=swap');
* { box-sizing: border-box; }
body, .gradio-container { background: #0e0b08 !important; font-family: 'Source Sans 3', sans-serif !important; color: #e8dcc8 !important; }
.gradio-container { max-width: 900px !important; margin: 0 auto !important; }
h1, h2, h3 { font-family: 'Lora', serif !important; color: #d4a96a !important; }
.header-title { text-align: center; font-family: 'Lora', serif; font-size: 2.4em; color: #d4a96a; margin: 24px 0 4px 0; }
.header-sub { text-align: center; color: #8a7560; font-size: 1em; margin-bottom: 28px; font-style: italic; }
.divider { border: none; border-top: 1px solid #2a2015; margin: 20px 0; }
label { color: #8a7560 !important; font-size: 0.85em !important; letter-spacing: 0.08em !important; text-transform: uppercase !important; }
textarea, input[type="text"] { background: #1a1510 !important; border: 1px solid #3a2e1e !important; color: #e8dcc8 !important; border-radius: 6px !important; }
.model-badge { display: inline-block; background: #1f1710; border: 1px solid #3a2e1e; border-radius: 4px; padding: 2px 8px; font-size: 0.75em; color: #8a7560; margin: 2px; }
"""
def _chat_turn(persona_id, message, history, language, enable_voice):
    """Run one chat turn and also hand the new history back to the session state.

    `chat_with_persona` returns ``(cleared_message, history, audio_path)``.
    The history is returned twice here so that it can be written to both the
    `gr.State` holding the conversation and the visible chatbot. Without the
    state output, every turn would start from an empty history.
    """
    cleared_message, updated_history, audio_path = chat_with_persona(
        persona_id, message, history, language, enable_voice
    )
    return cleared_message, updated_history, updated_history, audio_path


# NOTE(review): the nesting of components inside rows was reconstructed;
# confirm the row membership against the intended layout.
with gr.Blocks(title="Memory Keeper") as demo:
    # Page header with the list of models used by the backend.
    gr.HTML("""
    <div class="header-title">🕯️ Memory Keeper</div>
    <div class="header-sub">Preserve the voice of someone you love. Talk to them again.</div>
    <hr class="divider">
    <div style="text-align:center; margin-bottom:16px;">
    <span class="model-badge">🧠 Qwen2.5-32B</span>
    <span class="model-badge">🎀 Cohere Transcribe</span>
    <span class="model-badge">👁️ MiniCPM-V 4.6</span>
    <span class="model-badge">📄 Nemotron Parse</span>
    <span class="model-badge">🔊 VoxCPM2</span>
    <span class="model-badge">🌍 Tiny Aya Fire</span>
    </div>
    """)

    with gr.Tabs():
        # ── TAB 1: PRESERVE ──
        with gr.Tab("📜 Preserve a Memory"):
            gr.HTML("<p style='color:#8a7560; font-style:italic; margin-bottom:16px;'>Upload letters, photos, voice notes, or scanned documents. Each is processed by a specialized AI model.</p>")
            with gr.Row():
                name_input = gr.Textbox(label="Their Name", placeholder="e.g. Dadu, Nana, Abba...")
                relationship_input = gr.Textbox(label="Your Relationship", placeholder="e.g. Grandfather, Mother...")
            text_input = gr.Textbox(
                label="📝 Letters / Diary Entries / Writings",
                placeholder="Paste their writings here. Separate multiple entries with ---",
                lines=6,
            )
            with gr.Row():
                photo_files = gr.File(
                    label="🖼️ Photos (MiniCPM-V 4.6 will describe them)",
                    file_count="multiple", file_types=["image"],
                )
                scanned_files = gr.File(
                    label="📄 Scanned Letters/Docs (Nemotron Parse OCR)",
                    file_count="multiple", file_types=["image"],
                )
            photo_captions = gr.Textbox(
                label="🖼️ Manual Photo Captions (optional, one per line)",
                placeholder="Or describe photos manually here...",
                lines=3,
            )
            voice_input = gr.Audio(
                label="🎀 Voice Note (Cohere Transcribe ASR)",
                type="filepath", sources=["upload", "microphone"],
            )
            build_btn = gr.Button("✨ Preserve Their Memory", variant="primary")
            build_output = gr.Markdown()
            persona_id_state = gr.State()
            persona_id_hidden = gr.Textbox(visible=False)
            build_btn.click(
                fn=build_persona,
                inputs=[name_input, relationship_input, text_input, photo_captions,
                        voice_input, photo_files, scanned_files],
                outputs=[build_output, persona_id_state, persona_id_hidden],
                show_progress="full",
            )

        # ── TAB 2: TALK ──
        with gr.Tab("💬 Talk to Them"):
            gr.HTML("<p style='color:#8a7560; font-style:italic; margin-bottom:16px;'>Enter the Persona ID and start a conversation. Enable voice to hear them speak.</p>")
            with gr.Row():
                persona_id_input = gr.Textbox(label="Persona ID", placeholder="e.g. a3f9c2b1")
                language_select = gr.Dropdown(
                    label="Language", choices=["auto", "English", "Bengali", "Hindi", "Chinese", "Japanese", "Korean", "Thai"], value="auto",
                )
                enable_voice = gr.Checkbox(label="🔊 Voice Response (VoxCPM2)", value=False)
            chatbot = gr.Chatbot(label="", height=420, placeholder="*Their words will appear here...*")
            with gr.Row():
                msg_input = gr.Textbox(label="Your message", placeholder="What would you like to say?", lines=2, scale=4)
                send_btn = gr.Button("Send →", variant="primary", scale=1)
            voice_output = gr.Audio(label="🔊 Voice Response", visible=True, autoplay=True)
            clear_btn = gr.Button("Clear conversation", variant="secondary", size="sm")
            chat_history = gr.State([])

            # The Send button and pressing Enter share one handler. The
            # history state is both an input and an output so the conversation
            # accumulates across turns.
            chat_listener = dict(
                fn=_chat_turn,
                inputs=[persona_id_input, msg_input, chat_history, language_select, enable_voice],
                outputs=[msg_input, chat_history, chatbot, voice_output],
            )
            send_btn.click(**chat_listener)
            msg_input.submit(**chat_listener)
            clear_btn.click(lambda: ([], []), outputs=[chat_history, chatbot])

        # ── TAB 3: SAVED ──
        with gr.Tab("📁 Saved Memories"):
            refresh_btn = gr.Button("🔄 Load Saved Memories", variant="secondary")
            personas_output = gr.Markdown()
            refresh_btn.click(fn=load_personas, outputs=personas_output)

    # Page footer.
    gr.HTML("""
    <hr class="divider">
    <p style='text-align:center; color:#3a2e1e; font-size:0.8em; font-style:italic;'>
    Built for Build Small Hackathon · 6 AI Models · Hosted on Modal + Hugging Face
    </p>
    """)
if __name__ == "__main__":
    # Start the app only when the file is run as a script. The launch options
    # are gathered in one mapping: the custom stylesheet and public sharing.
    launch_options = {"css": css, "share": True}
    demo.launch(**launch_options)