# Source: Hugging Face Space "build-small-hackathon/memory-bridge"
# Space status when captured: Runtime error
# File size: 15,240 bytes

import gradio as gr
import requests
import uuid
import base64
import json
from pathlib import Path

# ── CONFIG ────────────────────────────────────────────────────────────────────
# All backend endpoints share one host pattern and differ only in their slug.
_MODAL_ENDPOINT = "https://sheikhmdrakib-career--{slug}.modal.run"

BUILD_PERSONA_URL = _MODAL_ENDPOINT.format(slug="build-persona")    # persona builder
CHAT_URL = _MODAL_ENDPOINT.format(slug="chat")                      # chat turns
TRANSCRIBE_URL = _MODAL_ENDPOINT.format(slug="transcribe")          # voice note -> transcript
VISION_URL = _MODAL_ENDPOINT.format(slug="describe-photo")          # photo -> description
OCR_URL = _MODAL_ENDPOINT.format(slug="ocr-document")               # scan -> text
TTS_URL = _MODAL_ENDPOINT.format(slug="text-to-speech")             # reply -> audio
LIST_PERSONAS_URL = _MODAL_ENDPOINT.format(slug="list-personas")    # saved personas
# ─────────────────────────────────────────────────────────────────────────────


def encode_file(path):
    """Read the file at *path* in binary mode and return it as base64 text."""
    with open(path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()


def _clip(text, limit=500):
    """Return *text* cut to at most *limit* characters (with an ellipsis when cut)."""
    text = str(text)
    return text if len(text) <= limit else text[:limit] + "…"


def build_persona(name, relationship, text_input, photo_captions, voice_file, photo_files, scanned_files):
    """Gather every supplied memory source and ask the backend to build a persona.

    Processing order: transcribe the voice note, describe each photo, OCR each
    scan, then post everything collected to the persona builder. Each step is
    best-effort: a failure is recorded in the status log and the remaining
    steps still run.

    Args:
        name: Name of the person being remembered (required).
        relationship: The user's relationship to that person.
        text_input: Pasted writings; multiple entries are separated by ``---``.
        photo_captions: Manual photo captions, one per line.
        voice_file: Path of a voice note, or None.
        photo_files: Paths of photos to describe, or None/empty.
        scanned_files: Paths of scanned documents to OCR, or None/empty.

    Returns:
        A 3-tuple ``(status_markdown, persona_id, update)``. On success
        ``persona_id`` is the new 8-character ID and ``update`` is
        ``gr.update(value=persona_id)``; otherwise ``persona_id`` is None and
        ``update`` is a no-op ``gr.update()``.
    """
    # Treat a missing (None) field like an empty one instead of crashing on .strip().
    name = (name or "").strip()
    relationship = (relationship or "").strip()
    text_input = (text_input or "").strip()
    photo_captions = (photo_captions or "").strip()

    if not name:
        return "❌ Please enter the person's name.", None, gr.update()

    texts = [t.strip() for t in text_input.split("---") if t.strip()]
    captions = [c.strip() for c in photo_captions.split("\n") if c.strip()]
    voice_transcripts = []

    # Step-by-step log so the user sees exactly what succeeded and what failed.
    status_log = []

    if not texts and not captions and voice_file is None and not photo_files and not scanned_files:
        return "❌ Please provide at least one input.", None, gr.update()

    # 1. Transcribe voice note (Cohere ASR)
    if voice_file is not None:
        try:
            r = requests.post(TRANSCRIBE_URL, json={
                "audio_b64": encode_file(voice_file),
                "filename": Path(voice_file).name,
            }, timeout=180)

            if r.status_code == 200:
                transcript = r.json().get("transcript", "")
                if transcript:
                    voice_transcripts.append(transcript)
                    status_log.append("✅ Voice note transcribed successfully.")
                else:
                    status_log.append("⚠️ Voice note processed, but no text was found.")
            else:
                status_log.append(
                    f"❌ Voice transcription failed (HTTP {r.status_code}): {_clip(r.text)}"
                )
        except Exception as e:  # best-effort step: report and keep going
            status_log.append(f"❌ Voice transcription failed: {e}")

    # 2. Describe uploaded photos (MiniCPM-V)
    if photo_files:
        described = 0
        for i, photo in enumerate(photo_files, start=1):
            try:
                r = requests.post(VISION_URL, json={"image_b64": encode_file(photo)}, timeout=180)
                if r.status_code == 200:
                    desc = r.json().get("description", "")
                    if desc:
                        captions.append(desc)
                        described += 1
                    else:
                        # Mirror the voice step: an empty result is reported, not dropped.
                        status_log.append(f"⚠️ Photo {i} processed, but no description was returned.")
                else:
                    status_log.append(f"❌ Photo {i} description failed (HTTP {r.status_code}).")
            except Exception as e:  # best-effort step: report and keep going
                status_log.append(f"❌ Photo {i} description failed: {e}")
        if described > 0:
            status_log.append(f"✅ {described}/{len(photo_files)} photos described successfully.")

    # 3. OCR scanned letters (Nemotron Parse)
    if scanned_files:
        read_ok = 0
        for i, scan in enumerate(scanned_files, start=1):
            try:
                r = requests.post(OCR_URL, json={"image_b64": encode_file(scan)}, timeout=180)
                if r.status_code == 200:
                    ocr_text = r.json().get("text", "")
                    if ocr_text:
                        texts.append(ocr_text)
                        read_ok += 1
                    else:
                        status_log.append(f"⚠️ Scan {i} processed, but no text was found.")
                else:
                    status_log.append(f"❌ Scan {i} OCR failed (HTTP {r.status_code}).")
            except Exception as e:  # best-effort step: report and keep going
                status_log.append(f"❌ Scan {i} OCR failed: {e}")
        if read_ok > 0:
            status_log.append(f"✅ {read_ok}/{len(scanned_files)} scanned documents read successfully.")

    # The builder needs at least some material to work with.
    if not texts and not captions and not voice_transcripts:
        status_log.append("\n❌ **ABORTED:** All AI processing failed, and no manual text/captions were provided. Cannot build persona.")
        return "\n\n".join(status_log), None, gr.update()

    # 4. Build persona (Qwen 32B)
    persona_id = str(uuid.uuid4())[:8]
    try:
        r = requests.post(BUILD_PERSONA_URL, json={
            "persona_id": persona_id, "name": name,
            "relationship": relationship,
            "texts": texts, "photo_captions": captions,
            "voice_transcripts": voice_transcripts,
        }, timeout=1200)

        if r.status_code == 200:
            result = r.json()
            if result.get("success"):
                # Tolerate a missing/None persona payload or fields in the reply.
                persona = result.get("persona") or {}
                traits = [str(t) for t in (persona.get("personality_traits") or [])[:3]]
                memories = persona.get("key_memories") or []
                language = persona.get("language", "Auto")
                # One log entry per line: entries are joined with blank lines so
                # each renders as its own Markdown paragraph.
                status_log.extend([
                    f"🎉 **{name}'s memory has been successfully preserved!**",
                    f"**Persona ID:** `{persona_id}`",
                    f"**Personality:** {', '.join(traits)}",
                    f"**Language:** {language}",
                    f"**Memories captured:** {len(memories)}",
                    "Go to the **💬 Talk** tab and enter the Persona ID.",
                ])
                return "\n\n".join(status_log), persona_id, gr.update(value=persona_id)
            status_log.append(f"\n❌ Persona builder failed: {_clip(result)}")
        else:
            status_log.append(f"\n❌ Persona builder failed (HTTP {r.status_code}): {_clip(r.text)}")

    except Exception as e:  # network, JSON or unexpected-shape errors
        status_log.append(f"\n❌ Persona builder failed: {e}")

    # Fallback return if the final step failed
    return "\n\n".join(status_log), None, gr.update()


def chat_with_persona(persona_id, message, history, language, enable_voice):
    """Send one chat turn to the persona backend and optionally voice the reply.

    Args:
        persona_id: ID of the persona to talk to.
        message: The user's new message.
        history: Prior turns as a list of ``{"role", "content"}`` dicts, or None.
        language: Language choice forwarded to the backend.
        enable_voice: When true, request a spoken version of the reply.

    Returns:
        A 3-tuple ``("", updated_history, audio_path)``. The empty string
        clears the message box; ``audio_path`` is the path of a generated
        ``.wav`` file, or None when voice is disabled or unavailable. The
        input ``history`` list is never mutated.
    """
    import logging
    import tempfile

    history = list(history or [])
    # Treat a missing (None) field like an empty one instead of crashing on .strip().
    persona_id = (persona_id or "").strip()
    message = (message or "").strip()

    if not persona_id:
        history.append({"role": "assistant", "content": "⚠️ Please enter a Persona ID first."})
        return "", history, None

    if not message:
        return "", history, None

    voice_desc = "warm elderly voice"
    chat_ok = False
    try:
        r = requests.post(CHAT_URL, json={
            "persona_id": persona_id,
            "history": [{"role": m["role"], "content": m["content"]} for m in history],
            "message": message,
            "language": language,
        }, timeout=180)
        if r.status_code == 200:
            result = r.json()
            response_text = result.get("text") or result.get("response") or "..."
            voice_desc = result.get("voice_description") or voice_desc
            chat_ok = True
        else:
            # Surface HTTP failures instead of showing a bare "..." reply.
            response_text = f"⚠️ Error: chat request failed (HTTP {r.status_code})."
    except Exception as e:  # network, JSON or unexpected-shape errors
        response_text = f"⚠️ Error: {e}"

    history += [
        {"role": "user", "content": message},
        {"role": "assistant", "content": response_text},
    ]

    # Generate voice response (VoxCPM2) — only for a real reply, never for an
    # error message.
    audio_path = None
    if enable_voice and chat_ok:
        try:
            r = requests.post(TTS_URL, json={
                "text": response_text,
                "voice_description": voice_desc,
            }, timeout=180)
            if r.status_code == 200 and r.content:
                # delete=False: the file must outlive this call because only its
                # path is returned to the caller.
                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                    f.write(r.content)
                audio_path = f.name
            else:
                logging.getLogger(__name__).warning(
                    "Text-to-speech failed (HTTP %s)", r.status_code
                )
        except Exception:
            # Voice is optional: keep the text reply, but leave a trace in the logs.
            logging.getLogger(__name__).exception("Text-to-speech request failed")

    return "", history, audio_path


def load_personas():
    """Fetch the saved personas and format them as Markdown.

    The request is tried twice; only if both attempts fail is the
    "waking up" message returned.

    Returns:
        One Markdown paragraph per persona (name, relationship, ID),
        ``"No personas saved yet."`` when there are none, or a warning
        message when the backend could not be reached.
    """
    for attempt in range(2):
        try:
            r = requests.get(LIST_PERSONAS_URL, timeout=90)
            # An HTTP error body must not be mistaken for an empty persona list.
            r.raise_for_status()
            personas = r.json().get("personas") or []
        except Exception:  # network, HTTP status or JSON errors
            if attempt == 0:
                continue
            return "⚠️ Modal is waking up, please try again in 30 seconds."

        # Missing fields get a placeholder rather than raising KeyError.
        lines = [
            f"**{p.get('name', 'Unknown')}** ({p.get('relationship', 'Unknown')}) — ID: `{p.get('id', '?')}`"
            for p in personas
            if isinstance(p, dict)
        ]
        if not lines:
            return "No personas saved yet."
        return "\n\n".join(lines)


# ── UI ────────────────────────────────────────────────────────────────────────

# Custom stylesheet for the app: dark background, Lora headings, Source Sans 3
# body text, plus the header, divider and model-badge classes used by the
# gr.HTML snippets below. It is handed to demo.launch() at the bottom of the file.
css = """
@import url('https://fonts.googleapis.com/css2?family=Lora:ital,wght@0,400;0,600;1,400&family=Source+Sans+3:wght@300;400;600&display=swap');
* { box-sizing: border-box; }
body, .gradio-container { background: #0e0b08 !important; font-family: 'Source Sans 3', sans-serif !important; color: #e8dcc8 !important; }
.gradio-container { max-width: 900px !important; margin: 0 auto !important; }
h1, h2, h3 { font-family: 'Lora', serif !important; color: #d4a96a !important; }
.header-title { text-align: center; font-family: 'Lora', serif; font-size: 2.4em; color: #d4a96a; margin: 24px 0 4px 0; }
.header-sub { text-align: center; color: #8a7560; font-size: 1em; margin-bottom: 28px; font-style: italic; }
.divider { border: none; border-top: 1px solid #2a2015; margin: 20px 0; }
label { color: #8a7560 !important; font-size: 0.85em !important; letter-spacing: 0.08em !important; text-transform: uppercase !important; }
textarea, input[type="text"] { background: #1a1510 !important; border: 1px solid #3a2e1e !important; color: #e8dcc8 !important; border-radius: 6px !important; }
.model-badge { display: inline-block; background: #1f1710; border: 1px solid #3a2e1e; border-radius: 4px; padding: 2px 8px; font-size: 0.75em; color: #8a7560; margin: 2px; }
"""

def _send_message(persona_id, message, history, language, voice_enabled):
    """Run one chat turn and return the new history for both the State and the Chatbot.

    Without writing the history back to the State, every turn would start from
    an empty conversation: the backend would get no context and the chatbot
    would only ever show the latest exchange.
    """
    cleared_box, updated_history, audio_path = chat_with_persona(
        persona_id, message, history, language, voice_enabled
    )
    return cleared_box, updated_history, updated_history, audio_path


# Three-tab UI: preserve a memory, talk to a persona, list saved personas.
with gr.Blocks(title="Memory Keeper") as demo:

    gr.HTML("""
        <div class="header-title">🕯️ Memory Keeper</div>
        <div class="header-sub">Preserve the voice of someone you love. Talk to them again.</div>
        <hr class="divider">
        <div style="text-align:center; margin-bottom:16px;">
            <span class="model-badge">🧠 Qwen2.5-32B</span>
            <span class="model-badge">🎤 Cohere Transcribe</span>
            <span class="model-badge">👁️ MiniCPM-V 4.6</span>
            <span class="model-badge">📄 Nemotron Parse</span>
            <span class="model-badge">🔊 VoxCPM2</span>
            <span class="model-badge">🌍 Tiny Aya Fire</span>
        </div>
    """)

    with gr.Tabs():

        # ── TAB 1: PRESERVE ──
        with gr.Tab("📜 Preserve a Memory"):
            gr.HTML("<p style='color:#8a7560; font-style:italic; margin-bottom:16px;'>Upload letters, photos, voice notes, or scanned documents. Each is processed by a specialized AI model.</p>")

            with gr.Row():
                name_input = gr.Textbox(label="Their Name", placeholder="e.g. Dadu, Nana, Abba...")
                relationship_input = gr.Textbox(label="Your Relationship", placeholder="e.g. Grandfather, Mother...")

            text_input = gr.Textbox(
                label="📝 Letters / Diary Entries / Writings",
                placeholder="Paste their writings here. Separate multiple entries with ---",
                lines=6,
            )

            with gr.Row():
                photo_files = gr.File(
                    label="🖼️ Photos (MiniCPM-V 4.6 will describe them)",
                    file_count="multiple", file_types=["image"],
                )
                scanned_files = gr.File(
                    label="📄 Scanned Letters/Docs (Nemotron Parse OCR)",
                    file_count="multiple", file_types=["image"],
                )

            photo_captions = gr.Textbox(
                label="🖼️ Manual Photo Captions (optional, one per line)",
                placeholder="Or describe photos manually here...",
                lines=3,
            )

            voice_input = gr.Audio(
                label="🎤 Voice Note (Cohere Transcribe ASR)",
                type="filepath", sources=["upload", "microphone"],
            )

            build_btn = gr.Button("✨ Preserve Their Memory", variant="primary")
            build_output = gr.Markdown()
            # Receive the new persona ID from build_persona; nothing in this
            # block reads them back.
            persona_id_state = gr.State()
            persona_id_hidden = gr.Textbox(visible=False)

            build_btn.click(
                fn=build_persona,
                inputs=[name_input, relationship_input, text_input, photo_captions,
                        voice_input, photo_files, scanned_files],
                outputs=[build_output, persona_id_state, persona_id_hidden],
                show_progress="full",
            )

        # ── TAB 2: TALK ──
        with gr.Tab("💬 Talk to Them"):
            gr.HTML("<p style='color:#8a7560; font-style:italic; margin-bottom:16px;'>Enter the Persona ID and start a conversation. Enable voice to hear them speak.</p>")

            with gr.Row():
                persona_id_input = gr.Textbox(label="Persona ID", placeholder="e.g. a3f9c2b1")
                language_select = gr.Dropdown(
                    label="Language", choices=["auto", "English", "Bengali", "Hindi", "Chinese", "Japanese", "Korean", "Thai"], value="auto",
                )
                enable_voice = gr.Checkbox(label="🔊 Voice Response (VoxCPM2)", value=False)

            chatbot = gr.Chatbot(label="", height=420, placeholder="*Their words will appear here...*")

            with gr.Row():
                msg_input = gr.Textbox(label="Your message", placeholder="What would you like to say?", lines=2, scale=4)
                send_btn = gr.Button("Send →", variant="primary", scale=1)

            voice_output = gr.Audio(label="🔊 Voice Response", visible=True, autoplay=True)
            clear_btn = gr.Button("Clear conversation", variant="secondary", size="sm")
            chat_history = gr.State([])

            # chat_history is both read and written so the conversation
            # accumulates across turns; the button and Enter share one wiring.
            chat_inputs = [persona_id_input, msg_input, chat_history, language_select, enable_voice]
            chat_outputs = [msg_input, chat_history, chatbot, voice_output]
            send_btn.click(fn=_send_message, inputs=chat_inputs, outputs=chat_outputs)
            msg_input.submit(fn=_send_message, inputs=chat_inputs, outputs=chat_outputs)
            clear_btn.click(lambda: ([], []), outputs=[chat_history, chatbot])

        # ── TAB 3: SAVED ──
        with gr.Tab("📁 Saved Memories"):
            refresh_btn = gr.Button("🔄 Load Saved Memories", variant="secondary")
            personas_output = gr.Markdown()
            refresh_btn.click(fn=load_personas, outputs=personas_output)

    gr.HTML("""
        <hr class="divider">
        <p style='text-align:center; color:#3a2e1e; font-size:0.8em; font-style:italic;'>
            Built for Build Small Hackathon · 6 AI Models · Hosted on Modal + Hugging Face
        </p>
    """)

# Start the Gradio server only when this file is run as a script.
# NOTE(review): `css` is passed to launch() here rather than to gr.Blocks();
# which of the two accepts it depends on the installed Gradio version — confirm.
if __name__ == "__main__":
    demo.launch(css=css, share=True)