Spaces:

build-small-hackathon
/

memory-bridge

Runtime error

App Files Community

Sheikh Mohammad Rakib committed on 16 days ago

Commit

c19f82c

1 Parent(s): a9ffeba

refactor: remove custom retry logic and progress tracking in favor of direct requests with status logging

Browse files

Files changed (1) hide show

app.py +116 -215

app.py CHANGED Viewed

@@ -1,9 +1,8 @@
 import gradio as gr
-import spaces
 import requests
 import uuid
 import base64
-import time
 from pathlib import Path
 # ── CONFIG ────────────────────────────────────────────────────────────────────
@@ -22,213 +21,117 @@ def encode_file(path):
         return base64.b64encode(f.read()).decode()
-def modal_post(url, payload, timeout=300, retries=3, label="endpoint"):
-    """
-    POST to a Modal endpoint with retry logic for cold-start empty responses.
-    Returns (dict_result, error_string). One of them will be None.
-    """
-    last_err = None
-    for attempt in range(1, retries + 1):
-        try:
-            r = requests.post(url, json=payload, timeout=timeout)
-            # Empty body = Modal cold-starting or gateway hiccup — retry
-            if not r.text or not r.text.strip():
-                wait = attempt * 10
-                last_err = f"{label} returned HTTP {r.status_code} with empty body (attempt {attempt}/{retries})"
-                if attempt < retries:
-                    time.sleep(wait)
-                    continue
-                return None, last_err
-            try:
-                return r.json(), None
-            except Exception:
-                import json as _json
-                raw = r.text.strip()
-                try:
-                    obj, _ = _json.JSONDecoder().raw_decode(raw)
-                    return obj, None
-                except Exception:
-                    body = raw[:500]
-                    return None, f"{label} HTTP {r.status_code}: {body}"
-        except requests.exceptions.Timeout:
-            last_err = f"{label} timed out after {timeout}s (attempt {attempt}/{retries})"
-            if attempt < retries:
-                time.sleep(5)
-                continue
-        except requests.exceptions.ConnectionError as e:
-            last_err = f"{label} connection error: {e}"
-            if attempt < retries:
-                time.sleep(10)
-                continue
-        except Exception as e:
-            return None, f"{label} unexpected error: {e}"
-    return None, last_err
-def modal_get(url, timeout=90, retries=2, label="endpoint"):
-    """GET from a Modal endpoint with retry."""
-    last_err = None
-    for attempt in range(1, retries + 1):
-        try:
-            r = requests.get(url, timeout=timeout)
-            if not r.text or not r.text.strip():
-                last_err = f"{label} empty response (attempt {attempt}/{retries})"
-                if attempt < retries:
-                    time.sleep(15)
-                    continue
-                return None, last_err
-            try:
-                return r.json(), None
-            except Exception:
-                import json as _json
-                raw = r.text.strip()
-                try:
-                    obj, _ = _json.JSONDecoder().raw_decode(raw)
-                    return obj, None
-                except Exception:
-                    return None, f"{label} HTTP {r.status_code}: {raw[:300]}"
-        except Exception as e:
-            last_err = f"{label} error: {e}"
-            if attempt < retries:
-                time.sleep(10)
-    return None, last_err
-def _safe_json(r, label):
-    """Parse JSON from a response, tolerating extra data after the first object."""
-    import json as _json
-    try:
-        return r.json(), None
-    except Exception:
-        raw = r.text.strip()
-        try:
-            obj, _ = _json.JSONDecoder().raw_decode(raw)
-            return obj, None
-        except Exception:
-            return None, f"{label} HTTP {r.status_code}: {raw[:500]}"
-@spaces.GPU
-def build_persona(name, relationship, text_input, photo_captions, voice_file, photo_files, scanned_files,
-                  progress=gr.Progress(track_tqdm=False)):
     if not name.strip():
         return "❌ Please enter the person's name.", None, gr.update()
     texts = [t.strip() for t in text_input.strip().split("---") if t.strip()] if text_input.strip() else []
     captions = [c.strip() for c in photo_captions.strip().split("\n") if c.strip()] if photo_captions.strip() else []
     voice_transcripts = []
     if not texts and not captions and voice_file is None and not photo_files and not scanned_files:
         return "❌ Please provide at least one input.", None, gr.update()
-    # Work out how many steps we actually have so the bar fills evenly
-    n_photos   = len(photo_files)   if photo_files   else 0
-    n_scans    = len(scanned_files) if scanned_files else 0
-    has_voice  = voice_file is not None
-    total_steps = 1 + int(has_voice) + n_photos + n_scans  # 1 = build-persona
-    done = 0
-    def advance(msg):
-        nonlocal done
-        done += 1
-        progress(done / total_steps, desc=msg)
-    progress(0, desc="🕯️ Starting…")
     # 1. Transcribe voice note (Cohere ASR)
-    if has_voice:
-        progress(done / total_steps, desc="🎤 Transcribing voice note…")
-        result, err = modal_post(
-            TRANSCRIBE_URL,
-            {"audio_b64": encode_file(voice_file), "filename": Path(voice_file).name},
-            timeout=180, label="transcribe"
-        )
-        if err:
-            return f"❌ Voice transcription failed: {err}", None, gr.update()
-        transcript = result.get("transcript", "")
-        if transcript:
-            voice_transcripts.append(transcript)
-        advance("🎤 Voice transcribed")
     # 2. Describe uploaded photos (MiniCPM-V)
-    for i, photo in enumerate(photo_files or []):
-        progress(done / total_steps, desc=f"👁️ Analysing photo {i+1}/{n_photos}…")
-        result, err = modal_post(
-            VISION_URL, {"image_b64": encode_file(photo)},
-            timeout=180, label="vision"
-        )
-        if err:
-            captions.append(f"[Photo description failed: {err}]")
-        else:
-            desc = result.get("description", "")
-            if desc:
-                captions.append(desc)
-        advance(f"👁️ Photo {i+1} analysed")
     # 3. OCR scanned letters (Nemotron Parse)
-    for i, scan in enumerate(scanned_files or []):
-        progress(done / total_steps, desc=f"📄 OCR scan {i+1}/{n_scans}…")
-        result, err = modal_post(
-            OCR_URL, {"image_b64": encode_file(scan)},
-            timeout=180, label="ocr"
-        )
-        if err:
-            texts.append(f"[OCR failed: {err}]")
-        else:
-            ocr_text = result.get("text", "")
-            if ocr_text:
-                texts.append(ocr_text)
-        advance(f"📄 Scan {i+1} done")
-    # 4. Build persona (Qwen 32B) — long timeout, more retries for cold start
-    progress(done / total_steps, desc="🧠 Building persona — this may take 1–3 min…")
     persona_id = str(uuid.uuid4())[:8]
-    result, err = modal_post(
-        BUILD_PERSONA_URL,
-        {
             "persona_id": persona_id, "name": name.strip(),
             "relationship": relationship.strip(),
             "texts": texts, "photo_captions": captions,
             "voice_transcripts": voice_transcripts,
-        },
-        timeout=1200, retries=3, label="build-persona"
-    )
-    if err:
-        return f"❌ {err}", None, gr.update()
-    # Tolerate extra data in the JSON response
-    if isinstance(result, str):
-        import json as _json
-        try:
-            result, _ = _json.JSONDecoder().raw_decode(result.strip())
-        except Exception:
-            return f"❌ Backend error: could not parse response", None, gr.update()
-    progress(1.0, desc="✅ Done!")
-    if result.get("success"):
-        persona = result["persona"]
-        summary = f"""✅ **{name}'s memory has been preserved.**
 **Persona ID:** `{persona_id}`
 **Personality:** {', '.join(persona.get('personality_traits', [])[:3])}
 **Language:** {persona.get('language', 'Auto')}
 **Memories captured:** {len(persona.get('key_memories', []))}
-**Voice style:** {persona.get('voice_description', 'N/A')}
 Go to the **💬 Talk** tab and enter the Persona ID."""
-        return summary, persona_id, gr.update(value=persona_id)
-    else:
-        error_detail = result.get("error", str(result))
-        return f"❌ Backend error: {error_detail}", None, gr.update()
-@spaces.GPU
 def chat_with_persona(persona_id, message, history, language, enable_voice):
     history = history or []
@@ -239,38 +142,34 @@ def chat_with_persona(persona_id, message, history, language, enable_voice):
     if not message.strip():
         return "", history, None
-    result, err = modal_post(
-        CHAT_URL,
-        {
             "persona_id": persona_id.strip(),
             "history": [{"role": m["role"], "content": m["content"]} for m in history],
             "message": message.strip(),
             "language": language,
-        },
-        timeout=180, retries=2, label="chat"
-    )
-    if err:
-        response_text = f"⚠️ {err}"
-        voice_desc = "warm elderly voice"
-    else:
         response_text = result.get("text", result.get("response", "..."))
         voice_desc = result.get("voice_description", "warm elderly voice")
     history = history + [
         {"role": "user", "content": message},
         {"role": "assistant", "content": response_text},
     ]
-    # Generate voice response (VoxCPM2) — skip if response is an error
     audio_path = None
-    if enable_voice and not response_text.startswith("⚠️"):
         try:
             r = requests.post(TTS_URL, json={
                 "text": response_text,
                 "voice_description": voice_desc,
             }, timeout=180)
-            if r.status_code == 200 and r.content:
                 import tempfile
                 with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                     f.write(r.content)
@@ -282,14 +181,18 @@ def chat_with_persona(persona_id, message, history, language, enable_voice):
 def load_personas():
-    result, err = modal_get(LIST_PERSONAS_URL, timeout=90, retries=2, label="list-personas")
-    if err:
-        return f"⚠️ {err}\n\nModal may be waking up — try again in 30 seconds."
-    personas = result.get("personas", [])
-    if not personas:
-        return "No personas saved yet."
-    lines = [f"**{p['name']}** ({p['relationship']}) — ID: `{p['id']}`" for p in personas]
-    return "\n\n".join(lines)
 # ── UI ────────────────────────────────────────────────────────────────────────
@@ -315,14 +218,12 @@ with gr.Blocks(title="Memory Keeper") as demo:
         <div class="header-sub">Preserve the voice of someone you love. Talk to them again.</div>
         <hr class="divider">
         <div style="text-align:center; margin-bottom:16px;">
-            <span class="model-badge">🧠 MiniCPM4.1-8B (8B)</span>
-            <span class="model-badge">👁️ MiniCPM-V 4.6 (8B)</span>
-            <span class="model-badge">🎤 Cohere Transcribe (2B)</span>
-            <span class="model-badge">📄 Nemotron Parse v1.2 (&lt;1B)</span>
-            <span class="model-badge">🌍 Tiny Aya Fire (3.35B)</span>
-            <span class="model-badge">🌊 Tiny Aya Water (3.35B)</span>
-            <span class="model-badge">🔊 VoxCPM2 (~1B)</span>
-            <span class="model-badge">Total: ~26.7B params</span>
         </div>
     """)
@@ -333,12 +234,12 @@ with gr.Blocks(title="Memory Keeper") as demo:
             gr.HTML("<p style='color:#8a7560; font-style:italic; margin-bottom:16px;'>Upload letters, photos, voice notes, or scanned documents. Each is processed by a specialized AI model.</p>")
             with gr.Row():
-                name_input = gr.Textbox(label="Their Name", value="Dadu")
-                relationship_input = gr.Textbox(label="Your Relationship", value="Grandfather")
             text_input = gr.Textbox(
                 label="📝 Letters / Diary Entries / Writings",
-                value="Dear family,\n\nWork hard in your studies. Knowledge is the one thing no one can take from you. When I was young, I walked four miles to school every day — no shoes, no complaints. We were poor but we were honest.\n\nAlways be kind to your mother. She sacrifices more than you will ever know.\n\nSave your money. A good name lasts longer than any wealth.\n\nI am proud of all of you. Your Dadu misses your faces.\n---\nBeta, I heard you got good marks. This makes my heart so happy. Keep going.",
                 lines=6,
             )
@@ -354,7 +255,7 @@ with gr.Blocks(title="Memory Keeper") as demo:
             photo_captions = gr.Textbox(
                 label="🖼️ Manual Photo Captions (optional, one per line)",
-                value="Old black and white photo, Dadu sitting on a wooden chair, wearing white panjabi, smiling warmly\nEid gathering, Dadu surrounded by grandchildren, laughing out loud\nDadu in the garden early morning, watering plants carefully",
                 lines=3,
             )
@@ -390,7 +291,7 @@ with gr.Blocks(title="Memory Keeper") as demo:
             chatbot = gr.Chatbot(label="", height=420, placeholder="*Their words will appear here...*")
             with gr.Row():
-                msg_input = gr.Textbox(label="Your message", value="What is the most important lesson in life?", lines=2, scale=4)
                 send_btn = gr.Button("Send →", variant="primary", scale=1)
             voice_output = gr.Audio(label="🔊 Voice Response", visible=True, autoplay=True)

 import gradio as gr
 import requests
 import uuid
 import base64
+import json
 from pathlib import Path
 # ── CONFIG ────────────────────────────────────────────────────────────────────
         return base64.b64encode(f.read()).decode()
+def build_persona(name, relationship, text_input, photo_captions, voice_file, photo_files, scanned_files):
     if not name.strip():
         return "❌ Please enter the person's name.", None, gr.update()
     texts = [t.strip() for t in text_input.strip().split("---") if t.strip()] if text_input.strip() else []
     captions = [c.strip() for c in photo_captions.strip().split("\n") if c.strip()] if photo_captions.strip() else []
     voice_transcripts = []
+    # We will build a step-by-step log to show the user exactly what succeeded/failed
+    status_log = []
     if not texts and not captions and voice_file is None and not photo_files and not scanned_files:
         return "❌ Please provide at least one input.", None, gr.update()
     # 1. Transcribe voice note (Cohere ASR)
+    if voice_file is not None:
+        try:
+            r = requests.post(TRANSCRIBE_URL, json={
+                "audio_b64": encode_file(voice_file),
+                "filename": Path(voice_file).name,
+            }, timeout=180)
+            if r.status_code == 200:
+                transcript = r.json().get("transcript", "")
+                if transcript:
+                    voice_transcripts.append(transcript)
+                    status_log.append("✅ Voice note transcribed successfully.")
+                else:
+                    status_log.append("⚠️ Voice note processed, but no text was found.")
+            else:
+                status_log.append(f"❌ Voice transcription failed (HTTP {r.status_code}): {r.text}")
+        except Exception as e:
+            status_log.append(f"❌ Voice transcription failed: {e}")
     # 2. Describe uploaded photos (MiniCPM-V)
+    if photo_files:
+        success_count = 0
+        for i, photo in enumerate(photo_files):
+            try:
+                r = requests.post(VISION_URL, json={"image_b64": encode_file(photo)}, timeout=180)
+                if r.status_code == 200:
+                    desc = r.json().get("description", "")
+                    if desc:
+                        captions.append(desc)
+                        success_count += 1
+                else:
+                    status_log.append(f"❌ Photo {i+1} description failed (HTTP {r.status_code}).")
+            except Exception as e:
+                status_log.append(f"❌ Photo {i+1} description failed: {e}")
+        if success_count > 0:
+            status_log.append(f"✅ {success_count}/{len(photo_files)} photos described successfully.")
     # 3. OCR scanned letters (Nemotron Parse)
+    if scanned_files:
+        success_count = 0
+        for i, scan in enumerate(scanned_files):
+            try:
+                r = requests.post(OCR_URL, json={"image_b64": encode_file(scan)}, timeout=180)
+                if r.status_code == 200:
+                    ocr_text = r.json().get("text", "")
+                    if ocr_text:
+                        texts.append(ocr_text)
+                        success_count += 1
+                else:
+                    status_log.append(f"❌ Scan {i+1} OCR failed (HTTP {r.status_code}).")
+            except Exception as e:
+                status_log.append(f"❌ Scan {i+1} OCR failed: {e}")
+        if success_count > 0:
+            status_log.append(f"✅ {success_count}/{len(scanned_files)} scanned documents read successfully.")
+    # Check if we have AT LEAST SOME data to build the persona
+    if not texts and not captions and not voice_transcripts:
+        status_log.append("\n❌ **ABORTED:** All AI processing failed, and no manual text/captions were provided. Cannot build persona.")
+        return "\n\n".join(status_log), None, gr.update()
+    # 4. Build persona (Qwen 32B)
     persona_id = str(uuid.uuid4())[:8]
+    try:
+        r = requests.post(BUILD_PERSONA_URL, json={
             "persona_id": persona_id, "name": name.strip(),
             "relationship": relationship.strip(),
             "texts": texts, "photo_captions": captions,
             "voice_transcripts": voice_transcripts,
+        }, timeout=1200)
+        if r.status_code == 200:
+            result = r.json()
+            if result.get("success"):
+                persona = result["persona"]
+                summary = f"""\n🎉 **{name}'s memory has been successfully preserved!**
 **Persona ID:** `{persona_id}`
 **Personality:** {', '.join(persona.get('personality_traits', [])[:3])}
 **Language:** {persona.get('language', 'Auto')}
 **Memories captured:** {len(persona.get('key_memories', []))}
 Go to the **💬 Talk** tab and enter the Persona ID."""
+                status_log.append(summary)
+                return "\n".join(status_log), persona_id, gr.update(value=persona_id)
+            else:
+                status_log.append(f"\n❌ Persona builder failed: {result}")
+        else:
+            status_log.append(f"\n❌ Persona builder failed (HTTP {r.status_code}): {r.text}")
+    except Exception as e:
+        status_log.append(f"\n❌ Persona builder failed: {e}")
+    # Fallback return if the final step failed
+    return "\n\n".join(status_log), None, gr.update()
 def chat_with_persona(persona_id, message, history, language, enable_voice):
     history = history or []
     if not message.strip():
         return "", history, None
+    try:
+        r = requests.post(CHAT_URL, json={
             "persona_id": persona_id.strip(),
             "history": [{"role": m["role"], "content": m["content"]} for m in history],
             "message": message.strip(),
             "language": language,
+        }, timeout=180)
+        result = r.json()
         response_text = result.get("text", result.get("response", "..."))
         voice_desc = result.get("voice_description", "warm elderly voice")
+    except Exception as e:
+        response_text = f"⚠️ Error: {e}"
+        voice_desc = "warm elderly voice"
     history = history + [
         {"role": "user", "content": message},
         {"role": "assistant", "content": response_text},
     ]
+    # Generate voice response (VoxCPM2)
     audio_path = None
+    if enable_voice:
         try:
             r = requests.post(TTS_URL, json={
                 "text": response_text,
                 "voice_description": voice_desc,
             }, timeout=180)
+            if r.status_code == 200:
                 import tempfile
                 with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                     f.write(r.content)
 def load_personas():
+    for attempt in range(2):
+        try:
+            r = requests.get(LIST_PERSONAS_URL, timeout=90)
+            personas = r.json().get("personas", [])
+            if not personas:
+                return "No personas saved yet."
+            lines = [f"**{p['name']}** ({p['relationship']}) — ID: `{p['id']}`" for p in personas]
+            return "\n\n".join(lines)
+        except Exception as e:
+            if attempt == 0:
+                continue
+            return f"⚠️ Modal is waking up, please try again in 30 seconds."
 # ── UI ────────────────────────────────────────────────────────────────────────
         <div class="header-sub">Preserve the voice of someone you love. Talk to them again.</div>
         <hr class="divider">
         <div style="text-align:center; margin-bottom:16px;">
+            <span class="model-badge">🧠 Qwen2.5-32B</span>
+            <span class="model-badge">🎤 Cohere Transcribe</span>
+            <span class="model-badge">👁️ MiniCPM-V 4.6</span>
+            <span class="model-badge">📄 Nemotron Parse</span>
+            <span class="model-badge">🔊 VoxCPM2</span>
+            <span class="model-badge">🌍 Tiny Aya Fire</span>
         </div>
     """)
             gr.HTML("<p style='color:#8a7560; font-style:italic; margin-bottom:16px;'>Upload letters, photos, voice notes, or scanned documents. Each is processed by a specialized AI model.</p>")
             with gr.Row():
+                name_input = gr.Textbox(label="Their Name", placeholder="e.g. Dadu, Nana, Abba...")
+                relationship_input = gr.Textbox(label="Your Relationship", placeholder="e.g. Grandfather, Mother...")
             text_input = gr.Textbox(
                 label="📝 Letters / Diary Entries / Writings",
+                placeholder="Paste their writings here. Separate multiple entries with ---",
                 lines=6,
             )
             photo_captions = gr.Textbox(
                 label="🖼️ Manual Photo Captions (optional, one per line)",
+                placeholder="Or describe photos manually here...",
                 lines=3,
             )
             chatbot = gr.Chatbot(label="", height=420, placeholder="*Their words will appear here...*")
             with gr.Row():
+                msg_input = gr.Textbox(label="Your message", placeholder="What would you like to say?", lines=2, scale=4)
                 send_btn = gr.Button("Send →", variant="primary", scale=1)
             voice_output = gr.Audio(label="🔊 Voice Response", visible=True, autoplay=True)