Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import requests | |
| import uuid | |
| import base64 | |
| import json | |
| from pathlib import Path | |
# -- CONFIG ------------------------------------------------------------------
# Every backend endpoint lives on the same Modal host pattern; only the
# function slug differs, so the URLs are derived from one template.
_MODAL_ENDPOINT_TEMPLATE = "https://sheikhmdrakib-career--{}.modal.run"

BUILD_PERSONA_URL = _MODAL_ENDPOINT_TEMPLATE.format("build-persona")
CHAT_URL = _MODAL_ENDPOINT_TEMPLATE.format("chat")
TRANSCRIBE_URL = _MODAL_ENDPOINT_TEMPLATE.format("transcribe")
VISION_URL = _MODAL_ENDPOINT_TEMPLATE.format("describe-photo")
OCR_URL = _MODAL_ENDPOINT_TEMPLATE.format("ocr-document")
TTS_URL = _MODAL_ENDPOINT_TEMPLATE.format("text-to-speech")
LIST_PERSONAS_URL = _MODAL_ENDPOINT_TEMPLATE.format("list-personas")
# ----------------------------------------------------------------------------
def encode_file(path):
    """Read the file at ``path`` in binary mode and return it as base64 text.

    The result is a ``str`` (not ``bytes``) so it can be embedded directly in
    the JSON request bodies sent to the backend endpoints.
    """
    with open(path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode()
def _extract_from_images(url, files, result_key, label, task, done_phrase, status_log):
    """Post each image to ``url`` and collect the non-empty ``result_key`` values.

    One outcome line per failed or empty item, plus one summary line when at
    least one item succeeded, is appended to ``status_log``.

    Args:
        url: Endpoint that accepts ``{"image_b64": ...}`` as a JSON body.
        files: Paths of the uploaded images.
        result_key: Key of the extracted text in the JSON response.
        label: Per-item noun used in messages (e.g. ``"Photo"``).
        task: Name of the operation used in failure messages (e.g. ``"OCR"``).
        done_phrase: Phrase for the success summary (e.g. ``"photos described"``).
        status_log: List of user-facing status lines, mutated in place.

    Returns:
        The extracted strings, in upload order.
    """
    extracted = []
    for position, image_path in enumerate(files, start=1):
        try:
            r = requests.post(url, json={"image_b64": encode_file(image_path)}, timeout=180)
            if r.status_code != 200:
                status_log.append(f"β {label} {position} {task} failed (HTTP {r.status_code}).")
                continue
            value = r.json().get(result_key, "")
        except Exception as e:
            # Any failure (network, unreadable file, bad JSON) is reported to
            # the user and must not stop the remaining uploads.
            status_log.append(f"β {label} {position} {task} failed: {e}")
            continue
        if value:
            extracted.append(value)
        else:
            # Previously a 200 response with an empty result vanished without
            # a trace; surface it the same way the voice step does.
            status_log.append(f"β οΈ {label} {position} processed, but nothing was returned.")
    if extracted:
        status_log.append(f"β {len(extracted)}/{len(files)} {done_phrase} successfully.")
    return extracted


def build_persona(name, relationship, text_input, photo_captions, voice_file, photo_files, scanned_files):
    """Gather every provided memory source and ask the backend to build a persona.

    Steps: transcribe the voice note, describe photos, OCR scanned documents,
    then post everything to the persona builder. Each step is best-effort; its
    outcome is recorded in a status log that is shown to the user.

    Args:
        name: Name of the person being remembered (required).
        relationship: The user's relationship to that person.
        text_input: Pasted writings; multiple entries are separated by ``---``.
        photo_captions: Manual captions, one per line.
        voice_file: Path to an audio file, or ``None``.
        photo_files: Paths of uploaded photos, or ``None``/empty.
        scanned_files: Paths of uploaded scans, or ``None``/empty.

    Returns:
        A 3-tuple ``(status_markdown, persona_id_or_None, gradio_update)``.
        On success the update carries the new persona id; otherwise it is a
        no-op ``gr.update()``.
    """
    # Treat None like an empty string so a cleared field cannot raise.
    name = (name or "").strip()
    if not name:
        return "β Please enter the person's name.", None, gr.update()
    relationship = (relationship or "").strip()

    texts = [t.strip() for t in (text_input or "").split("---") if t.strip()]
    captions = [c.strip() for c in (photo_captions or "").split("\n") if c.strip()]
    voice_transcripts = []
    # Step-by-step log showing the user exactly what succeeded or failed.
    status_log = []

    if not texts and not captions and voice_file is None and not photo_files and not scanned_files:
        return "β Please provide at least one input.", None, gr.update()

    # 1. Transcribe voice note (Cohere ASR)
    if voice_file is not None:
        try:
            r = requests.post(TRANSCRIBE_URL, json={
                "audio_b64": encode_file(voice_file),
                "filename": Path(voice_file).name,
            }, timeout=180)
            if r.status_code == 200:
                transcript = r.json().get("transcript", "")
                if transcript:
                    voice_transcripts.append(transcript)
                    status_log.append("β Voice note transcribed successfully.")
                else:
                    status_log.append("β οΈ Voice note processed, but no text was found.")
            else:
                status_log.append(f"β Voice transcription failed (HTTP {r.status_code}): {r.text}")
        except Exception as e:
            status_log.append(f"β Voice transcription failed: {e}")

    # 2. Describe uploaded photos (MiniCPM-V)
    if photo_files:
        captions.extend(_extract_from_images(
            VISION_URL, photo_files, "description",
            "Photo", "description", "photos described", status_log,
        ))

    # 3. OCR scanned letters (Nemotron Parse)
    if scanned_files:
        texts.extend(_extract_from_images(
            OCR_URL, scanned_files, "text",
            "Scan", "OCR", "scanned documents read", status_log,
        ))

    # The builder needs at least some material to work with.
    if not texts and not captions and not voice_transcripts:
        status_log.append("\nβ **ABORTED:** All AI processing failed, and no manual text/captions were provided. Cannot build persona.")
        return "\n\n".join(status_log), None, gr.update()

    # 4. Build persona (Qwen 32B)
    persona_id = str(uuid.uuid4())[:8]
    try:
        r = requests.post(BUILD_PERSONA_URL, json={
            "persona_id": persona_id, "name": name,
            "relationship": relationship,
            "texts": texts, "photo_captions": captions,
            "voice_transcripts": voice_transcripts,
        }, timeout=1200)
        if r.status_code == 200:
            result = r.json()
            if result.get("success"):
                persona = result["persona"]
                traits = ', '.join(map(str, persona.get('personality_traits', [])[:3]))
                summary = (
                    f"\nπ **{name}'s memory has been successfully preserved!**\n"
                    f"**Persona ID:** `{persona_id}`\n"
                    f"**Personality:** {traits}\n"
                    f"**Language:** {persona.get('language', 'Auto')}\n"
                    f"**Memories captured:** {len(persona.get('key_memories', []))}\n"
                    "Go to the **π¬ Talk** tab and enter the Persona ID."
                )
                status_log.append(summary)
                # Same paragraph separator as the failure paths, so the
                # per-step lines do not run together when rendered as Markdown.
                return "\n\n".join(status_log), persona_id, gr.update(value=persona_id)
            status_log.append(f"\nβ Persona builder failed: {result}")
        else:
            status_log.append(f"\nβ Persona builder failed (HTTP {r.status_code}): {r.text}")
    except Exception as e:
        status_log.append(f"\nβ Persona builder failed: {e}")

    # Fallback return if the final step failed.
    return "\n\n".join(status_log), None, gr.update()
def chat_with_persona(persona_id, message, history, language, enable_voice):
    """Send one user message to the persona chat endpoint and optionally voice the reply.

    Args:
        persona_id: Identifier of a previously built persona.
        message: The user's message text.
        history: Earlier turns as ``{"role", "content"}`` dicts, or ``None``.
        language: Reply language selection forwarded to the backend.
        enable_voice: When true, request a spoken version of the reply.

    Returns:
        A 3-tuple ``(cleared_message, updated_history, audio_path_or_None)``.
        The first element is always ``""`` so the input box is emptied.
    """
    import logging
    import tempfile

    # Copy so the caller's list is never mutated; None inputs count as empty.
    history = list(history or [])
    persona_id = (persona_id or "").strip()
    message = message or ""

    if not persona_id:
        history = history + [{"role": "assistant", "content": "β οΈ Please enter a Persona ID first."}]
        return "", history, None
    if not message.strip():
        return "", history, None

    voice_desc = "warm elderly voice"
    chat_succeeded = False
    try:
        r = requests.post(CHAT_URL, json={
            "persona_id": persona_id,
            "history": [{"role": m["role"], "content": m["content"]} for m in history],
            "message": message.strip(),
            "language": language,
        }, timeout=180)
        if r.status_code == 200:
            result = r.json()
            response_text = result.get("text", result.get("response", "..."))
            voice_desc = result.get("voice_description", voice_desc)
            chat_succeeded = True
        else:
            # Report the HTTP failure instead of trying to parse an error body.
            response_text = f"β οΈ Error: chat service returned HTTP {r.status_code}"
    except Exception as e:
        response_text = f"β οΈ Error: {e}"

    history = history + [
        {"role": "user", "content": message},
        {"role": "assistant", "content": response_text},
    ]

    # Generate voice response (VoxCPM2). Skipped when the chat call failed so
    # an error message is never read aloud.
    audio_path = None
    if enable_voice and chat_succeeded:
        try:
            r = requests.post(TTS_URL, json={
                "text": response_text,
                "voice_description": voice_desc,
            }, timeout=180)
            if r.status_code == 200:
                # delete=False: the file is handed back to the caller by path.
                with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
                    f.write(r.content)
                    audio_path = f.name
            else:
                logging.getLogger(__name__).warning(
                    "Text-to-speech request failed with HTTP %s", r.status_code
                )
        except Exception:
            # Voice is best-effort: keep the text reply, but leave a trace.
            logging.getLogger(__name__).warning("Text-to-speech request failed", exc_info=True)

    return "", history, audio_path
def load_personas():
    """Fetch the saved personas and format them as a Markdown list.

    The request is attempted twice; the first failure is retried silently.

    Returns:
        One Markdown line per persona (name, relationship, id) separated by
        blank lines, ``"No personas saved yet."`` when the list is empty, or a
        retry hint when both attempts fail.
    """
    for attempt in range(2):
        try:
            r = requests.get(LIST_PERSONAS_URL, timeout=90)
            # Treat an HTTP error as a failed attempt; otherwise an error body
            # without "personas" would be reported as "no personas saved".
            r.raise_for_status()
            personas = r.json().get("personas", [])
            if not personas:
                return "No personas saved yet."
            lines = [f"**{p['name']}** ({p['relationship']}) β ID: `{p['id']}`" for p in personas]
            return "\n\n".join(lines)
        except Exception:
            if attempt == 0:
                continue
            return "β οΈ Modal is waking up, please try again in 30 seconds."
# -- UI ----------------------------------------------------------------------
# Custom stylesheet for the app; it is handed to demo.launch(css=...) at the
# bottom of the file. It defines the dark theme plus the helper classes used by
# the raw HTML snippets (.header-title, .header-sub, .divider, .model-badge).
css = """
@import url('https://fonts.googleapis.com/css2?family=Lora:ital,wght@0,400;0,600;1,400&family=Source+Sans+3:wght@300;400;600&display=swap');
* { box-sizing: border-box; }
body, .gradio-container { background: #0e0b08 !important; font-family: 'Source Sans 3', sans-serif !important; color: #e8dcc8 !important; }
.gradio-container { max-width: 900px !important; margin: 0 auto !important; }
h1, h2, h3 { font-family: 'Lora', serif !important; color: #d4a96a !important; }
.header-title { text-align: center; font-family: 'Lora', serif; font-size: 2.4em; color: #d4a96a; margin: 24px 0 4px 0; }
.header-sub { text-align: center; color: #8a7560; font-size: 1em; margin-bottom: 28px; font-style: italic; }
.divider { border: none; border-top: 1px solid #2a2015; margin: 20px 0; }
label { color: #8a7560 !important; font-size: 0.85em !important; letter-spacing: 0.08em !important; text-transform: uppercase !important; }
textarea, input[type="text"] { background: #1a1510 !important; border: 1px solid #3a2e1e !important; color: #e8dcc8 !important; border-radius: 6px !important; }
.model-badge { display: inline-block; background: #1f1710; border: 1px solid #3a2e1e; border-radius: 4px; padding: 2px 8px; font-size: 0.75em; color: #8a7560; margin: 2px; }
"""
def _chat_and_remember(persona_id, message, history, language, enable_voice):
    """Run ``chat_with_persona`` and also return the history for the state slot.

    ``chat_with_persona`` returns ``(cleared_message, history, audio_path)``.
    The conversation state must be written back after every turn, otherwise
    each call starts from an empty history: the chatbot then shows only the
    latest exchange and the backend receives no context.
    """
    cleared_message, updated_history, audio_path = chat_with_persona(
        persona_id, message, history, language, enable_voice
    )
    return cleared_message, updated_history, updated_history, audio_path


# Three-tab interface: build a persona, talk to it, list saved personas.
with gr.Blocks(title="Memory Keeper") as demo:
    gr.HTML("""
    <div class="header-title">π―οΈ Memory Keeper</div>
    <div class="header-sub">Preserve the voice of someone you love. Talk to them again.</div>
    <hr class="divider">
    <div style="text-align:center; margin-bottom:16px;">
    <span class="model-badge">π§ Qwen2.5-32B</span>
    <span class="model-badge">π€ Cohere Transcribe</span>
    <span class="model-badge">ποΈ MiniCPM-V 4.6</span>
    <span class="model-badge">π Nemotron Parse</span>
    <span class="model-badge">π VoxCPM2</span>
    <span class="model-badge">π Tiny Aya Fire</span>
    </div>
    """)
    with gr.Tabs():
        # -- TAB 1: PRESERVE --
        with gr.Tab("π Preserve a Memory"):
            gr.HTML("<p style='color:#8a7560; font-style:italic; margin-bottom:16px;'>Upload letters, photos, voice notes, or scanned documents. Each is processed by a specialized AI model.</p>")
            with gr.Row():
                name_input = gr.Textbox(label="Their Name", placeholder="e.g. Dadu, Nana, Abba...")
                relationship_input = gr.Textbox(label="Your Relationship", placeholder="e.g. Grandfather, Mother...")
            text_input = gr.Textbox(
                label="π Letters / Diary Entries / Writings",
                placeholder="Paste their writings here. Separate multiple entries with ---",
                lines=6,
            )
            with gr.Row():
                photo_files = gr.File(
                    label="πΌοΈ Photos (MiniCPM-V 4.6 will describe them)",
                    file_count="multiple", file_types=["image"],
                )
                scanned_files = gr.File(
                    label="π Scanned Letters/Docs (Nemotron Parse OCR)",
                    file_count="multiple", file_types=["image"],
                )
            photo_captions = gr.Textbox(
                label="πΌοΈ Manual Photo Captions (optional, one per line)",
                placeholder="Or describe photos manually here...",
                lines=3,
            )
            voice_input = gr.Audio(
                label="π€ Voice Note (Cohere Transcribe ASR)",
                type="filepath", sources=["upload", "microphone"],
            )
            build_btn = gr.Button("β¨ Preserve Their Memory", variant="primary")
            build_output = gr.Markdown()
            # Receivers for the persona id returned by build_persona.
            persona_id_state = gr.State()
            persona_id_hidden = gr.Textbox(visible=False)
            build_btn.click(
                fn=build_persona,
                inputs=[name_input, relationship_input, text_input, photo_captions,
                        voice_input, photo_files, scanned_files],
                outputs=[build_output, persona_id_state, persona_id_hidden],
                show_progress="full",
            )
        # -- TAB 2: TALK --
        with gr.Tab("π¬ Talk to Them"):
            gr.HTML("<p style='color:#8a7560; font-style:italic; margin-bottom:16px;'>Enter the Persona ID and start a conversation. Enable voice to hear them speak.</p>")
            with gr.Row():
                persona_id_input = gr.Textbox(label="Persona ID", placeholder="e.g. a3f9c2b1")
                language_select = gr.Dropdown(
                    label="Language", choices=["auto", "English", "Bengali", "Hindi", "Chinese", "Japanese", "Korean", "Thai"], value="auto",
                )
                enable_voice = gr.Checkbox(label="π Voice Response (VoxCPM2)", value=False)
            chatbot = gr.Chatbot(label="", height=420, placeholder="*Their words will appear here...*")
            with gr.Row():
                msg_input = gr.Textbox(label="Your message", placeholder="What would you like to say?", lines=2, scale=4)
                send_btn = gr.Button("Send β", variant="primary", scale=1)
            voice_output = gr.Audio(label="π Voice Response", visible=True, autoplay=True)
            clear_btn = gr.Button("Clear conversation", variant="secondary", size="sm")
            chat_history = gr.State([])
            # The button and the Enter key trigger the same handler. The
            # history state is both read and written so turns accumulate.
            chat_event = dict(
                fn=_chat_and_remember,
                inputs=[persona_id_input, msg_input, chat_history, language_select, enable_voice],
                outputs=[msg_input, chatbot, chat_history, voice_output],
            )
            send_btn.click(**chat_event)
            msg_input.submit(**chat_event)
            clear_btn.click(lambda: ([], []), outputs=[chat_history, chatbot])
        # -- TAB 3: SAVED --
        with gr.Tab("π Saved Memories"):
            refresh_btn = gr.Button("π Load Saved Memories", variant="secondary")
            personas_output = gr.Markdown()
            refresh_btn.click(fn=load_personas, outputs=personas_output)
    gr.HTML("""
    <hr class="divider">
    <p style='text-align:center; color:#3a2e1e; font-size:0.8em; font-style:italic;'>
    Built for Build Small Hackathon Β· 6 AI Models Β· Hosted on Modal + Hugging Face
    </p>
    """)
# Start the web app only when this file is executed as a script. The custom
# stylesheet is supplied at launch time and a public share link is requested.
if __name__ == "__main__":
    demo.launch(css=css, share=True)