Spaces:
Runtime error
Runtime error
Sheikh Mohammad Rakib committed on
Commit ·
c19f82c
1
Parent(s): a9ffeba
refactor: remove custom retry logic and progress tracking in favor of direct requests with status logging
Browse files
app.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
import spaces
|
| 3 |
import requests
|
| 4 |
import uuid
|
| 5 |
import base64
|
| 6 |
-
import
|
| 7 |
from pathlib import Path
|
| 8 |
|
| 9 |
# ββ CONFIG ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -22,213 +21,117 @@ def encode_file(path):
|
|
| 22 |
return base64.b64encode(f.read()).decode()
|
| 23 |
|
| 24 |
|
| 25 |
-
def
|
| 26 |
-
"""
|
| 27 |
-
POST to a Modal endpoint with retry logic for cold-start empty responses.
|
| 28 |
-
Returns (dict_result, error_string). One of them will be None.
|
| 29 |
-
"""
|
| 30 |
-
last_err = None
|
| 31 |
-
for attempt in range(1, retries + 1):
|
| 32 |
-
try:
|
| 33 |
-
r = requests.post(url, json=payload, timeout=timeout)
|
| 34 |
-
|
| 35 |
-
# Empty body = Modal cold-starting or gateway hiccup β retry
|
| 36 |
-
if not r.text or not r.text.strip():
|
| 37 |
-
wait = attempt * 10
|
| 38 |
-
last_err = f"{label} returned HTTP {r.status_code} with empty body (attempt {attempt}/{retries})"
|
| 39 |
-
if attempt < retries:
|
| 40 |
-
time.sleep(wait)
|
| 41 |
-
continue
|
| 42 |
-
return None, last_err
|
| 43 |
-
|
| 44 |
-
try:
|
| 45 |
-
return r.json(), None
|
| 46 |
-
except Exception:
|
| 47 |
-
import json as _json
|
| 48 |
-
raw = r.text.strip()
|
| 49 |
-
try:
|
| 50 |
-
obj, _ = _json.JSONDecoder().raw_decode(raw)
|
| 51 |
-
return obj, None
|
| 52 |
-
except Exception:
|
| 53 |
-
body = raw[:500]
|
| 54 |
-
return None, f"{label} HTTP {r.status_code}: {body}"
|
| 55 |
-
|
| 56 |
-
except requests.exceptions.Timeout:
|
| 57 |
-
last_err = f"{label} timed out after {timeout}s (attempt {attempt}/{retries})"
|
| 58 |
-
if attempt < retries:
|
| 59 |
-
time.sleep(5)
|
| 60 |
-
continue
|
| 61 |
-
except requests.exceptions.ConnectionError as e:
|
| 62 |
-
last_err = f"{label} connection error: {e}"
|
| 63 |
-
if attempt < retries:
|
| 64 |
-
time.sleep(10)
|
| 65 |
-
continue
|
| 66 |
-
except Exception as e:
|
| 67 |
-
return None, f"{label} unexpected error: {e}"
|
| 68 |
-
|
| 69 |
-
return None, last_err
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
def modal_get(url, timeout=90, retries=2, label="endpoint"):
|
| 73 |
-
"""GET from a Modal endpoint with retry."""
|
| 74 |
-
last_err = None
|
| 75 |
-
for attempt in range(1, retries + 1):
|
| 76 |
-
try:
|
| 77 |
-
r = requests.get(url, timeout=timeout)
|
| 78 |
-
if not r.text or not r.text.strip():
|
| 79 |
-
last_err = f"{label} empty response (attempt {attempt}/{retries})"
|
| 80 |
-
if attempt < retries:
|
| 81 |
-
time.sleep(15)
|
| 82 |
-
continue
|
| 83 |
-
return None, last_err
|
| 84 |
-
try:
|
| 85 |
-
return r.json(), None
|
| 86 |
-
except Exception:
|
| 87 |
-
import json as _json
|
| 88 |
-
raw = r.text.strip()
|
| 89 |
-
try:
|
| 90 |
-
obj, _ = _json.JSONDecoder().raw_decode(raw)
|
| 91 |
-
return obj, None
|
| 92 |
-
except Exception:
|
| 93 |
-
return None, f"{label} HTTP {r.status_code}: {raw[:300]}"
|
| 94 |
-
except Exception as e:
|
| 95 |
-
last_err = f"{label} error: {e}"
|
| 96 |
-
if attempt < retries:
|
| 97 |
-
time.sleep(10)
|
| 98 |
-
return None, last_err
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
def _safe_json(r, label):
|
| 102 |
-
"""Parse JSON from a response, tolerating extra data after the first object."""
|
| 103 |
-
import json as _json
|
| 104 |
-
try:
|
| 105 |
-
return r.json(), None
|
| 106 |
-
except Exception:
|
| 107 |
-
raw = r.text.strip()
|
| 108 |
-
try:
|
| 109 |
-
obj, _ = _json.JSONDecoder().raw_decode(raw)
|
| 110 |
-
return obj, None
|
| 111 |
-
except Exception:
|
| 112 |
-
return None, f"{label} HTTP {r.status_code}: {raw[:500]}"
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
@spaces.GPU
|
| 116 |
-
def build_persona(name, relationship, text_input, photo_captions, voice_file, photo_files, scanned_files,
|
| 117 |
-
progress=gr.Progress(track_tqdm=False)):
|
| 118 |
if not name.strip():
|
| 119 |
return "β Please enter the person's name.", None, gr.update()
|
| 120 |
|
| 121 |
texts = [t.strip() for t in text_input.strip().split("---") if t.strip()] if text_input.strip() else []
|
| 122 |
captions = [c.strip() for c in photo_captions.strip().split("\n") if c.strip()] if photo_captions.strip() else []
|
| 123 |
voice_transcripts = []
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
if not texts and not captions and voice_file is None and not photo_files and not scanned_files:
|
| 126 |
return "β Please provide at least one input.", None, gr.update()
|
| 127 |
|
| 128 |
-
# Work out how many steps we actually have so the bar fills evenly
|
| 129 |
-
n_photos = len(photo_files) if photo_files else 0
|
| 130 |
-
n_scans = len(scanned_files) if scanned_files else 0
|
| 131 |
-
has_voice = voice_file is not None
|
| 132 |
-
total_steps = 1 + int(has_voice) + n_photos + n_scans # 1 = build-persona
|
| 133 |
-
done = 0
|
| 134 |
-
|
| 135 |
-
def advance(msg):
|
| 136 |
-
nonlocal done
|
| 137 |
-
done += 1
|
| 138 |
-
progress(done / total_steps, desc=msg)
|
| 139 |
-
|
| 140 |
-
progress(0, desc="π―οΈ Startingβ¦")
|
| 141 |
-
|
| 142 |
# 1. Transcribe voice note (Cohere ASR)
|
| 143 |
-
if
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
# 2. Describe uploaded photos (MiniCPM-V)
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
|
|
|
|
|
|
|
|
|
| 171 |
|
| 172 |
# 3. OCR scanned letters (Nemotron Parse)
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
persona_id = str(uuid.uuid4())[:8]
|
| 190 |
-
|
| 191 |
-
BUILD_PERSONA_URL,
|
| 192 |
-
{
|
| 193 |
"persona_id": persona_id, "name": name.strip(),
|
| 194 |
"relationship": relationship.strip(),
|
| 195 |
"texts": texts, "photo_captions": captions,
|
| 196 |
"voice_transcripts": voice_transcripts,
|
| 197 |
-
},
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
# Tolerate extra data in the JSON response
|
| 205 |
-
if isinstance(result, str):
|
| 206 |
-
import json as _json
|
| 207 |
-
try:
|
| 208 |
-
result, _ = _json.JSONDecoder().raw_decode(result.strip())
|
| 209 |
-
except Exception:
|
| 210 |
-
return f"β Backend error: could not parse response", None, gr.update()
|
| 211 |
-
|
| 212 |
-
progress(1.0, desc="β
Done!")
|
| 213 |
-
|
| 214 |
-
if result.get("success"):
|
| 215 |
-
persona = result["persona"]
|
| 216 |
-
summary = f"""β
**{name}'s memory has been preserved.**
|
| 217 |
|
| 218 |
**Persona ID:** `{persona_id}`
|
| 219 |
**Personality:** {', '.join(persona.get('personality_traits', [])[:3])}
|
| 220 |
**Language:** {persona.get('language', 'Auto')}
|
| 221 |
**Memories captured:** {len(persona.get('key_memories', []))}
|
| 222 |
-
**Voice style:** {persona.get('voice_description', 'N/A')}
|
| 223 |
|
| 224 |
Go to the **π¬ Talk** tab and enter the Persona ID."""
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
|
| 231 |
-
@spaces.GPU
|
| 232 |
def chat_with_persona(persona_id, message, history, language, enable_voice):
|
| 233 |
history = history or []
|
| 234 |
|
|
@@ -239,38 +142,34 @@ def chat_with_persona(persona_id, message, history, language, enable_voice):
|
|
| 239 |
if not message.strip():
|
| 240 |
return "", history, None
|
| 241 |
|
| 242 |
-
|
| 243 |
-
CHAT_URL,
|
| 244 |
-
{
|
| 245 |
"persona_id": persona_id.strip(),
|
| 246 |
"history": [{"role": m["role"], "content": m["content"]} for m in history],
|
| 247 |
"message": message.strip(),
|
| 248 |
"language": language,
|
| 249 |
-
},
|
| 250 |
-
|
| 251 |
-
)
|
| 252 |
-
|
| 253 |
-
if err:
|
| 254 |
-
response_text = f"β οΈ {err}"
|
| 255 |
-
voice_desc = "warm elderly voice"
|
| 256 |
-
else:
|
| 257 |
response_text = result.get("text", result.get("response", "..."))
|
| 258 |
voice_desc = result.get("voice_description", "warm elderly voice")
|
|
|
|
|
|
|
|
|
|
| 259 |
|
| 260 |
history = history + [
|
| 261 |
{"role": "user", "content": message},
|
| 262 |
{"role": "assistant", "content": response_text},
|
| 263 |
]
|
| 264 |
|
| 265 |
-
# Generate voice response (VoxCPM2)
|
| 266 |
audio_path = None
|
| 267 |
-
if enable_voice
|
| 268 |
try:
|
| 269 |
r = requests.post(TTS_URL, json={
|
| 270 |
"text": response_text,
|
| 271 |
"voice_description": voice_desc,
|
| 272 |
}, timeout=180)
|
| 273 |
-
if r.status_code == 200
|
| 274 |
import tempfile
|
| 275 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
|
| 276 |
f.write(r.content)
|
|
@@ -282,14 +181,18 @@ def chat_with_persona(persona_id, message, history, language, enable_voice):
|
|
| 282 |
|
| 283 |
|
| 284 |
def load_personas():
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
|
| 294 |
|
| 295 |
# ββ UI ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -315,14 +218,12 @@ with gr.Blocks(title="Memory Keeper") as demo:
|
|
| 315 |
<div class="header-sub">Preserve the voice of someone you love. Talk to them again.</div>
|
| 316 |
<hr class="divider">
|
| 317 |
<div style="text-align:center; margin-bottom:16px;">
|
| 318 |
-
<span class="model-badge">π§
|
| 319 |
-
<span class="model-badge">
|
| 320 |
-
<span class="model-badge">
|
| 321 |
-
<span class="model-badge">π Nemotron Parse
|
| 322 |
-
<span class="model-badge">
|
| 323 |
-
<span class="model-badge">
|
| 324 |
-
<span class="model-badge">π VoxCPM2 (~1B)</span>
|
| 325 |
-
<span class="model-badge">Total: ~26.7B params</span>
|
| 326 |
</div>
|
| 327 |
""")
|
| 328 |
|
|
@@ -333,12 +234,12 @@ with gr.Blocks(title="Memory Keeper") as demo:
|
|
| 333 |
gr.HTML("<p style='color:#8a7560; font-style:italic; margin-bottom:16px;'>Upload letters, photos, voice notes, or scanned documents. Each is processed by a specialized AI model.</p>")
|
| 334 |
|
| 335 |
with gr.Row():
|
| 336 |
-
name_input = gr.Textbox(label="Their Name",
|
| 337 |
-
relationship_input = gr.Textbox(label="Your Relationship",
|
| 338 |
|
| 339 |
text_input = gr.Textbox(
|
| 340 |
label="π Letters / Diary Entries / Writings",
|
| 341 |
-
|
| 342 |
lines=6,
|
| 343 |
)
|
| 344 |
|
|
@@ -354,7 +255,7 @@ with gr.Blocks(title="Memory Keeper") as demo:
|
|
| 354 |
|
| 355 |
photo_captions = gr.Textbox(
|
| 356 |
label="πΌοΈ Manual Photo Captions (optional, one per line)",
|
| 357 |
-
|
| 358 |
lines=3,
|
| 359 |
)
|
| 360 |
|
|
@@ -390,7 +291,7 @@ with gr.Blocks(title="Memory Keeper") as demo:
|
|
| 390 |
chatbot = gr.Chatbot(label="", height=420, placeholder="*Their words will appear here...*")
|
| 391 |
|
| 392 |
with gr.Row():
|
| 393 |
-
msg_input = gr.Textbox(label="Your message",
|
| 394 |
send_btn = gr.Button("Send β", variant="primary", scale=1)
|
| 395 |
|
| 396 |
voice_output = gr.Audio(label="π Voice Response", visible=True, autoplay=True)
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
| 2 |
import requests
|
| 3 |
import uuid
|
| 4 |
import base64
|
| 5 |
+
import json
|
| 6 |
from pathlib import Path
|
| 7 |
|
| 8 |
# ββ CONFIG ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 21 |
return base64.b64encode(f.read()).decode()
|
| 22 |
|
| 23 |
|
| 24 |
+
def _clip(text, limit=500):
    """Return *text* stripped and cut to at most *limit* characters."""
    text = (text or "").strip()
    if len(text) <= limit:
        return text
    return text[:limit] + "…"


def _extract_text(url, path, file_key, result_key, include_filename=False, timeout=180):
    """Send one base64-encoded file to an endpoint and read one field back.

    Returns ``(value, error)``. On success ``error`` is ``None`` and ``value``
    is whatever the JSON reply stores under *result_key* ("" when absent or
    empty). On any failure ``value`` is ``None`` and ``error`` is a short,
    length-limited description suitable for showing to the user.
    """
    try:
        payload = {file_key: encode_file(path)}
        if include_filename:
            payload["filename"] = Path(path).name
        r = requests.post(url, json=payload, timeout=timeout)
        if r.status_code != 200:
            # Error bodies can be whole HTML pages; keep the status readable.
            return None, f"HTTP {r.status_code}: {_clip(r.text)}"
        return r.json().get(result_key) or "", None
    except Exception as e:
        # Deliberately broad: this is a best-effort step at the UI boundary,
        # and one bad file or endpoint must not abort the whole build.
        return None, _clip(str(e))


def _extract_batch(files, url, result_key, noun, task, sink, status_log):
    """Run ``_extract_text`` over *files*, appending usable results to *sink*.

    Every failure, and every reply that came back empty, is recorded in
    *status_log* per item. Returns the number of files that produced a result.
    """
    succeeded = 0
    for idx, path in enumerate(files, start=1):
        value, err = _extract_text(url, path, "image_b64", result_key)
        if err is not None:
            status_log.append(f"❌ {noun} {idx} {task} failed: {err}")
        elif value:
            sink.append(value)
            succeeded += 1
        else:
            # Previously an empty 200 reply vanished without any log entry.
            status_log.append(f"⚠️ {noun} {idx} was processed, but nothing was returned.")
    return succeeded


def build_persona(name, relationship, text_input, photo_captions, voice_file, photo_files, scanned_files):
    """Collect all provided material, run it through the model endpoints, and build a persona.

    Args:
        name: Name of the person; must contain non-whitespace text.
        relationship: Free-text relationship, sent stripped to the builder.
        text_input: Writings; multiple entries are separated by ``---``.
        photo_captions: Manual captions, one per line.
        voice_file: Path of a voice note to transcribe, or ``None``.
        photo_files: Iterable of photo paths to describe, or empty/``None``.
        scanned_files: Iterable of scan paths to OCR, or empty/``None``.

    Returns:
        A 3-tuple ``(status, persona_id, update)``. ``status`` is a step-by-step
        log (entries separated by blank lines). ``persona_id`` is the new
        8-character ID on success and ``None`` otherwise. ``update`` is
        ``gr.update(value=persona_id)`` on success and a bare ``gr.update()``
        otherwise.
    """
    if not name.strip():
        return "❌ Please enter the person's name.", None, gr.update()

    texts = [part.strip() for part in text_input.split("---") if part.strip()]
    captions = [line.strip() for line in photo_captions.split("\n") if line.strip()]
    voice_transcripts = []

    # Step-by-step log shown to the user: exactly what succeeded and what failed.
    status_log = []

    if not texts and not captions and voice_file is None and not photo_files and not scanned_files:
        return "❌ Please provide at least one input.", None, gr.update()

    # 1. Transcribe voice note (Cohere ASR)
    if voice_file is not None:
        transcript, err = _extract_text(
            TRANSCRIBE_URL, voice_file, "audio_b64", "transcript", include_filename=True
        )
        if err is not None:
            status_log.append(f"❌ Voice transcription failed: {err}")
        elif transcript:
            voice_transcripts.append(transcript)
            status_log.append("✅ Voice note transcribed successfully.")
        else:
            status_log.append("⚠️ Voice note processed, but no text was found.")

    # 2. Describe uploaded photos (MiniCPM-V)
    if photo_files:
        described = _extract_batch(
            photo_files, VISION_URL, "description", "Photo", "description", captions, status_log
        )
        if described > 0:
            status_log.append(f"✅ {described}/{len(photo_files)} photos described successfully.")

    # 3. OCR scanned letters (Nemotron Parse)
    if scanned_files:
        read = _extract_batch(
            scanned_files, OCR_URL, "text", "Scan", "OCR", texts, status_log
        )
        if read > 0:
            status_log.append(f"✅ {read}/{len(scanned_files)} scanned documents read successfully.")

    # Only continue when at least some material survived the steps above.
    if not texts and not captions and not voice_transcripts:
        status_log.append("\n❌ **ABORTED:** All AI processing failed, and no manual text/captions were provided. Cannot build persona.")
        return "\n\n".join(status_log), None, gr.update()

    # 4. Build persona (Qwen 32B)
    persona_id = str(uuid.uuid4())[:8]
    try:
        r = requests.post(BUILD_PERSONA_URL, json={
            "persona_id": persona_id, "name": name.strip(),
            "relationship": relationship.strip(),
            "texts": texts, "photo_captions": captions,
            "voice_transcripts": voice_transcripts,
        }, timeout=1200)

        if r.status_code != 200:
            status_log.append(f"\n❌ Persona builder failed (HTTP {r.status_code}): {_clip(r.text)}")
        else:
            result = r.json()
            if result.get("success"):
                persona = result["persona"]
                # str() guards against non-string traits breaking the join
                # after the persona has already been built.
                traits = ", ".join(str(t) for t in persona.get("personality_traits", [])[:3])
                summary = "\n".join([
                    f"🎉 **{name.strip()}'s memory has been successfully preserved!**",
                    "",
                    f"**Persona ID:** `{persona_id}`",
                    f"**Personality:** {traits}",
                    f"**Language:** {persona.get('language', 'Auto')}",
                    f"**Memories captured:** {len(persona.get('key_memories', []))}",
                    "",
                    "Go to the **💬 Talk** tab and enter the Persona ID.",
                ])
                status_log.append(summary)
                # Same blank-line separator as the failure paths.
                return "\n\n".join(status_log), persona_id, gr.update(value=persona_id)
            status_log.append(f"\n❌ Persona builder failed: {_clip(str(result))}")
    except Exception as e:
        # Broad on purpose: report the failure in the log instead of crashing the UI.
        status_log.append(f"\n❌ Persona builder failed: {_clip(str(e))}")

    # Fallback return if the final step failed.
    return "\n\n".join(status_log), None, gr.update()
|
| 133 |
|
| 134 |
|
|
|
|
| 135 |
def chat_with_persona(persona_id, message, history, language, enable_voice):
|
| 136 |
history = history or []
|
| 137 |
|
|
|
|
| 142 |
if not message.strip():
|
| 143 |
return "", history, None
|
| 144 |
|
| 145 |
+
try:
|
| 146 |
+
r = requests.post(CHAT_URL, json={
|
|
|
|
| 147 |
"persona_id": persona_id.strip(),
|
| 148 |
"history": [{"role": m["role"], "content": m["content"]} for m in history],
|
| 149 |
"message": message.strip(),
|
| 150 |
"language": language,
|
| 151 |
+
}, timeout=180)
|
| 152 |
+
result = r.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
response_text = result.get("text", result.get("response", "..."))
|
| 154 |
voice_desc = result.get("voice_description", "warm elderly voice")
|
| 155 |
+
except Exception as e:
|
| 156 |
+
response_text = f"β οΈ Error: {e}"
|
| 157 |
+
voice_desc = "warm elderly voice"
|
| 158 |
|
| 159 |
history = history + [
|
| 160 |
{"role": "user", "content": message},
|
| 161 |
{"role": "assistant", "content": response_text},
|
| 162 |
]
|
| 163 |
|
| 164 |
+
# Generate voice response (VoxCPM2)
|
| 165 |
audio_path = None
|
| 166 |
+
if enable_voice:
|
| 167 |
try:
|
| 168 |
r = requests.post(TTS_URL, json={
|
| 169 |
"text": response_text,
|
| 170 |
"voice_description": voice_desc,
|
| 171 |
}, timeout=180)
|
| 172 |
+
if r.status_code == 200:
|
| 173 |
import tempfile
|
| 174 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
|
| 175 |
f.write(r.content)
|
|
|
|
| 181 |
|
| 182 |
|
| 183 |
def load_personas():
    """Fetch the saved personas and format them as a Markdown list.

    The listing endpoint is tried up to two times. A transport error, a
    non-200 status, or an unparsable/unexpected body counts as a failed
    attempt.

    Returns:
        One Markdown entry per persona separated by blank lines,
        ``"No personas saved yet."`` when the list is empty, or a
        "waking up" notice when both attempts fail.
    """
    for _ in range(2):
        try:
            r = requests.get(LIST_PERSONAS_URL, timeout=90)
            if r.status_code != 200:
                # An error body without "personas" must not read as "nothing saved".
                continue
            personas = r.json().get("personas", [])
        except Exception:
            # Broad on purpose: any failure here is treated as a cold start and retried.
            continue

        if not isinstance(personas, list):
            continue
        if not personas:
            return "No personas saved yet."

        # Format outside the try so a malformed entry cannot trigger a
        # needless second request; missing fields fall back to "?".
        return "\n\n".join(
            f"**{p.get('name', '?')}** ({p.get('relationship', '?')}) — ID: `{p.get('id', '?')}`"
            for p in personas
            if isinstance(p, dict)
        )

    return "⚠️ Modal is waking up, please try again in 30 seconds."
|
| 196 |
|
| 197 |
|
| 198 |
# ββ UI ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 218 |
<div class="header-sub">Preserve the voice of someone you love. Talk to them again.</div>
|
| 219 |
<hr class="divider">
|
| 220 |
<div style="text-align:center; margin-bottom:16px;">
|
| 221 |
+
<span class="model-badge">π§ Qwen2.5-32B</span>
|
| 222 |
+
<span class="model-badge">π€ Cohere Transcribe</span>
|
| 223 |
+
<span class="model-badge">ποΈ MiniCPM-V 4.6</span>
|
| 224 |
+
<span class="model-badge">π Nemotron Parse</span>
|
| 225 |
+
<span class="model-badge">π VoxCPM2</span>
|
| 226 |
+
<span class="model-badge">π Tiny Aya Fire</span>
|
|
|
|
|
|
|
| 227 |
</div>
|
| 228 |
""")
|
| 229 |
|
|
|
|
| 234 |
gr.HTML("<p style='color:#8a7560; font-style:italic; margin-bottom:16px;'>Upload letters, photos, voice notes, or scanned documents. Each is processed by a specialized AI model.</p>")
|
| 235 |
|
| 236 |
with gr.Row():
|
| 237 |
+
name_input = gr.Textbox(label="Their Name", placeholder="e.g. Dadu, Nana, Abba...")
|
| 238 |
+
relationship_input = gr.Textbox(label="Your Relationship", placeholder="e.g. Grandfather, Mother...")
|
| 239 |
|
| 240 |
text_input = gr.Textbox(
|
| 241 |
label="π Letters / Diary Entries / Writings",
|
| 242 |
+
placeholder="Paste their writings here. Separate multiple entries with ---",
|
| 243 |
lines=6,
|
| 244 |
)
|
| 245 |
|
|
|
|
| 255 |
|
| 256 |
photo_captions = gr.Textbox(
|
| 257 |
label="πΌοΈ Manual Photo Captions (optional, one per line)",
|
| 258 |
+
placeholder="Or describe photos manually here...",
|
| 259 |
lines=3,
|
| 260 |
)
|
| 261 |
|
|
|
|
| 291 |
chatbot = gr.Chatbot(label="", height=420, placeholder="*Their words will appear here...*")
|
| 292 |
|
| 293 |
with gr.Row():
|
| 294 |
+
msg_input = gr.Textbox(label="Your message", placeholder="What would you like to say?", lines=2, scale=4)
|
| 295 |
send_btn = gr.Button("Send β", variant="primary", scale=1)
|
| 296 |
|
| 297 |
voice_output = gr.Audio(label="π Voice Response", visible=True, autoplay=True)
|