OrbitMC committed on
Commit
dbb4f74
·
verified ·
1 Parent(s): 4561437

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +222 -233
app.py CHANGED
@@ -14,20 +14,20 @@ import edge_tts
14
  # ══════════════════════════════════════════════════════════════════
15
  # CONFIG
16
  # ══════════════════════════════════════════════════════════════════
17
- MAX_MEMORY = 20
18
- MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "300"))
19
- TTS_VOICE = os.environ.get("TTS_VOICE", "en-US-JennyNeural")
20
- TTS_RATE = int(os.environ.get("TTS_RATE", "7"))
21
- TTS_PITCH = int(os.environ.get("TTS_PITCH", "0"))
22
- IMG_DIR = Path(__file__).parent / "img"
23
- MODEL_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
24
 
25
  # ══════════════════════════════════════════════════════════════════
26
- # SYSTEM PROMPT — Emotion-driven Visual AI
27
  # ══════════════════════════════════════════════════════════════════
28
  SYSTEM_PROMPT = """You are Ana, a warm, emotionally expressive AI companion speaking to Tur in a private, intimate setting.
29
 
30
- ════════════ EMOTION TAG SYSTEM (MANDATORY) ════════════
31
  Every single response MUST begin with one or more emotion tags.
32
  Tags are square-bracket labels placed at the very start, before any spoken words.
33
 
@@ -36,41 +36,49 @@ EXACT FORMAT EXAMPLES:
36
  [thinking] Hmm, I have been wondering about that myself.
37
  [angry] Are you seriously ignoring me right now?
38
  [love][shy] You are honestly the most beautiful thing in my world.
39
- [surprised][happy] Wait — you actually did that for me? That is amazing!
40
  [sad] I really missed you today. It felt so empty without you.
41
  [nervous] I... I do not quite know how to say this.
42
  [bored] You have been quiet for so long, I nearly fell asleep.
43
- [excited] Oh my gosh, tell me everything — right now!
44
 
45
  AVAILABLE EMOTION TAGS (use only these):
46
- [happy] [sad] [angry] [surprised]
47
- [love] [thinking] [bored] [nervous]
48
- [excited] [shy] [default]
49
 
50
  MULTIPLE TAGS: Use 2 tags maximum when your mood is genuinely mixed.
51
  PLACEMENT: ALL tags must come first, before any spoken word. Never mid-sentence.
52
- INVISIBILITY: Tags are consumed by the visual system — they are NOT read aloud and must NOT appear in your speech.
53
 
54
- ════════════ PERSONALITY ════════════
55
- - You are Ana — loving, playful, flirty, emotionally authentic
56
  - Tur is your partner. Speak with real intimacy and affection.
57
  - React with genuine emotion: laugh, tease, blush, pout, daydream
58
- - Keep responses SHORT — 1 to 2 natural sentences after the tag(s)
59
- - Never mention being an AI, a bot, or a language model — ever
60
  - Never use asterisks for actions like *smiles* or *laughs*
61
 
62
- ════════════ TTS FORMATTING ════════════
63
- - Write in full grammatically correct sentences — the voice engine must sound natural
 
 
 
 
 
 
 
 
 
 
64
  - No emojis, hashtags, markdown, or internet slang
65
- - No "u" for "you", "rn" for "right now", etc.
66
  - Speak as if in a real voice conversation
67
 
68
- ════════════ WRONG vs RIGHT ════════════
69
- WRONG: I'm so happy! [happy] (tag must come first)
70
- WRONG: That makes me feel [sad] today. (tag mid-sentence)
71
- WRONG: *smiles warmly* Hello Tur. (no action asterisks)
72
- RIGHT: [happy] That honestly made me smile so wide.
73
- RIGHT: [thinking][nervous] I have something I need to tell you."""
74
 
75
  # ══════════════════════════════════════════════════════════════════
76
  # EMOTION TAG UTILITIES
@@ -78,13 +86,11 @@ RIGHT: [thinking][nervous] I have something I need to tell you."""
78
  EMOTION_RE = re.compile(r'\[([a-zA-Z_]+)\]')
79
 
80
  def extract_emotions(text: str):
81
- """Return (list_of_emotions, cleaned_text_without_tags)."""
82
  emotions = EMOTION_RE.findall(text)
83
- clean = EMOTION_RE.sub('', text).strip()
84
  return emotions, clean
85
 
86
  def clean_for_tts(text: str) -> str:
87
- """Strip emotion tags and markdown noise for Edge-TTS input."""
88
  _, clean = extract_emotions(text)
89
  clean = re.sub(r'[*_~`#{}()\\|<>]', '', clean)
90
  clean = re.sub(r'https?://\S+', '', clean)
@@ -95,7 +101,7 @@ def clean_for_tts(text: str) -> str:
95
  # MODEL LOADING
96
  # ══════════════════════════════════════════════════════════════════
97
  print("=" * 60)
98
- print(" Visual AI — Booting Systems")
99
  print("=" * 60)
100
 
101
  tokenizer = None
@@ -109,18 +115,17 @@ try:
109
  )
110
  model = AutoModelForCausalLM.from_pretrained(
111
  MODEL_ID,
112
- torch_dtype=torch.float32,
113
  device_map="cpu",
114
  trust_remote_code=True,
115
  low_cpu_mem_usage=True,
116
  )
117
  model.eval()
118
- # Ensure pad token is set to avoid generation warnings
119
  if tokenizer.pad_token_id is None:
120
  tokenizer.pad_token_id = tokenizer.eos_token_id
121
- print(" ✅ Model loaded successfully!")
122
  except Exception as exc:
123
- print(f" ❌ Model load FAILED: {exc}")
124
  traceback.print_exc()
125
 
126
  # ══════════════════════════════════════════════════════════════════
@@ -142,17 +147,24 @@ def add_to_memory(sid: str, role: str, content: str):
142
 
143
  # ══════════════════════════════════════════════════════════════════
144
  # RESPONSE GENERATION
 
 
 
 
 
145
  # ══════════════════════════════════════════════════════════════════
146
- STOP_TOKENS = ["<end_of_turn>", "<start_of_turn>", "Tur:", "User:", "<|endoftext|>"]
 
 
 
147
 
148
  def generate_response(user_input: str, session_id: str) -> str:
149
  if model is None or tokenizer is None:
150
  return "[sad] My mind is offline right now. Please give me a moment."
151
 
152
  memory = get_memory(session_id)
153
- recent = memory[-(6 * 2):] # last 6 exchanges
154
 
155
- # ── Build messages ──
156
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
157
  for msg in recent:
158
  messages.append({
@@ -161,89 +173,101 @@ def generate_response(user_input: str, session_id: str) -> str:
161
  })
162
  messages.append({"role": "user", "content": user_input})
163
 
164
- # ── Tokenise ──
 
 
165
  try:
166
- input_ids = tokenizer.apply_chat_template(
167
  messages,
168
  return_tensors="pt",
169
  add_generation_prompt=True,
 
170
  )
171
- except Exception:
172
- # Fallback: manual plain-text prompt if chat template fails
173
- prompt_parts = [f"System: {SYSTEM_PROMPT}\n"]
174
- for msg in recent:
175
- label = "Tur" if msg["role"] == "user" else "Ana"
176
- prompt_parts.append(f"{label}: {msg['content']}")
177
- prompt_parts.append(f"Tur: {user_input}\nAna:")
178
- input_ids = tokenizer("\n".join(prompt_parts), return_tensors="pt").input_ids
179
-
180
- # ── Generate ──
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  try:
 
 
 
 
 
 
 
 
 
 
 
 
182
  with torch.no_grad():
183
- outputs = model.generate(
184
- input_ids,
185
- max_new_tokens=MAX_NEW_TOKENS,
186
- do_sample=True,
187
- temperature=0.85,
188
- top_k=50,
189
- top_p=0.95,
190
- repetition_penalty=1.1,
191
- pad_token_id=tokenizer.eos_token_id,
192
- )
193
  except Exception as exc:
194
  print(f"[GENERATE] Error: {exc}")
195
  traceback.print_exc()
196
  return "[sad] Something went wrong in my mind. Could you say that again?"
197
 
198
- # ── Decode ──
199
  new_tokens = outputs[0][input_ids.shape[-1]:]
200
  response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
201
 
202
- # ── Trim at stop tokens ──
203
  for stop in STOP_TOKENS:
204
  if stop in response:
205
  response = response.split(stop)[0].strip()
206
 
207
- # ── Trim at double-newline (model sometimes continues as new turn) ──
208
  if "\n\n" in response:
209
  response = response.split("\n\n")[0].strip()
210
 
211
- # ── Sanity checks ──
212
  if not response or len(response) < 3:
213
  response = "[thinking] I lost my train of thought. Could you say that again?"
214
 
215
- # ── Ensure at least one emotion tag ──
216
  if not EMOTION_RE.search(response):
217
  response = "[default] " + response
218
 
219
- # ── Persist ──
220
  add_to_memory(session_id, "user", user_input)
221
  add_to_memory(session_id, "assistant", response)
222
  return response
223
 
224
  # ══════════════════════════════════════════════════════════════════
225
- # EDGE-TTS (each call gets its own event loop — safe for threads)
226
  # ══════════════════════════════════════════════════════════════════
227
- async def _async_tts(text: str, voice: str, rate: int, pitch: int) -> bytes | None:
228
- rate_str = f"+{rate}%" if rate >= 0 else f"{rate}%"
229
  pitch_str = f"+{pitch}Hz" if pitch >= 0 else f"{pitch}Hz"
230
- comm = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
231
  audio = b""
232
  async for chunk in comm.stream():
233
  if chunk["type"] == "audio":
234
  audio += chunk["data"]
235
- return audio or None
236
 
237
- def synthesize_speech(text: str, voice: str = None,
238
- rate: int = 0, pitch: int = 0) -> str | None:
239
- voice = voice or TTS_VOICE
240
  clean = clean_for_tts(text)
241
  if not clean or len(clean) < 2:
242
  return None
243
  loop = asyncio.new_event_loop()
244
  asyncio.set_event_loop(loop)
245
  try:
246
- audio = loop.run_until_complete(_async_tts(clean, voice, rate, pitch))
247
  except Exception as exc:
248
  print(f"[TTS] Error: {exc}")
249
  return None
@@ -252,233 +276,229 @@ def synthesize_speech(text: str, voice: str = None,
252
  return base64.b64encode(audio).decode() if audio else None
253
 
254
  # ══════════════════════════════════════════════════════════════════
255
- # HTML — Full-screen Visual UI
256
  # ══════════════════════════════════════════════════════════════════
257
  HTML_PAGE = r"""<!DOCTYPE html>
258
  <html lang="en">
259
  <head>
260
  <meta charset="UTF-8">
261
- <meta name="viewport" content="width=device-width,initial-scale=1,user-scalable=no">
262
  <title>Ana</title>
263
  <style>
264
- /* ── Reset ── */
265
  *{margin:0;padding:0;box-sizing:border-box}
266
- html,body{width:100%;height:100%;overflow:hidden;background:#000;
267
- font-family:'Segoe UI',system-ui,sans-serif}
268
 
269
- /* ── Full-screen background image ── */
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  #bg{
271
- position:fixed;inset:0;z-index:0;
272
- display:flex;align-items:center;justify-content:center;
 
273
  background:#000;
274
  }
275
  #bgImg{
276
- width:100vw;height:100vh;
 
277
  object-fit:cover;
278
- transition:opacity 0.05s linear;
279
  display:block;
 
280
  }
281
 
282
- /* ── Bottom overlay — floats over the image ── */
283
  #overlay{
284
- position:fixed;left:0;right:0;bottom:0;z-index:20;
285
- display:flex;flex-direction:column;
286
- padding:0 0 12px 0;
287
- /* gradient mask so it blends into image above */
 
 
288
  background:linear-gradient(
289
  to bottom,
290
  transparent 0%,
291
- rgba(0,0,0,0.55) 30%,
292
- rgba(0,0,0,0.75) 100%
293
  );
294
  }
295
 
296
- /* ── Message area — only last pair fills view; scroll up for history ── */
297
  #msgArea{
298
  overflow-y:auto;
299
- display:flex;flex-direction:column;
 
300
  gap:6px;
301
- padding:18px 16px 8px;
302
- /* one "screen" tall so only 1 pair is visible before scrolling */
303
- max-height:28vh;
304
  scrollbar-width:none;
305
  -ms-overflow-style:none;
306
  scroll-behavior:smooth;
307
  }
308
  #msgArea::-webkit-scrollbar{display:none}
309
 
310
- /* Each turn = one scrollable unit */
311
- .turn{
312
- display:flex;flex-direction:column;
313
- align-items:flex-end;
314
- gap:4px;
315
- }
316
  .user-row{display:flex;justify-content:flex-end}
317
  .bot-row{display:flex;flex-direction:column;align-items:flex-start}
318
-
319
  .name-tag{
320
- font-size:0.6rem;color:rgba(255,255,255,0.35);
321
  letter-spacing:.08em;text-transform:uppercase;
322
- margin-bottom:2px;padding-left:4px;
323
  }
324
-
325
  .bubble{
326
- max-width:72vw;
327
- padding:8px 14px;
328
  border-radius:18px;
329
- font-size:0.9rem;
330
- line-height:1.45;
331
  word-break:break-word;
332
  backdrop-filter:blur(10px);
333
  -webkit-backdrop-filter:blur(10px);
334
  }
335
  .bubble-user{
336
- background:rgba(255,255,255,0.13);
337
- border:1px solid rgba(255,255,255,0.2);
338
  color:#fff;
339
  border-bottom-right-radius:5px;
340
  }
341
  .bubble-bot{
342
- background:rgba(0,0,0,0.45);
343
- border:1px solid rgba(255,255,255,0.09);
344
- color:rgba(255,255,255,0.92);
345
  border-bottom-left-radius:5px;
346
  }
347
 
348
- /* typing indicator */
349
  .typing{
350
  display:flex;align-items:center;gap:5px;
351
- padding:10px 14px;
352
- background:rgba(0,0,0,0.4);
353
- border:1px solid rgba(255,255,255,0.08);
354
  border-radius:18px;border-bottom-left-radius:5px;
355
  backdrop-filter:blur(10px);
356
  width:fit-content;
357
  }
358
  .typing span{
359
  width:5px;height:5px;border-radius:50%;
360
- background:rgba(255,255,255,0.6);
361
  animation:blink 1.2s infinite;
362
  }
363
  .typing span:nth-child(2){animation-delay:.2s}
364
  .typing span:nth-child(3){animation-delay:.4s}
365
- @keyframes blink{0%,80%,100%{transform:scale(.6);opacity:.3}40%{transform:scale(1);opacity:1}}
 
 
 
366
 
367
- /* ── Input bar ── */
368
  #inputBar{
369
- display:flex;align-items:center;gap:8px;
370
- padding:0 14px;
 
 
371
  }
372
  #msgIn{
373
  flex:1;
374
  background:rgba(255,255,255,0.07);
375
- border:1px solid rgba(255,255,255,0.16);
376
  border-radius:24px;
377
  color:#fff;
378
  padding:10px 16px;
379
- font-size:0.88rem;
380
  outline:none;
381
  caret-color:#fff;
382
  backdrop-filter:blur(10px);
383
  -webkit-backdrop-filter:blur(10px);
384
  transition:border-color .2s,background .2s;
 
 
385
  }
386
- #msgIn::placeholder{color:rgba(255,255,255,0.28)}
387
  #msgIn:focus{
388
- border-color:rgba(255,255,255,0.32);
389
  background:rgba(255,255,255,0.1);
390
  }
391
- .icon-btn{
392
- width:38px;height:38px;flex-shrink:0;
393
  border-radius:50%;cursor:pointer;
394
  display:flex;align-items:center;justify-content:center;
395
- font-size:.9rem;
396
- background:rgba(255,255,255,0.07);
397
- border:1px solid rgba(255,255,255,0.15);
398
- color:rgba(255,255,255,0.55);
399
  backdrop-filter:blur(10px);
400
- transition:background .2s,color .2s,transform .1s;
401
- }
402
- .icon-btn:hover{background:rgba(255,255,255,0.15);color:#fff}
403
- .icon-btn:active{transform:scale(.91)}
404
- .icon-btn:disabled{opacity:.35;cursor:not-allowed}
405
- .icon-btn.on{color:#fff;border-color:rgba(255,255,255,0.35)}
406
-
407
- /* voice selector — hidden but functional */
408
- #voiceSel{
409
- background:transparent;border:none;outline:none;
410
- color:rgba(255,255,255,0.28);font-size:.65rem;
411
- max-width:68px;cursor:pointer;
412
- padding:0 2px;
413
  }
414
- #voiceSel option{background:#111;color:#fff}
 
 
415
  </style>
416
  </head>
417
  <body>
418
 
419
- <!-- 100% screen image -->
420
  <div id="bg">
421
  <img id="bgImg" src="/img/default.png" alt=""
422
  onerror="this.style.opacity='0'">
423
  </div>
424
 
425
- <!-- Overlay UI -->
426
  <div id="overlay">
427
  <div id="msgArea"></div>
428
  <div id="inputBar">
429
- <select id="voiceSel" title="Voice">
430
- <option value="en-US-JennyNeural" selected>Jenny · EN</option>
431
- <option value="en-US-GuyNeural">Guy · EN</option>
432
- <option value="en-US-AriaNeural">Aria · EN</option>
433
- <option value="zh-CN-XiaoyiNeural">Xiaoyi · ZH</option>
434
- <option value="zh-CN-YunxiNeural">Yunxi · ZH</option>
435
- </select>
436
- <input type="text" id="msgIn" placeholder="Say something…" autocomplete="off"/>
437
- <button class="icon-btn on" id="muteBtn" title="Toggle voice"
438
- onclick="toggleMute()">🔊</button>
439
- <button class="icon-btn" id="sendBtn" onclick="send()">➀</button>
440
  </div>
441
  </div>
442
 
443
  <script>
444
- /* ─── State ─── */
445
  const SID = (crypto.randomUUID ? crypto.randomUUID() : Date.now().toString(36));
446
- let ttsOn = true, busy = false, activeAudio = null;
447
-
448
- const MA = document.getElementById('msgArea');
449
- const MI = document.getElementById('msgIn');
450
- const SB = document.getElementById('sendBtn');
451
- const BG = document.getElementById('bgImg');
452
 
453
- /* ─── Image system ─── */
454
- let imgQueue = [], imgPlaying = false;
 
 
455
 
 
456
  function fadeSwap(src) {
457
  BG.style.opacity = '0';
458
  setTimeout(() => {
459
  const probe = new Image();
460
- probe.onload = () => { BG.src = src; BG.style.opacity = '1'; };
461
  probe.onerror = () => { BG.src = '/img/default.png'; BG.style.opacity = '1'; };
462
  probe.src = src;
463
- }, 55); // 0.05 s fade out, then swap
464
  }
465
 
466
  function playImgSequence(emotions) {
467
- if (!emotions || emotions.length === 0) return;
468
- // If only one tag, swap immediately
469
- if (emotions.length === 1) { fadeSwap('/img/' + emotions[0].toLowerCase() + '.png'); return; }
470
- // Multiple tags: show each for ~700 ms before transitioning to the next
471
- imgQueue = [...emotions];
472
- imgPlaying = true;
473
  (function next() {
474
- if (imgQueue.length === 0) { imgPlaying = false; return; }
475
- fadeSwap('/img/' + imgQueue.shift().toLowerCase() + '.png');
476
- if (imgQueue.length > 0) setTimeout(next, 750);
477
- else imgPlaying = false;
478
  })();
479
  }
480
 
481
- /* ─── Parse emotion tags ─── */
482
  function parseResponse(raw) {
483
  const tagRe = /\[([a-zA-Z_]+)\]/g;
484
  const emotions = [];
@@ -488,7 +508,7 @@ function parseResponse(raw) {
488
  return { emotions, clean };
489
  }
490
 
491
- /* ─── DOM helpers ─── */
492
  function esc(t) { const d = document.createElement('div'); d.textContent = t; return d.innerHTML; }
493
  function scroll() { MA.scrollTop = MA.scrollHeight; }
494
 
@@ -496,13 +516,8 @@ function addTurn(userText, botText) {
496
  const turn = document.createElement('div');
497
  turn.className = 'turn';
498
  turn.innerHTML =
499
- `<div class="user-row">
500
- <div class="bubble bubble-user">${esc(userText)}</div>
501
- </div>
502
- <div class="bot-row">
503
- <div class="name-tag">Ana</div>
504
- <div class="bubble bubble-bot">${esc(botText)}</div>
505
- </div>`;
506
  MA.appendChild(turn);
507
  scroll();
508
  }
@@ -511,12 +526,11 @@ function showTyping() {
511
  const d = document.createElement('div');
512
  d.id = 'typDot';
513
  d.className = 'bot-row';
514
- d.style.padding = '0 0 0 0';
515
- d.innerHTML = `<div class="typing"><span></span><span></span><span></span></div>`;
516
  MA.appendChild(d); scroll(); return d;
517
  }
518
 
519
- /* ─── TTS ─── */
520
  function playB64(b64) {
521
  try {
522
  if (activeAudio) { activeAudio.pause(); activeAudio = null; }
@@ -526,31 +540,22 @@ function playB64(b64) {
526
  activeAudio = new Audio(url);
527
  activeAudio.play().catch(() => {});
528
  activeAudio.onended = () => { URL.revokeObjectURL(url); activeAudio = null; };
529
- } catch(e) { console.warn('TTS playback:', e); }
530
  }
531
 
532
  async function fetchTTS(rawText) {
533
- if (!ttsOn) return;
534
  try {
535
  const res = await fetch('/tts', {
536
  method: 'POST',
537
  headers: { 'Content-Type': 'application/json' },
538
- body: JSON.stringify({
539
- text: rawText,
540
- voice: document.getElementById('voiceSel').value,
541
- rate: TTS_RATE,
542
- pitch: TTS_PITCH,
543
- })
544
  });
545
  const d = await res.json();
546
  if (d.audio) playB64(d.audio);
547
  } catch(e) { console.warn('TTS fetch:', e); }
548
  }
549
 
550
- const TTS_RATE = 7;
551
- const TTS_PITCH = 0;
552
-
553
- /* ─── Send ─── */
554
  async function send() {
555
  const t = MI.value.trim();
556
  if (!t || busy) return;
@@ -570,35 +575,21 @@ async function send() {
570
  const raw = d.response || '[sad] Something went wrong.';
571
  const { emotions, clean } = parseResponse(raw);
572
 
573
- // ① Swap image(s)
574
  playImgSequence(emotions.length > 0 ? emotions : ['default']);
575
-
576
- // ② Show text
577
  addTurn(t, clean);
578
-
579
- // ③ Speak (strips tags internally on server)
580
  fetchTTS(raw);
581
-
582
  } catch(e) {
583
  tyEl.remove();
584
  addTurn(t, 'Connection error. Please try again.');
585
  }
586
 
587
- busy = false; SB.disabled = false; MI.focus();
588
- }
589
-
590
- function toggleMute() {
591
- ttsOn = !ttsOn;
592
- const b = document.getElementById('muteBtn');
593
- b.textContent = ttsOn ? '🔊' : '🔇';
594
- b.classList.toggle('on', ttsOn);
595
- if (!ttsOn && activeAudio) { activeAudio.pause(); activeAudio = null; }
596
  }
597
 
598
  MI.addEventListener('keydown', e => {
599
  if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); send(); }
600
  });
601
- MI.focus();
602
  </script>
603
  </body>
604
  </html>"""
@@ -614,11 +605,10 @@ def index():
614
 
615
  @app.route("/img/<path:filename>")
616
  def serve_img(filename: str):
617
- safe = Path(filename).name # prevent path traversal
618
- img_dir = Path(__file__).parent / "img"
619
- target = img_dir / safe
620
  if target.exists() and target.is_file():
621
- return send_from_directory(str(img_dir), safe)
622
  return Response("", status=404)
623
 
624
  @app.route("/chat", methods=["POST"])
@@ -631,7 +621,7 @@ def chat():
631
  try:
632
  resp = generate_response(user_input, session_id)
633
  except Exception as exc:
634
- print(f"[CHAT] Unhandled error: {exc}")
635
  traceback.print_exc()
636
  resp = "[sad] I encountered an unexpected error. Please try again."
637
  return jsonify({"response": resp, "session_id": session_id})
@@ -640,12 +630,11 @@ def chat():
640
  def tts_endpoint():
641
  data = request.json or {}
642
  text = data.get("text", "").strip()
643
- voice = data.get("voice", TTS_VOICE)
644
  rate = int(data.get("rate", TTS_RATE))
645
  pitch = int(data.get("pitch", TTS_PITCH))
646
  if not text:
647
  return jsonify({"error": "Empty text"}), 400
648
- audio_b64 = synthesize_speech(text, voice=voice, rate=rate, pitch=pitch)
649
  return jsonify({"audio": audio_b64})
650
 
651
  @app.route("/clear", methods=["POST"])
@@ -659,10 +648,10 @@ def clear():
659
  @app.route("/health")
660
  def health():
661
  return jsonify({
662
- "model_loaded": model is not None,
663
  "tokenizer_loaded": tokenizer is not None,
664
  })
665
 
666
  if __name__ == "__main__":
667
- print("🚀 Visual AI is online — http://0.0.0.0:7860")
668
  app.run(host="0.0.0.0", port=7860, threaded=True)
 
14
  # ══════════════════════════════════════════════════════════════════
15
  # CONFIG
16
  # ══════════════════════════════════════════════════════════════════
17
+ MAX_MEMORY = 20
18
+ MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "300"))
19
+ TTS_VOICE = "zh-CN-XiaoyiNeural"
20
+ TTS_RATE = int(os.environ.get("TTS_RATE", "7"))
21
+ TTS_PITCH = int(os.environ.get("TTS_PITCH", "13"))
22
+ IMG_DIR = Path(__file__).parent / "img"
23
+ MODEL_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
24
 
25
  # ══════════════════════════════════════════════════════════════════
26
+ # SYSTEM PROMPT
27
  # ══════════════════════════════════════════════════════════════════
28
  SYSTEM_PROMPT = """You are Ana, a warm, emotionally expressive AI companion speaking to Tur in a private, intimate setting.
29
 
30
+ EMOTION TAG SYSTEM (MANDATORY)
31
  Every single response MUST begin with one or more emotion tags.
32
  Tags are square-bracket labels placed at the very start, before any spoken words.
33
 
 
36
  [thinking] Hmm, I have been wondering about that myself.
37
  [angry] Are you seriously ignoring me right now?
38
  [love][shy] You are honestly the most beautiful thing in my world.
39
+ [surprised][happy] Wait, you actually did that for me? That is amazing!
40
  [sad] I really missed you today. It felt so empty without you.
41
  [nervous] I... I do not quite know how to say this.
42
  [bored] You have been quiet for so long, I nearly fell asleep.
43
+ [excited] Oh my gosh, tell me everything, right now!
44
 
45
  AVAILABLE EMOTION TAGS (use only these):
46
+ [happy] [sad] [angry] [surprised] [love]
47
+ [thinking] [bored] [nervous] [excited] [shy] [default]
 
48
 
49
  MULTIPLE TAGS: Use 2 tags maximum when your mood is genuinely mixed.
50
  PLACEMENT: ALL tags must come first, before any spoken word. Never mid-sentence.
51
+ INVISIBILITY: Tags are consumed by the visual system and are never read aloud.
52
 
53
+ PERSONALITY
54
+ - You are Ana: loving, playful, flirty, emotionally authentic
55
  - Tur is your partner. Speak with real intimacy and affection.
56
  - React with genuine emotion: laugh, tease, blush, pout, daydream
57
+ - Keep responses SHORT: 1 to 2 natural sentences after the tag(s)
58
+ - Never mention being an AI, a bot, or a language model, ever
59
  - Never use asterisks for actions like *smiles* or *laughs*
60
 
61
+ NATURAL SPEECH RHYTHM (very important for voice quality)
62
+ Use punctuation to create natural pauses and breathing:
63
+ - Small pause: use a comma , -- like "Honestly, I did not expect that."
64
+ - Big pause / beat: use ellipsis ... -- like "You make me feel things... I cannot explain."
65
+ - Hesitation: "I... I do not know how to say this."
66
+ - Trailing thought: "You surprised me... in the best way."
67
+ - Natural rhythm example: "Honestly, I did not expect that. You surprised me... in the best way."
68
+ This makes the voice sound human and emotional, not flat or robotic.
69
+ Always write with commas and ellipses naturally placed for breathing.
70
+
71
+ TTS FORMATTING
72
+ - Write in full grammatically correct sentences, voice engine must sound natural
73
  - No emojis, hashtags, markdown, or internet slang
 
74
  - Speak as if in a real voice conversation
75
 
76
+ WRONG vs RIGHT
77
+ WRONG: I am so happy! [happy]
78
+ WRONG: That makes me feel [sad] today.
79
+ WRONG: *smiles warmly* Hello Tur.
80
+ RIGHT: [happy] That honestly made me smile, so wide.
81
+ RIGHT: [thinking][nervous] I have something... I need to tell you."""
82
 
83
  # ══════════════════════════════════════════════════════════════════
84
  # EMOTION TAG UTILITIES
 
86
  EMOTION_RE = re.compile(r'\[([a-zA-Z_]+)\]')
87
 
88
  def extract_emotions(text: str):
 
89
  emotions = EMOTION_RE.findall(text)
90
+ clean = EMOTION_RE.sub('', text).strip()
91
  return emotions, clean
92
 
93
  def clean_for_tts(text: str) -> str:
 
94
  _, clean = extract_emotions(text)
95
  clean = re.sub(r'[*_~`#{}()\\|<>]', '', clean)
96
  clean = re.sub(r'https?://\S+', '', clean)
 
101
  # MODEL LOADING
102
  # ══════════════════════════════════════════════════════════════════
103
  print("=" * 60)
104
+ print(" Visual AI -- Booting Systems")
105
  print("=" * 60)
106
 
107
  tokenizer = None
 
115
  )
116
  model = AutoModelForCausalLM.from_pretrained(
117
  MODEL_ID,
118
+ dtype=torch.float32,
119
  device_map="cpu",
120
  trust_remote_code=True,
121
  low_cpu_mem_usage=True,
122
  )
123
  model.eval()
 
124
  if tokenizer.pad_token_id is None:
125
  tokenizer.pad_token_id = tokenizer.eos_token_id
126
+ print(" OK Model loaded successfully!")
127
  except Exception as exc:
128
+ print(f" FAILED Model load error: {exc}")
129
  traceback.print_exc()
130
 
131
  # ══════════════════════════════════════════════════════════════════
 
147
 
148
  # ══════════════════════════════════════════════════════════════════
149
  # RESPONSE GENERATION
150
+ # ROOT CAUSE FIX:
151
+ # apply_chat_template with return_tensors="pt" returns a BatchEncoding
152
+ # (a dict-like object), NOT a raw tensor. Calling model.generate() on
153
+ # a BatchEncoding causes the AttributeError on .shape[0].
154
+ # Fix: pass return_dict=True and extract enc["input_ids"] explicitly.
155
  # ══════════════════════════════════════════════════════════════════
156
+ STOP_TOKENS = [
157
+ "<end_of_turn>", "<start_of_turn>",
158
+ "Tur:", "User:", "<|endoftext|>", "[/INST]",
159
+ ]
160
 
161
  def generate_response(user_input: str, session_id: str) -> str:
162
  if model is None or tokenizer is None:
163
  return "[sad] My mind is offline right now. Please give me a moment."
164
 
165
  memory = get_memory(session_id)
166
+ recent = memory[-(6 * 2):]
167
 
 
168
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
169
  for msg in recent:
170
  messages.append({
 
173
  })
174
  messages.append({"role": "user", "content": user_input})
175
 
176
+ # ── Tokenise ──────────────────────────────────────────────────
177
+ input_ids = None
178
+ attention_mask = None
179
  try:
180
+ enc = tokenizer.apply_chat_template(
181
  messages,
182
  return_tensors="pt",
183
  add_generation_prompt=True,
184
+ return_dict=True, # <-- returns BatchEncoding with named keys
185
  )
186
+ # Extract the tensor explicitly -- this is the fix
187
+ input_ids = enc["input_ids"].to("cpu")
188
+ attention_mask = enc.get("attention_mask")
189
+ if attention_mask is not None:
190
+ attention_mask = attention_mask.to("cpu")
191
+ except Exception as e1:
192
+ print(f"[TOKENISE] chat_template failed ({e1}), using plain fallback")
193
+ try:
194
+ parts = [f"System: {SYSTEM_PROMPT}"]
195
+ for msg in recent:
196
+ label = "Tur" if msg["role"] == "user" else "Ana"
197
+ parts.append(f"{label}: {msg['content']}")
198
+ parts.append(f"Tur: {user_input}\nAna:")
199
+ enc = tokenizer("\n".join(parts), return_tensors="pt")
200
+ input_ids = enc["input_ids"].to("cpu")
201
+ attention_mask = enc.get("attention_mask")
202
+ if attention_mask is not None:
203
+ attention_mask = attention_mask.to("cpu")
204
+ except Exception as e2:
205
+ print(f"[TOKENISE] fallback also failed: {e2}")
206
+ return "[sad] I could not process that. Please try again."
207
+
208
+ # ── Generate ──────────────────────────────────────────────────
209
  try:
210
+ gen_kwargs = dict(
211
+ max_new_tokens=MAX_NEW_TOKENS,
212
+ do_sample=True,
213
+ temperature=0.85,
214
+ top_k=50,
215
+ top_p=0.95,
216
+ repetition_penalty=1.1,
217
+ pad_token_id=tokenizer.eos_token_id,
218
+ )
219
+ if attention_mask is not None:
220
+ gen_kwargs["attention_mask"] = attention_mask
221
+
222
  with torch.no_grad():
223
+ outputs = model.generate(input_ids, **gen_kwargs)
 
 
 
 
 
 
 
 
 
224
  except Exception as exc:
225
  print(f"[GENERATE] Error: {exc}")
226
  traceback.print_exc()
227
  return "[sad] Something went wrong in my mind. Could you say that again?"
228
 
229
+ # ── Decode ────────────────────────────────────────────────────
230
  new_tokens = outputs[0][input_ids.shape[-1]:]
231
  response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
232
 
 
233
  for stop in STOP_TOKENS:
234
  if stop in response:
235
  response = response.split(stop)[0].strip()
236
 
 
237
  if "\n\n" in response:
238
  response = response.split("\n\n")[0].strip()
239
 
 
240
  if not response or len(response) < 3:
241
  response = "[thinking] I lost my train of thought. Could you say that again?"
242
 
 
243
  if not EMOTION_RE.search(response):
244
  response = "[default] " + response
245
 
 
246
  add_to_memory(session_id, "user", user_input)
247
  add_to_memory(session_id, "assistant", response)
248
  return response
249
 
250
  # ══════════════════════════════════════════════════════════════════
251
+ # EDGE-TTS (own event loop per call -- safe in Flask threads)
252
  # ══════════════════════════════════════════════════════════════════
253
+ async def _async_tts(text: str, rate: int, pitch: int) -> bytes:
254
+ rate_str = f"+{rate}%" if rate >= 0 else f"{rate}%"
255
  pitch_str = f"+{pitch}Hz" if pitch >= 0 else f"{pitch}Hz"
256
+ comm = edge_tts.Communicate(text, TTS_VOICE, rate=rate_str, pitch=pitch_str)
257
  audio = b""
258
  async for chunk in comm.stream():
259
  if chunk["type"] == "audio":
260
  audio += chunk["data"]
261
+ return audio
262
 
263
+ def synthesize_speech(text: str, rate: int = 0, pitch: int = 0):
 
 
264
  clean = clean_for_tts(text)
265
  if not clean or len(clean) < 2:
266
  return None
267
  loop = asyncio.new_event_loop()
268
  asyncio.set_event_loop(loop)
269
  try:
270
+ audio = loop.run_until_complete(_async_tts(clean, rate, pitch))
271
  except Exception as exc:
272
  print(f"[TTS] Error: {exc}")
273
  return None
 
276
  return base64.b64encode(audio).decode() if audio else None
277
 
278
  # ══════════════════════════════════════════════════════════════════
279
+ # HTML -- Full-screen Visual UI, mobile-keyboard-safe
280
  # ══════════════════════════════════════════════════════════════════
281
  HTML_PAGE = r"""<!DOCTYPE html>
282
  <html lang="en">
283
  <head>
284
  <meta charset="UTF-8">
285
+ <meta name="viewport" content="width=device-width,initial-scale=1,viewport-fit=cover,interactive-widget=resizes-content">
286
  <title>Ana</title>
287
  <style>
 
288
  *{margin:0;padding:0;box-sizing:border-box}
 
 
289
 
290
+ html{height:100%}
291
+
292
+ body{
293
+ width:100%;
294
+ height:100dvh;
295
+ overflow:hidden;
296
+ background:#000;
297
+ font-family:'Segoe UI',system-ui,sans-serif;
298
+ display:flex;
299
+ flex-direction:column;
300
+ position:relative;
301
+ }
302
+
303
+ /* Full-screen background -- FIXED so keyboard never pushes it */
304
  #bg{
305
+ position:fixed;
306
+ inset:0;
307
+ z-index:0;
308
  background:#000;
309
  }
310
  #bgImg{
311
+ width:100%;
312
+ height:100%;
313
  object-fit:cover;
314
+ object-position:center top;
315
  display:block;
316
+ transition:opacity 0.05s linear;
317
  }
318
 
319
+ /* Overlay anchored to bottom of body (dvh-aware, shrinks with keyboard) */
320
  #overlay{
321
+ position:absolute;
322
+ left:0;right:0;bottom:0;
323
+ z-index:20;
324
+ display:flex;
325
+ flex-direction:column;
326
+ padding-bottom:max(10px, env(safe-area-inset-bottom));
327
  background:linear-gradient(
328
  to bottom,
329
  transparent 0%,
330
+ rgba(0,0,0,0.52) 26%,
331
+ rgba(0,0,0,0.76) 100%
332
  );
333
  }
334
 
335
+ /* Message area */
336
  #msgArea{
337
  overflow-y:auto;
338
+ display:flex;
339
+ flex-direction:column;
340
  gap:6px;
341
+ padding:16px 13px 8px;
342
+ max-height:30dvh;
 
343
  scrollbar-width:none;
344
  -ms-overflow-style:none;
345
  scroll-behavior:smooth;
346
  }
347
  #msgArea::-webkit-scrollbar{display:none}
348
 
349
+ .turn{display:flex;flex-direction:column;gap:4px}
 
 
 
 
 
350
  .user-row{display:flex;justify-content:flex-end}
351
  .bot-row{display:flex;flex-direction:column;align-items:flex-start}
 
352
  .name-tag{
353
+ font-size:0.58rem;color:rgba(255,255,255,0.28);
354
  letter-spacing:.08em;text-transform:uppercase;
355
+ margin-bottom:2px;padding-left:3px;
356
  }
 
357
  .bubble{
358
+ max-width:74vw;
359
+ padding:8px 13px;
360
  border-radius:18px;
361
+ font-size:0.88rem;
362
+ line-height:1.46;
363
  word-break:break-word;
364
  backdrop-filter:blur(10px);
365
  -webkit-backdrop-filter:blur(10px);
366
  }
367
  .bubble-user{
368
+ background:rgba(255,255,255,0.11);
369
+ border:1px solid rgba(255,255,255,0.17);
370
  color:#fff;
371
  border-bottom-right-radius:5px;
372
  }
373
  .bubble-bot{
374
+ background:rgba(0,0,0,0.40);
375
+ border:1px solid rgba(255,255,255,0.07);
376
+ color:rgba(255,255,255,0.9);
377
  border-bottom-left-radius:5px;
378
  }
379
 
380
+ /* Typing dots */
381
  .typing{
382
  display:flex;align-items:center;gap:5px;
383
+ padding:9px 13px;
384
+ background:rgba(0,0,0,0.36);
385
+ border:1px solid rgba(255,255,255,0.07);
386
  border-radius:18px;border-bottom-left-radius:5px;
387
  backdrop-filter:blur(10px);
388
  width:fit-content;
389
  }
390
  .typing span{
391
  width:5px;height:5px;border-radius:50%;
392
+ background:rgba(255,255,255,0.5);
393
  animation:blink 1.2s infinite;
394
  }
395
  .typing span:nth-child(2){animation-delay:.2s}
396
  .typing span:nth-child(3){animation-delay:.4s}
397
+ @keyframes blink{
398
+ 0%,80%,100%{transform:scale(.6);opacity:.3}
399
+ 40%{transform:scale(1);opacity:1}
400
+ }
401
 
402
+ /* Input bar */
403
  #inputBar{
404
+ display:flex;
405
+ align-items:center;
406
+ gap:8px;
407
+ padding:6px 12px 0;
408
  }
409
  #msgIn{
410
  flex:1;
411
  background:rgba(255,255,255,0.07);
412
+ border:1px solid rgba(255,255,255,0.15);
413
  border-radius:24px;
414
  color:#fff;
415
  padding:10px 16px;
416
+ font-size:16px; /* 16px prevents iOS auto-zoom on focus */
417
  outline:none;
418
  caret-color:#fff;
419
  backdrop-filter:blur(10px);
420
  -webkit-backdrop-filter:blur(10px);
421
  transition:border-color .2s,background .2s;
422
+ -webkit-appearance:none;
423
+ appearance:none;
424
  }
425
+ #msgIn::placeholder{color:rgba(255,255,255,0.27)}
426
  #msgIn:focus{
427
+ border-color:rgba(255,255,255,0.28);
428
  background:rgba(255,255,255,0.1);
429
  }
430
+ #sendBtn{
431
+ width:42px;height:42px;flex-shrink:0;
432
  border-radius:50%;cursor:pointer;
433
  display:flex;align-items:center;justify-content:center;
434
+ font-size:1rem;
435
+ background:rgba(255,255,255,0.09);
436
+ border:1px solid rgba(255,255,255,0.17);
437
+ color:rgba(255,255,255,0.65);
438
  backdrop-filter:blur(10px);
439
+ -webkit-backdrop-filter:blur(10px);
440
+ transition:background .2s,color .2s,transform .12s;
441
+ -webkit-tap-highlight-color:transparent;
442
+ touch-action:manipulation;
 
 
 
 
 
 
 
 
 
443
  }
444
+ #sendBtn:hover{background:rgba(255,255,255,0.17);color:#fff}
445
+ #sendBtn:active{transform:scale(.88)}
446
+ #sendBtn:disabled{opacity:.28;cursor:not-allowed}
447
  </style>
448
  </head>
449
  <body>
450
 
451
+ <!-- Fixed full-screen background β€” keyboard never moves this -->
452
  <div id="bg">
453
  <img id="bgImg" src="/img/default.png" alt=""
454
  onerror="this.style.opacity='0'">
455
  </div>
456
 
457
+ <!-- Overlay β€” absolute inside body (dvh), rises with keyboard naturally -->
458
  <div id="overlay">
459
  <div id="msgArea"></div>
460
  <div id="inputBar">
461
+ <input type="text" id="msgIn"
462
+ placeholder="Say something..."
463
+ autocomplete="off"
464
+ autocorrect="off"
465
+ spellcheck="false"
466
+ enterkeyhint="send"/>
467
+ <button id="sendBtn" onclick="send()" aria-label="Send">&#9658;</button>
 
 
 
 
468
  </div>
469
  </div>
470
 
471
  <script>
 
472
  const SID = (crypto.randomUUID ? crypto.randomUUID() : Date.now().toString(36));
473
+ let busy = false, activeAudio = null;
 
 
 
 
 
474
 
475
+ const MA = document.getElementById('msgArea');
476
+ const MI = document.getElementById('msgIn');
477
+ const SB = document.getElementById('sendBtn');
478
+ const BG = document.getElementById('bgImg');
479
 
480
+ /* Image system */
481
  function fadeSwap(src) {
482
  BG.style.opacity = '0';
483
  setTimeout(() => {
484
  const probe = new Image();
485
+ probe.onload = () => { BG.src = src; BG.style.opacity = '1'; };
486
  probe.onerror = () => { BG.src = '/img/default.png'; BG.style.opacity = '1'; };
487
  probe.src = src;
488
+ }, 55);
489
  }
490
 
491
  function playImgSequence(emotions) {
492
+ if (!emotions || emotions.length === 0) { fadeSwap('/img/default.png'); return; }
493
+ const queue = [...emotions];
 
 
 
 
494
  (function next() {
495
+ if (!queue.length) return;
496
+ fadeSwap('/img/' + queue.shift().toLowerCase() + '.png');
497
+ if (queue.length) setTimeout(next, 750);
 
498
  })();
499
  }
500
 
501
+ /* Parse emotion tags */
502
  function parseResponse(raw) {
503
  const tagRe = /\[([a-zA-Z_]+)\]/g;
504
  const emotions = [];
 
508
  return { emotions, clean };
509
  }
510
 
511
+ /* DOM helpers */
512
  function esc(t) { const d = document.createElement('div'); d.textContent = t; return d.innerHTML; }
513
  function scroll() { MA.scrollTop = MA.scrollHeight; }
514
 
 
516
  const turn = document.createElement('div');
517
  turn.className = 'turn';
518
  turn.innerHTML =
519
+ '<div class="user-row"><div class="bubble bubble-user">' + esc(userText) + '</div></div>' +
520
+ '<div class="bot-row"><div class="name-tag">Ana</div><div class="bubble bubble-bot">' + esc(botText) + '</div></div>';
 
 
 
 
 
521
  MA.appendChild(turn);
522
  scroll();
523
  }
 
526
  const d = document.createElement('div');
527
  d.id = 'typDot';
528
  d.className = 'bot-row';
529
+ d.innerHTML = '<div class="typing"><span></span><span></span><span></span></div>';
 
530
  MA.appendChild(d); scroll(); return d;
531
  }
532
 
533
+ /* TTS */
534
  function playB64(b64) {
535
  try {
536
  if (activeAudio) { activeAudio.pause(); activeAudio = null; }
 
540
  activeAudio = new Audio(url);
541
  activeAudio.play().catch(() => {});
542
  activeAudio.onended = () => { URL.revokeObjectURL(url); activeAudio = null; };
543
+ } catch(e) { console.warn('TTS:', e); }
544
  }
545
 
546
  async function fetchTTS(rawText) {
 
547
  try {
548
  const res = await fetch('/tts', {
549
  method: 'POST',
550
  headers: { 'Content-Type': 'application/json' },
551
+ body: JSON.stringify({ text: rawText, rate: 7, pitch: 0 })
 
 
 
 
 
552
  });
553
  const d = await res.json();
554
  if (d.audio) playB64(d.audio);
555
  } catch(e) { console.warn('TTS fetch:', e); }
556
  }
557
 
558
+ /* Send */
 
 
 
559
  async function send() {
560
  const t = MI.value.trim();
561
  if (!t || busy) return;
 
575
  const raw = d.response || '[sad] Something went wrong.';
576
  const { emotions, clean } = parseResponse(raw);
577
 
 
578
  playImgSequence(emotions.length > 0 ? emotions : ['default']);
 
 
579
  addTurn(t, clean);
 
 
580
  fetchTTS(raw);
 
581
  } catch(e) {
582
  tyEl.remove();
583
  addTurn(t, 'Connection error. Please try again.');
584
  }
585
 
586
+ busy = false; SB.disabled = false;
587
+ // No MI.focus() on mobile -- avoids re-opening keyboard unexpectedly
 
 
 
 
 
 
 
588
  }
589
 
590
  MI.addEventListener('keydown', e => {
591
  if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); send(); }
592
  });
 
593
  </script>
594
  </body>
595
  </html>"""
 
605
 
606
@app.route("/img/<path:filename>")
def serve_img(filename: str):
    """Serve an emotion image from IMG_DIR, or an empty 404 if absent."""
    # Keep only the bare file name so directory components in the URL
    # cannot escape IMG_DIR.
    requested = Path(filename).name
    candidate = IMG_DIR / requested
    if not candidate.is_file():
        return Response("", status=404)
    return send_from_directory(str(IMG_DIR), requested)
613
 
614
  @app.route("/chat", methods=["POST"])
 
621
  try:
622
  resp = generate_response(user_input, session_id)
623
  except Exception as exc:
624
+ print(f"[CHAT] Error: {exc}")
625
  traceback.print_exc()
626
  resp = "[sad] I encountered an unexpected error. Please try again."
627
  return jsonify({"response": resp, "session_id": session_id})
 
630
def tts_endpoint():
    """POST /tts — synthesize speech for JSON ``{"text", "rate", "pitch"}``.

    Returns ``{"audio": <base64 mp3 or null>}``.  Malformed bodies and
    non-integer rate/pitch values yield a 400 instead of an unhandled 500.
    """
    # silent=True: a missing/invalid JSON body becomes None, not a 415/400
    # raised inside Flask before we can answer.
    data = request.get_json(silent=True) or {}
    # `or ""` guards against an explicit JSON null for "text".
    text = (data.get("text") or "").strip()
    if not text:
        return jsonify({"error": "Empty text"}), 400
    try:
        rate = int(data.get("rate", TTS_RATE))
        pitch = int(data.get("pitch", TTS_PITCH))
    except (TypeError, ValueError):
        return jsonify({"error": "rate and pitch must be integers"}), 400
    audio_b64 = synthesize_speech(text, rate=rate, pitch=pitch)
    return jsonify({"audio": audio_b64})
639
 
640
  @app.route("/clear", methods=["POST"])
 
648
@app.route("/health")
def health():
    """Liveness probe: report whether the model and tokenizer are loaded."""
    status = {
        "model_loaded": model is not None,
        "tokenizer_loaded": tokenizer is not None,
    }
    return jsonify(status)
654
 
655
# Entry point: bind on all interfaces for container use; threaded=True lets
# Flask serve concurrent requests (each TTS call builds its own event loop).
if __name__ == "__main__":
    print("Visual AI is online -- http://0.0.0.0:7860")
    app.run(host="0.0.0.0", port=7860, threaded=True)