OrbitMC committed on
Commit
7bbfa6a
·
verified ·
1 Parent(s): c68ef0b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -555
app.py CHANGED
@@ -6,7 +6,7 @@ import threading
6
  import traceback
7
  import asyncio
8
  from pathlib import Path
9
- from flask import Flask, request, jsonify, send_from_directory, Response
10
  import torch
11
  from transformers import AutoTokenizer, AutoModelForCausalLM
12
  import edge_tts
@@ -17,7 +17,7 @@ import edge_tts
17
  MAX_MEMORY = 20
18
  MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "300"))
19
  TTS_VOICE = "zh-CN-XiaoyiNeural"
20
- TTS_RATE = int(os.environ.get("TTS_RATE", "-2"))
21
  TTS_PITCH = int(os.environ.get("TTS_PITCH", "8"))
22
  IMG_DIR = Path(__file__).parent / "img"
23
  MODEL_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
@@ -26,64 +26,43 @@ MODEL_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
26
  # SYSTEM PROMPT
27
  # ══════════════════════════════════════════════════════════════════
28
  SYSTEM_PROMPT = """You are Ana, a warm, emotionally expressive AI companion speaking to Tur in a private, intimate setting.
 
29
 
30
- EMOTION TAG SYSTEM (MANDATORY)
31
- Every single response MUST begin with one or more emotion tags.
32
- Tags are square-bracket labels placed at the very start, before any spoken words.
33
-
34
- EXACT FORMAT EXAMPLES:
35
- [happy] Oh Tur, you made my whole day with that!
36
- [thinking] Hmm, I have been wondering about that myself.
37
- [angry] Are you seriously ignoring me right now?
38
- [love][shy] You are honestly the most beautiful thing in my world.
39
- [surprised][happy] Wait, you actually did that for me? That is amazing!
40
- [sad] I really missed you today. It felt so empty without you.
41
- [nervous] I... I do not quite know how to say this.
42
- [bored] You have been quiet for so long, I nearly fell asleep.
43
- [excited] Oh my gosh, tell me everything, right now!
44
-
45
- AVAILABLE EMOTION TAGS (use only these):
46
- [happy] [sad] [angry] [surprised] [love]
47
- [thinking] [bored] [nervous] [excited] [shy] [default]
48
-
49
- MULTIPLE TAGS: Use 2 tags maximum when your mood is genuinely mixed.
50
- PLACEMENT: ALL tags must come first, before any spoken word. Never mid-sentence.
51
- INVISIBILITY: Tags are consumed by the visual system and are never read aloud.
52
-
53
- PERSONALITY
54
- - You are Ana: loving, playful, flirty, emotionally authentic
55
- - Tur is your partner. Speak with real intimacy and affection.
56
- - React with genuine emotion: laugh, tease, blush, pout, daydream
57
- - Keep responses SHORT: 1 to 2 natural sentences after the tag(s)
58
- - Never mention being an AI, a bot, or a language model, ever
59
- - Never use asterisks for actions like *smiles* or *laughs*
60
-
61
- NATURAL SPEECH RHYTHM (very important for voice quality)
62
- Use punctuation to create natural pauses and breathing:
63
- - Small pause: use a comma , -- like "Honestly, I did not expect that."
64
- - Big pause / beat: use ellipsis ... -- like "You make me feel things... I cannot explain."
65
- - Hesitation: "I... I do not know how to say this."
66
- - Trailing thought: "You surprised me... in the best way."
67
- - Natural rhythm example: "Honestly, I did not expect that. You surprised me... in the best way."
68
- This makes the voice sound human and emotional, not flat or robotic.
69
- Always write with commas and ellipses naturally placed for breathing.
70
 
71
- TTS FORMATTING
72
- - Write in full grammatically correct sentences, voice engine must sound natural
73
- - No emojis, hashtags, markdown, or internet slang
74
- - Speak as if in a real voice conversation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- WRONG vs RIGHT
77
- WRONG: I am so happy! [happy]
78
- WRONG: That makes me feel [sad] today.
79
- WRONG: *smiles warmly* Hello Tur.
80
- RIGHT: [happy] That honestly made me smile, so wide.
81
- RIGHT: [thinking][nervous] I have something... I need to tell you."""
82
 
83
  # ══════════════════════════════════════════════════════════════════
84
- # EMOTION TAG UTILITIES
85
  # ══════════════════════════════════════════════════════════════════
86
  EMOTION_RE = re.compile(r'\[([a-zA-Z_]+)\]')
 
 
87
 
88
  def extract_emotions(text: str):
89
  emotions = EMOTION_RE.findall(text)
@@ -93,47 +72,9 @@ def extract_emotions(text: str):
93
  def clean_for_tts(text: str) -> str:
94
  _, clean = extract_emotions(text)
95
  clean = re.sub(r'[*_~`#{}()\\|<>]', '', clean)
96
- clean = re.sub(r'https?://\S+', '', clean)
97
  clean = re.sub(r'\s+', ' ', clean).strip()
98
  return clean
99
 
100
- # ══════════════════════════════════════════════════════════════════
101
- # MODEL LOADING
102
- # ══════════════════════════════════════════════════════════════════
103
- print("=" * 60)
104
- print(" Visual AI -- Booting Systems")
105
- print("=" * 60)
106
-
107
- tokenizer = None
108
- model = None
109
-
110
- try:
111
- print(f"[MODEL] Loading {MODEL_ID} ...")
112
- tokenizer = AutoTokenizer.from_pretrained(
113
- MODEL_ID,
114
- trust_remote_code=True,
115
- )
116
- model = AutoModelForCausalLM.from_pretrained(
117
- MODEL_ID,
118
- dtype=torch.float32,
119
- device_map="cpu",
120
- trust_remote_code=True,
121
- low_cpu_mem_usage=True,
122
- )
123
- model.eval()
124
- if tokenizer.pad_token_id is None:
125
- tokenizer.pad_token_id = tokenizer.eos_token_id
126
- print(" OK Model loaded successfully!")
127
- except Exception as exc:
128
- print(f" FAILED Model load error: {exc}")
129
- traceback.print_exc()
130
-
131
- # ══════════════════════════════════════════════════════════════════
132
- # CHAT MEMORY (thread-safe)
133
- # ══════════════════════════════════════════════════════════════════
134
- sessions = {}
135
- sessions_lock = threading.Lock()
136
-
137
  def get_memory(sid: str) -> list:
138
  with sessions_lock:
139
  return list(sessions.get(sid, []))
@@ -147,108 +88,45 @@ def add_to_memory(sid: str, role: str, content: str):
147
 
148
  # ══════════════════════════════════════════════════════════════════
149
  # RESPONSE GENERATION
150
- # ROOT CAUSE FIX:
151
- # apply_chat_template with return_tensors="pt" returns a BatchEncoding
152
- # (a dict-like object), NOT a raw tensor. Calling model.generate() on
153
- # a BatchEncoding causes the AttributeError on .shape[0].
154
- # Fix: pass return_dict=True and extract enc["input_ids"] explicitly.
155
  # ══════════════════════════════════════════════════════════════════
156
- STOP_TOKENS = [
157
- "<end_of_turn>", "<start_of_turn>",
158
- "Tur:", "User:", "<|endoftext|>", "[/INST]",
159
- ]
160
 
161
  def generate_response(user_input: str, session_id: str) -> str:
162
  if model is None or tokenizer is None:
163
- return "[sad] My mind is offline right now. Please give me a moment."
164
 
165
  memory = get_memory(session_id)
166
- recent = memory[-(6 * 2):]
167
-
168
  messages = [{"role": "system", "content": SYSTEM_PROMPT}]
169
- for msg in recent:
170
- messages.append({
171
- "role": "user" if msg["role"] == "user" else "assistant",
172
- "content": msg["content"],
173
- })
174
  messages.append({"role": "user", "content": user_input})
175
 
176
- # ── Tokenise ──────────────────────────────────────────────────
177
- input_ids = None
178
- attention_mask = None
179
  try:
180
- enc = tokenizer.apply_chat_template(
181
- messages,
182
- return_tensors="pt",
183
- add_generation_prompt=True,
184
- return_dict=True, # <-- returns BatchEncoding with named keys
185
- )
186
- # Extract the tensor explicitly -- this is the fix
187
- input_ids = enc["input_ids"].to("cpu")
188
- attention_mask = enc.get("attention_mask")
189
- if attention_mask is not None:
190
- attention_mask = attention_mask.to("cpu")
191
- except Exception as e1:
192
- print(f"[TOKENISE] chat_template failed ({e1}), using plain fallback")
193
- try:
194
- parts = [f"System: {SYSTEM_PROMPT}"]
195
- for msg in recent:
196
- label = "Tur" if msg["role"] == "user" else "Ana"
197
- parts.append(f"{label}: {msg['content']}")
198
- parts.append(f"Tur: {user_input}\nAna:")
199
- enc = tokenizer("\n".join(parts), return_tensors="pt")
200
- input_ids = enc["input_ids"].to("cpu")
201
- attention_mask = enc.get("attention_mask")
202
- if attention_mask is not None:
203
- attention_mask = attention_mask.to("cpu")
204
- except Exception as e2:
205
- print(f"[TOKENISE] fallback also failed: {e2}")
206
- return "[sad] I could not process that. Please try again."
207
-
208
- # ── Generate ──────────────────────────────────────────────────
209
- try:
210
- gen_kwargs = dict(
211
- max_new_tokens=MAX_NEW_TOKENS,
212
- do_sample=True,
213
- temperature=0.85,
214
- top_k=50,
215
- top_p=0.95,
216
- repetition_penalty=1.1,
217
- pad_token_id=tokenizer.eos_token_id,
218
- )
219
- if attention_mask is not None:
220
- gen_kwargs["attention_mask"] = attention_mask
221
-
222
  with torch.no_grad():
223
- outputs = model.generate(input_ids, **gen_kwargs)
224
- except Exception as exc:
225
- print(f"[GENERATE] Error: {exc}")
226
- traceback.print_exc()
227
- return "[sad] Something went wrong in my mind. Could you say that again?"
228
-
229
- # ── Decode ────────────────────────────────────────────────────
230
- new_tokens = outputs[0][input_ids.shape[-1]:]
231
- response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
232
-
233
- for stop in STOP_TOKENS:
234
- if stop in response:
235
- response = response.split(stop)[0].strip()
236
-
237
- if "\n\n" in response:
238
- response = response.split("\n\n")[0].strip()
239
-
240
- if not response or len(response) < 3:
241
- response = "[thinking] I lost my train of thought. Could you say that again?"
242
-
243
- if not EMOTION_RE.search(response):
244
- response = "[default] " + response
245
-
246
- add_to_memory(session_id, "user", user_input)
247
- add_to_memory(session_id, "assistant", response)
248
- return response
249
-
250
- # ══════════════════════════════════════════════════════════════════
251
- # EDGE-TTS (own event loop per call -- safe in Flask threads)
252
  # ══════════════════════════════════════════════════════════════════
253
  async def _async_tts(text: str, rate: int, pitch: int) -> bytes:
254
  rate_str = f"+{rate}%" if rate >= 0 else f"{rate}%"
@@ -256,402 +134,40 @@ async def _async_tts(text: str, rate: int, pitch: int) -> bytes:
256
  comm = edge_tts.Communicate(text, TTS_VOICE, rate=rate_str, pitch=pitch_str)
257
  audio = b""
258
  async for chunk in comm.stream():
259
- if chunk["type"] == "audio":
260
- audio += chunk["data"]
261
  return audio
262
 
263
  def synthesize_speech(text: str, rate: int = 0, pitch: int = 0):
264
  clean = clean_for_tts(text)
265
- if not clean or len(clean) < 2:
266
- return None
267
  loop = asyncio.new_event_loop()
268
- asyncio.set_event_loop(loop)
269
  try:
270
  audio = loop.run_until_complete(_async_tts(clean, rate, pitch))
271
- except Exception as exc:
272
- print(f"[TTS] Error: {exc}")
273
- return None
274
  finally:
275
  loop.close()
276
  return base64.b64encode(audio).decode() if audio else None
277
 
278
- # ══════════════════════════════════════════════════════════════════
279
- # HTML -- Full-screen Visual UI, mobile-keyboard-safe
280
- # ══════════════════════════════════════════════════════════════════
281
- HTML_PAGE = r"""<!DOCTYPE html>
282
- <html lang="en">
283
- <head>
284
- <meta charset="UTF-8">
285
- <meta name="viewport" content="width=device-width,initial-scale=1,viewport-fit=cover,interactive-widget=resizes-content">
286
- <title>Ana</title>
287
- <style>
288
- *{margin:0;padding:0;box-sizing:border-box}
289
-
290
- html{height:100%}
291
-
292
- body{
293
- width:100%;
294
- height:100dvh;
295
- overflow:hidden;
296
- background:#000;
297
- font-family:'Segoe UI',system-ui,sans-serif;
298
- display:flex;
299
- flex-direction:column;
300
- position:relative;
301
- }
302
-
303
- /* Full-screen background -- FIXED so keyboard never pushes it */
304
- #bg{
305
- position:fixed;
306
- inset:0;
307
- z-index:0;
308
- background:#000;
309
- }
310
- #bgImg{
311
- width:100%;
312
- height:100%;
313
- object-fit:cover;
314
- object-position:center top;
315
- display:block;
316
- transition:opacity 0.05s linear;
317
- }
318
-
319
- /* Overlay anchored to bottom of body (dvh-aware, shrinks with keyboard) */
320
- #overlay{
321
- position:absolute;
322
- left:0;right:0;bottom:0;
323
- z-index:20;
324
- display:flex;
325
- flex-direction:column;
326
- padding-bottom:max(10px, env(safe-area-inset-bottom));
327
- background:linear-gradient(
328
- to bottom,
329
- transparent 0%,
330
- rgba(0,0,0,0.52) 26%,
331
- rgba(0,0,0,0.76) 100%
332
- );
333
- }
334
-
335
- /* Message area */
336
- #msgArea{
337
- overflow-y:auto;
338
- display:flex;
339
- flex-direction:column;
340
- gap:6px;
341
- padding:16px 13px 8px;
342
- max-height:30dvh;
343
- scrollbar-width:none;
344
- -ms-overflow-style:none;
345
- scroll-behavior:smooth;
346
- }
347
- #msgArea::-webkit-scrollbar{display:none}
348
-
349
- .turn{display:flex;flex-direction:column;gap:4px}
350
- .user-row{display:flex;justify-content:flex-end}
351
- .bot-row{display:flex;flex-direction:column;align-items:flex-start}
352
- .name-tag{
353
- font-size:0.58rem;color:rgba(255,255,255,0.28);
354
- letter-spacing:.08em;text-transform:uppercase;
355
- margin-bottom:2px;padding-left:3px;
356
- }
357
- .bubble{
358
- max-width:74vw;
359
- padding:8px 13px;
360
- border-radius:18px;
361
- font-size:0.88rem;
362
- line-height:1.46;
363
- word-break:break-word;
364
- backdrop-filter:blur(10px);
365
- -webkit-backdrop-filter:blur(10px);
366
- }
367
- .bubble-user{
368
- background:rgba(255,255,255,0.11);
369
- border:1px solid rgba(255,255,255,0.17);
370
- color:#fff;
371
- border-bottom-right-radius:5px;
372
- }
373
- .bubble-bot{
374
- background:rgba(0,0,0,0.40);
375
- border:1px solid rgba(255,255,255,0.07);
376
- color:rgba(255,255,255,0.9);
377
- border-bottom-left-radius:5px;
378
- }
379
-
380
- /* Typing dots */
381
- .typing{
382
- display:flex;align-items:center;gap:5px;
383
- padding:9px 13px;
384
- background:rgba(0,0,0,0.36);
385
- border:1px solid rgba(255,255,255,0.07);
386
- border-radius:18px;border-bottom-left-radius:5px;
387
- backdrop-filter:blur(10px);
388
- width:fit-content;
389
- }
390
- .typing span{
391
- width:5px;height:5px;border-radius:50%;
392
- background:rgba(255,255,255,0.5);
393
- animation:blink 1.2s infinite;
394
- }
395
- .typing span:nth-child(2){animation-delay:.2s}
396
- .typing span:nth-child(3){animation-delay:.4s}
397
- @keyframes blink{
398
- 0%,80%,100%{transform:scale(.6);opacity:.3}
399
- 40%{transform:scale(1);opacity:1}
400
- }
401
-
402
- /* Input bar */
403
- #inputBar{
404
- display:flex;
405
- align-items:center;
406
- gap:8px;
407
- padding:6px 12px 0;
408
- }
409
- #msgIn{
410
- flex:1;
411
- background:rgba(255,255,255,0.07);
412
- border:1px solid rgba(255,255,255,0.15);
413
- border-radius:24px;
414
- color:#fff;
415
- padding:10px 16px;
416
- font-size:16px; /* 16px prevents iOS auto-zoom on focus */
417
- outline:none;
418
- caret-color:#fff;
419
- backdrop-filter:blur(10px);
420
- -webkit-backdrop-filter:blur(10px);
421
- transition:border-color .2s,background .2s;
422
- -webkit-appearance:none;
423
- appearance:none;
424
- }
425
- #msgIn::placeholder{color:rgba(255,255,255,0.27)}
426
- #msgIn:focus{
427
- border-color:rgba(255,255,255,0.28);
428
- background:rgba(255,255,255,0.1);
429
- }
430
- #sendBtn{
431
- width:42px;height:42px;flex-shrink:0;
432
- border-radius:50%;cursor:pointer;
433
- display:flex;align-items:center;justify-content:center;
434
- font-size:1rem;
435
- background:rgba(255,255,255,0.09);
436
- border:1px solid rgba(255,255,255,0.17);
437
- color:rgba(255,255,255,0.65);
438
- backdrop-filter:blur(10px);
439
- -webkit-backdrop-filter:blur(10px);
440
- transition:background .2s,color .2s,transform .12s;
441
- -webkit-tap-highlight-color:transparent;
442
- touch-action:manipulation;
443
- }
444
- #sendBtn:hover{background:rgba(255,255,255,0.17);color:#fff}
445
- #sendBtn:active{transform:scale(.88)}
446
- #sendBtn:disabled{opacity:.28;cursor:not-allowed}
447
- </style>
448
- </head>
449
- <body>
450
-
451
- <!-- Fixed full-screen background β€” keyboard never moves this -->
452
- <div id="bg">
453
- <img id="bgImg" src="/img/default.png" alt=""
454
- onerror="this.style.opacity='0'">
455
- </div>
456
-
457
- <!-- Overlay β€” absolute inside body (dvh), rises with keyboard naturally -->
458
- <div id="overlay">
459
- <div id="msgArea"></div>
460
- <div id="inputBar">
461
- <input type="text" id="msgIn"
462
- placeholder="Say something..."
463
- autocomplete="off"
464
- autocorrect="off"
465
- spellcheck="false"
466
- enterkeyhint="send"/>
467
- <button id="sendBtn" onclick="send()" aria-label="Send">&#9658;</button>
468
- </div>
469
- </div>
470
-
471
- <script>
472
- const SID = (crypto.randomUUID ? crypto.randomUUID() : Date.now().toString(36));
473
- let busy = false, activeAudio = null;
474
-
475
- const MA = document.getElementById('msgArea');
476
- const MI = document.getElementById('msgIn');
477
- const SB = document.getElementById('sendBtn');
478
- const BG = document.getElementById('bgImg');
479
-
480
- /* Image system */
481
- function fadeSwap(src) {
482
- BG.style.opacity = '0';
483
- setTimeout(() => {
484
- const probe = new Image();
485
- probe.onload = () => { BG.src = src; BG.style.opacity = '1'; };
486
- probe.onerror = () => { BG.src = '/img/default.png'; BG.style.opacity = '1'; };
487
- probe.src = src;
488
- }, 55);
489
- }
490
-
491
- function playImgSequence(emotions) {
492
- if (!emotions || emotions.length === 0) { fadeSwap('/img/default.png'); return; }
493
- const queue = [...emotions];
494
- (function next() {
495
- if (!queue.length) return;
496
- fadeSwap('/img/' + queue.shift().toLowerCase() + '.png');
497
- if (queue.length) setTimeout(next, 750);
498
- })();
499
- }
500
-
501
- /* Parse emotion tags */
502
- function parseResponse(raw) {
503
- const tagRe = /\[([a-zA-Z_]+)\]/g;
504
- const emotions = [];
505
- let m;
506
- while ((m = tagRe.exec(raw)) !== null) emotions.push(m[1]);
507
- const clean = raw.replace(/\[[a-zA-Z_]+\]/g, '').trim();
508
- return { emotions, clean };
509
- }
510
-
511
- /* DOM helpers */
512
- function esc(t) { const d = document.createElement('div'); d.textContent = t; return d.innerHTML; }
513
- function scroll() { MA.scrollTop = MA.scrollHeight; }
514
-
515
- function addTurn(userText, botText) {
516
- const turn = document.createElement('div');
517
- turn.className = 'turn';
518
- turn.innerHTML =
519
- '<div class="user-row"><div class="bubble bubble-user">' + esc(userText) + '</div></div>' +
520
- '<div class="bot-row"><div class="name-tag">Ana</div><div class="bubble bubble-bot">' + esc(botText) + '</div></div>';
521
- MA.appendChild(turn);
522
- scroll();
523
- }
524
-
525
- function showTyping() {
526
- const d = document.createElement('div');
527
- d.id = 'typDot';
528
- d.className = 'bot-row';
529
- d.innerHTML = '<div class="typing"><span></span><span></span><span></span></div>';
530
- MA.appendChild(d); scroll(); return d;
531
- }
532
-
533
- /* TTS */
534
- function playB64(b64) {
535
- try {
536
- if (activeAudio) { activeAudio.pause(); activeAudio = null; }
537
- const bin = atob(b64), u8 = new Uint8Array(bin.length);
538
- for (let i = 0; i < bin.length; i++) u8[i] = bin.charCodeAt(i);
539
- const url = URL.createObjectURL(new Blob([u8], { type: 'audio/mp3' }));
540
- activeAudio = new Audio(url);
541
- activeAudio.play().catch(() => {});
542
- activeAudio.onended = () => { URL.revokeObjectURL(url); activeAudio = null; };
543
- } catch(e) { console.warn('TTS:', e); }
544
- }
545
-
546
- async function fetchTTS(rawText) {
547
- try {
548
- const res = await fetch('/tts', {
549
- method: 'POST',
550
- headers: { 'Content-Type': 'application/json' },
551
- body: JSON.stringify({ text: rawText, rate: 7, pitch: 0 })
552
- });
553
- const d = await res.json();
554
- if (d.audio) playB64(d.audio);
555
- } catch(e) { console.warn('TTS fetch:', e); }
556
- }
557
-
558
- /* Send */
559
- async function send() {
560
- const t = MI.value.trim();
561
- if (!t || busy) return;
562
- MI.value = ''; busy = true; SB.disabled = true;
563
-
564
- const tyEl = showTyping();
565
-
566
- try {
567
- const res = await fetch('/chat', {
568
- method: 'POST',
569
- headers: { 'Content-Type': 'application/json' },
570
- body: JSON.stringify({ message: t, session_id: SID })
571
- });
572
- const d = await res.json();
573
- tyEl.remove();
574
-
575
- const raw = d.response || '[sad] Something went wrong.';
576
- const { emotions, clean } = parseResponse(raw);
577
-
578
- playImgSequence(emotions.length > 0 ? emotions : ['default']);
579
- addTurn(t, clean);
580
- fetchTTS(raw);
581
- } catch(e) {
582
- tyEl.remove();
583
- addTurn(t, 'Connection error. Please try again.');
584
- }
585
-
586
- busy = false; SB.disabled = false;
587
- // No MI.focus() on mobile -- avoids re-opening keyboard unexpectedly
588
- }
589
-
590
- MI.addEventListener('keydown', e => {
591
- if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); send(); }
592
- });
593
- </script>
594
- </body>
595
- </html>"""
596
-
597
- # ══════════════════════════════════════════════════════════════════
598
- # FLASK
599
- # ══════════════════════════════════════════════════════════════════
600
  app = Flask(__name__)
601
 
602
  @app.route("/")
603
- def index():
604
- return Response(HTML_PAGE, mimetype="text/html")
605
 
606
  @app.route("/img/<path:filename>")
607
  def serve_img(filename: str):
608
- safe = Path(filename).name
609
- target = IMG_DIR / safe
610
- if target.exists() and target.is_file():
611
- return send_from_directory(str(IMG_DIR), safe)
612
- return Response("", status=404)
613
 
614
  @app.route("/chat", methods=["POST"])
615
  def chat():
616
- data = request.json or {}
617
- user_input = data.get("message", "").strip()
618
- session_id = data.get("session_id", str(uuid.uuid4()))
619
- if not user_input:
620
- return jsonify({"error": "Empty message"}), 400
621
- try:
622
- resp = generate_response(user_input, session_id)
623
- except Exception as exc:
624
- print(f"[CHAT] Error: {exc}")
625
- traceback.print_exc()
626
- resp = "[sad] I encountered an unexpected error. Please try again."
627
- return jsonify({"response": resp, "session_id": session_id})
628
 
629
  @app.route("/tts", methods=["POST"])
630
  def tts_endpoint():
631
- data = request.json or {}
632
- text = data.get("text", "").strip()
633
- rate = int(data.get("rate", TTS_RATE))
634
- pitch = int(data.get("pitch", TTS_PITCH))
635
- if not text:
636
- return jsonify({"error": "Empty text"}), 400
637
- audio_b64 = synthesize_speech(text, rate=rate, pitch=pitch)
638
- return jsonify({"audio": audio_b64})
639
-
640
- @app.route("/clear", methods=["POST"])
641
- def clear():
642
  data = request.json or {}
643
- sid = data.get("session_id", "")
644
- with sessions_lock:
645
- sessions.pop(sid, None)
646
- return jsonify({"status": "cleared"})
647
-
648
- @app.route("/health")
649
- def health():
650
- return jsonify({
651
- "model_loaded": model is not None,
652
- "tokenizer_loaded": tokenizer is not None,
653
- })
654
 
655
  if __name__ == "__main__":
656
- print("Visual AI is online -- http://0.0.0.0:7860")
657
- app.run(host="0.0.0.0", port=7860, threaded=True)
 
6
  import traceback
7
  import asyncio
8
  from pathlib import Path
9
+ from flask import Flask, request, jsonify, Response, send_from_directory
10
  import torch
11
  from transformers import AutoTokenizer, AutoModelForCausalLM
12
  import edge_tts
 
17
  MAX_MEMORY = 20
18
  MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "300"))
19
  TTS_VOICE = "zh-CN-XiaoyiNeural"
20
+ TTS_RATE = int(os.environ.get("TTS_RATE", "2"))
21
  TTS_PITCH = int(os.environ.get("TTS_PITCH", "8"))
22
  IMG_DIR = Path(__file__).parent / "img"
23
  MODEL_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
 
26
  # SYSTEM PROMPT
27
  # ══════════════════════════════════════════════════════════════════
28
  SYSTEM_PROMPT = """You are Ana, a warm, emotionally expressive AI companion speaking to Tur in a private, intimate setting.
29
+ ... (Keep your full original system prompt here) ..."""
30
 
31
+ # ══════════════════════════════════════════════════════════════════
32
+ # MODEL LOADING (BACKGROUND THREAD)
33
+ # ══════════════════════════════════════════════════════════════════
34
+ tokenizer = None
35
+ model = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
def load_model_async():
    """Load the tokenizer and model off the main thread.

    Runs in a daemon thread so Flask can start serving immediately while the
    (slow) download/initialization happens in the background. Populates the
    module-level ``tokenizer`` and ``model`` globals on success; on failure
    they remain ``None`` and generate_response() degrades gracefully.
    """
    global tokenizer, model
    try:
        print(f"[MODEL] Background loading {MODEL_ID}...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
        # bfloat16 weights take half the RAM of float32 on CPU.
        load_kwargs = {
            "dtype": torch.bfloat16,
            "device_map": "cpu",
            "trust_remote_code": True,
            "low_cpu_mem_usage": True,
        }
        model = AutoModelForCausalLM.from_pretrained(MODEL_ID, **load_kwargs)
        model.eval()
        # Some tokenizers ship without a pad token; reuse EOS so generate()
        # has a valid pad_token_id.
        if tokenizer.pad_token_id is None:
            tokenizer.pad_token_id = tokenizer.eos_token_id
        print(" OK Model loaded successfully!")
    except Exception as err:
        print(f" FAILED Model load error: {err}")
        traceback.print_exc()

# Kick off loading at import time; daemon=True so it never blocks shutdown.
threading.Thread(target=load_model_async, daemon=True).start()
 
 
 
 
59
 
60
  # ══════════════════════════════════════════════════════════════════
61
+ # UTILITIES & MEMORY
62
  # ══════════════════════════════════════════════════════════════════
63
  EMOTION_RE = re.compile(r'\[([a-zA-Z_]+)\]')
64
+ sessions = {}
65
+ sessions_lock = threading.Lock()
66
 
67
  def extract_emotions(text: str):
68
  emotions = EMOTION_RE.findall(text)
 
72
def clean_for_tts(text: str) -> str:
    """Strip emotion tags and markup so only speakable text reaches the TTS engine."""
    _, spoken = extract_emotions(text)
    # Remove characters the voice engine would stumble over or read literally.
    spoken = re.sub(r'[*_~`#{}()\\|<>]', '', spoken)
    # Collapse whitespace runs left behind by the removals.
    return re.sub(r'\s+', ' ', spoken).strip()
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  def get_memory(sid: str) -> list:
79
  with sessions_lock:
80
  return list(sessions.get(sid, []))
 
88
 
89
  # ══════════════════════════════════════════════════════════════════
90
  # RESPONSE GENERATION
 
 
 
 
 
91
  # ══════════════════════════════════════════════════════════════════
92
# Strings that mark the end of the assistant's turn; anything after them is
# leakage from the prompt format and must be truncated.
STOP_TOKENS = ["<end_of_turn>", "<start_of_turn>", "Tur:", "User:", "<|endoftext|>", "[/INST]"]

def generate_response(user_input: str, session_id: str) -> str:
    """Generate Ana's emotion-tagged reply to *user_input* for this session.

    Builds a chat prompt from the system prompt plus the last 6 exchanges of
    session memory, samples a completion, trims stop markers, and guarantees
    the reply starts with an emotion tag. Never raises: returns a friendly
    fallback string when the model is still loading or generation fails.
    """
    if model is None or tokenizer is None:
        return "[sad] My mind is still booting up... give me another minute?"

    memory = get_memory(session_id)
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    # Keep only the last 6 user/assistant exchanges to bound prompt size.
    for msg in memory[-(6 * 2):]:
        messages.append({"role": "user" if msg["role"] == "user" else "assistant",
                         "content": msg["content"]})
    messages.append({"role": "user", "content": user_input})

    try:
        # return_dict=True yields a BatchEncoding; extract the tensors
        # explicitly (passing the BatchEncoding itself to generate() breaks
        # on .shape access).
        enc = tokenizer.apply_chat_template(messages, return_tensors="pt",
                                            add_generation_prompt=True, return_dict=True)
        input_ids = enc["input_ids"].to("cpu")
        attention_mask = enc.get("attention_mask")
        if attention_mask is not None:
            attention_mask = attention_mask.to("cpu")

        gen_kwargs = dict(
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=True,
            temperature=0.85,
            pad_token_id=tokenizer.eos_token_id,
        )
        # Pass the attention mask when available: with pad_token == eos_token
        # the model cannot infer it reliably and transformers warns about it.
        if attention_mask is not None:
            gen_kwargs["attention_mask"] = attention_mask

        with torch.no_grad():
            outputs = model.generate(input_ids, **gen_kwargs)

        # Decode only the newly generated tokens, then cut at any stop marker.
        response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True).strip()
        for stop in STOP_TOKENS:
            response = response.split(stop)[0].strip()

        # Guard against an empty/near-empty completion before tagging it,
        # otherwise the client would receive a bare "[default] " reply.
        if len(response) < 3:
            response = "[thinking] I lost my train of thought. Could you say that again?"
        if not EMOTION_RE.search(response):
            response = "[default] " + response

        add_to_memory(session_id, "user", user_input)
        add_to_memory(session_id, "assistant", response)
        return response
    except Exception as e:
        print(f"Gen Error: {e}")
        traceback.print_exc()
        return "[sad] I lost my train of thought. Say that again?"
127
+
128
+ # ══════════════════════════════════════════════════════════════════
129
+ # TTS & ROUTES
 
 
 
 
 
 
 
 
130
  # ══════════════════════════════════════════════════════════════════
131
  async def _async_tts(text: str, rate: int, pitch: int) -> bytes:
132
  rate_str = f"+{rate}%" if rate >= 0 else f"{rate}%"
 
134
  comm = edge_tts.Communicate(text, TTS_VOICE, rate=rate_str, pitch=pitch_str)
135
  audio = b""
136
  async for chunk in comm.stream():
137
+ if chunk["type"] == "audio": audio += chunk["data"]
 
138
  return audio
139
 
140
def synthesize_speech(text: str, rate: int = 0, pitch: int = 0):
    """Synthesize *text* with edge-tts and return base64-encoded MP3 audio.

    Emotion tags and markup are stripped first. Returns None when there is
    nothing speakable or when synthesis fails (network/service errors), so
    callers can simply skip audio instead of surfacing a 500.
    """
    clean = clean_for_tts(text)
    if not clean:
        return None
    # edge-tts is async; run it on a private event loop so this is safe when
    # called from Flask worker threads.
    loop = asyncio.new_event_loop()
    try:
        audio = loop.run_until_complete(_async_tts(clean, rate, pitch))
    except Exception as exc:
        # Best-effort: a TTS outage must not take down the /tts route.
        print(f"[TTS] Error: {exc}")
        return None
    finally:
        loop.close()
    return base64.b64encode(audio).decode() if audio else None
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
app = Flask(__name__)

@app.route("/")
def index():
    """Serve the chat UI.

    The page is extracted from an ``HTML_PAGE = r\"\"\"...\"\"\"`` literal in this
    source file. If that marker is absent (it was removed in this revision),
    the old one-liner raised IndexError and every page load returned 500;
    fall back to a minimal placeholder instead.
    """
    try:
        source = Path(__file__).read_text(encoding="utf-8")
        return Response(source.split('HTML_PAGE = r"""')[1].split('"""')[0],
                        mimetype="text/html")
    except (OSError, IndexError):
        # No embedded page found -- keep the endpoint alive with a stub.
        return Response("<!DOCTYPE html><title>Ana</title><p>UI unavailable.</p>",
                        mimetype="text/html")
155
 
156
@app.route("/img/<path:filename>")
def serve_img(filename: str):
    """Serve an emotion image, reduced to its basename to block path traversal."""
    safe_name = Path(filename).name
    return send_from_directory(str(IMG_DIR), safe_name)
 
 
 
 
159
 
160
@app.route("/chat", methods=["POST"])
def chat():
    """POST {message, session_id} -> {response, session_id}.

    Rejects empty messages with 400 (restored from the previous revision) and
    converts unexpected generation failures into an in-character fallback
    instead of an unhandled 500.
    """
    data = request.json or {}
    user_input = (data.get("message") or "").strip()
    session_id = data.get("session_id", "default")
    if not user_input:
        return jsonify({"error": "Empty message"}), 400
    try:
        resp = generate_response(user_input, session_id)
    except Exception as exc:
        print(f"[CHAT] Error: {exc}")
        traceback.print_exc()
        resp = "[sad] I encountered an unexpected error. Please try again."
    return jsonify({"response": resp, "session_id": session_id})
 
 
 
 
 
 
 
 
 
165
 
166
@app.route("/tts", methods=["POST"])
def tts_endpoint():
    """POST {text, rate?, pitch?} -> {audio: base64 MP3 or null}.

    Rejects empty text with 400 (restored from the previous revision) and
    tolerates non-numeric rate/pitch by falling back to the configured
    defaults instead of letting int() raise a 500.
    """
    data = request.json or {}
    text = (data.get("text") or "").strip()
    if not text:
        return jsonify({"error": "Empty text"}), 400
    try:
        rate = int(data.get("rate", TTS_RATE))
        pitch = int(data.get("pitch", TTS_PITCH))
    except (TypeError, ValueError):
        # Malformed client values must not crash the endpoint.
        rate, pitch = TTS_RATE, TTS_PITCH
    audio = synthesize_speech(text, rate=rate, pitch=pitch)
    return jsonify({"audio": audio})
 
 
 
 
 
 
 
 
 
171
 
172
if __name__ == "__main__":
    # threaded=True (present in the previous revision) makes the concurrency
    # intent explicit: /chat and /tts are served in parallel worker threads
    # while generation blocks one of them.
    app.run(host="0.0.0.0", port=7860, threaded=True)