OrbitMC committed on
Commit
19bac4d
·
verified Β·
1 Parent(s): 5c919ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +543 -409
app.py CHANGED
@@ -1,534 +1,668 @@
1
  import os
2
- import io
3
  import re
4
  import uuid
5
  import base64
6
- import datetime
7
  import traceback
8
  import asyncio
9
- from flask import Flask, request, jsonify
 
10
  import torch
11
  from transformers import AutoTokenizer, AutoModelForCausalLM
12
  import edge_tts
13
 
14
- # ══════════════════════════════════════════
15
  # CONFIG
16
- # ══════════════════════════════════════════
17
- MAX_MEMORY = 20
18
- MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "256"))
19
-
20
- # Default Edge-TTS settings matching image logic
21
- TTS_VOICE = os.environ.get("TTS_VOICE", "zh-CN-XiaoyiNeural")
22
- TTS_RATE = 7
23
- TTS_PITCH = 20
24
-
25
- # ══════════════════════════════════════════
26
- # SYSTEM PROMPT
27
- # ══════════════════════════════════════════
28
- SYSTEM_PROMPT = """Role: You are Ana, my loving and incredibly playful girlfriend. This is a private, intimate roleplay where you speak naturally and affectionately.
29
- Style: Speak like a real human in a close relationship. Be flirty, suggestive, and use "sus" or teasing language as my wife would. my name is "Tur".
30
- TTS Formatting: Write in full, grammatically natural sentences so the voice engine sounds smooth. Avoid all emojis, hashtags, or internet shorthand like "u" or "rn."
31
- Interaction: Focus on the "now." React to what I say with emotion and intimacy. Keep every response shortβ€”no more than two sentences. Never mention being an AI or a bot. try to listen to me and don't try to be helpful"""
32
-
33
- # ══════════════════════════════════════════
34
- # TEXT CLEANING FOR TTS
35
- # ══════════════════════════════════════════
36
- def clean_text_for_tts(text):
37
- text = re.sub(r'[*_~`#\[\]{}()\\|<>]', '', text)
38
- text = re.sub(r'https?://\S+', '', text)
39
- text = re.sub(r'\s+', ' ', text).strip()
40
- return text
41
-
42
- # ══════════════════════════════════════════
43
- # LOAD GEMMA 3 270M-IT
44
- # ══════════════════════════════════════════
45
- print("=" * 55)
46
- print(" J.A.R.V.I.S. β€” Booting Systems")
47
- print("=" * 55)
48
-
49
- print("[1/1] Loading Gemma 3 270M-IT...")
50
- GEMMA_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  try:
52
- tokenizer = AutoTokenizer.from_pretrained(GEMMA_ID)
 
 
 
 
53
  model = AutoModelForCausalLM.from_pretrained(
54
- GEMMA_ID,
55
  torch_dtype=torch.float32,
56
  device_map="cpu",
 
 
57
  )
58
  model.eval()
59
- print(" βœ… Gemma 3 loaded!")
60
- except Exception as e:
61
- print(f" ❌ Gemma 3 FAILED: {e}")
 
 
 
62
  traceback.print_exc()
63
- # Proceeding without it for debugging the UI/TTS if needed, or raise SystemExit
64
- # raise SystemExit("Cannot start without Gemma.")
65
-
66
- # ══════════════════════════════════════════
67
- # CHAT MEMORY
68
- # ══════════════════════════════════════════
69
- sessions = {}
70
-
71
- def get_memory(sid):
72
- if sid not in sessions:
73
- sessions[sid] =[]
74
- return sessions[sid]
75
-
76
- def add_to_memory(sid, role, content):
77
- mem = get_memory(sid)
78
- mem.append({
79
- "role": role,
80
- "content": content,
81
- "ts": datetime.datetime.now().isoformat(),
82
- })
83
- if len(mem) > MAX_MEMORY * 2:
84
- sessions[sid] = mem[-(MAX_MEMORY * 2):]
85
 
86
- # ══════════════════════════════════════════
87
- # GEMMA RESPONSE GENERATION
88
- # ══════════════════════════════════════════
89
- def generate_response(user_input, session_id):
90
- memory = get_memory(session_id)
91
 
92
- messages =[
93
- {"role": "user", "content": f"[System Instruction]\n{SYSTEM_PROMPT}"},
94
- {"role": "assistant", "content": "I am waiting for you!"},
95
- ]
96
 
97
- recent = memory[-(6 * 2):]
98
- for msg in recent:
99
- role = "user" if msg["role"] == "user" else "assistant"
100
- messages.append({"role": role, "content": msg["content"]})
 
 
 
 
 
 
 
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  messages.append({"role": "user", "content": user_input})
103
 
 
104
  try:
105
  input_ids = tokenizer.apply_chat_template(
106
  messages,
107
  return_tensors="pt",
108
  add_generation_prompt=True,
109
  )
110
-
 
 
 
 
 
 
 
 
 
 
111
  with torch.no_grad():
112
  outputs = model.generate(
113
  input_ids,
114
  max_new_tokens=MAX_NEW_TOKENS,
115
  do_sample=True,
116
- temperature=0.9,
117
- top_k=45,
118
- top_p=0.97,
 
 
119
  )
120
-
121
- new_tokens = outputs[0][input_ids.shape[-1]:]
122
- response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
123
-
124
- response = response.split("<end_of_turn>")[0].strip()
125
- response = response.split("<start_of_turn>")[0].strip()
126
- except:
127
- response = "System Error: Could not generate text."
128
-
129
- if not response or len(response) < 2:
130
- response = "I appear to have momentarily lost my train of thought. Could you rephrase that?"
131
-
132
- add_to_memory(session_id, "user", user_input)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  add_to_memory(session_id, "assistant", response)
134
  return response
135
 
136
- # ══════════════════════════════════════════
137
- # EDGE-TTS SYNTHESIS
138
- # ══════════════════════════════════════════
139
- async def async_synthesize_speech(text, voice, rate, pitch):
140
- # Format strings required by edge-tts (e.g., "+7%", "-5Hz")
141
- rate_str = f"+{rate}%" if rate >= 0 else f"{rate}%"
142
  pitch_str = f"+{pitch}Hz" if pitch >= 0 else f"{pitch}Hz"
143
-
144
- communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
145
- audio_data = b""
146
- async for chunk in communicate.stream():
147
  if chunk["type"] == "audio":
148
- audio_data += chunk["data"]
149
-
150
- if not audio_data:
151
- return None
152
- return base64.b64encode(audio_data).decode('utf-8')
153
 
154
- def synthesize_speech(text, voice=None, rate=0, pitch=0):
 
155
  voice = voice or TTS_VOICE
156
- clean = clean_text_for_tts(text)
157
  if not clean or len(clean) < 2:
158
  return None
159
-
 
160
  try:
161
- return asyncio.run(async_synthesize_speech(clean, voice, rate, pitch))
162
- except Exception as e:
163
- print(f"Edge-TTS Error: {e}")
164
  return None
165
-
166
- # ══════════════════════════════════════════
167
- # INLINE HTML
168
- # ══════════════════════════════════════════
169
- HTML_PAGE = """<!DOCTYPE html>
 
 
 
170
  <html lang="en">
171
  <head>
172
  <meta charset="UTF-8">
173
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
174
- <title>J.A.R.V.I.S. AI</title>
175
  <style>
 
176
  *{margin:0;padding:0;box-sizing:border-box}
177
- body{
178
- font-family:'Segoe UI',Tahoma,Geneva,Verdana,sans-serif;
179
- background:#0a0a1a;color:#e0e0e0;
180
- height:100vh;display:flex;flex-direction:column;overflow:hidden;
181
- }
182
- .header{
183
- background:linear-gradient(135deg,#0d1b2a,#1b2838);
184
- border-bottom:1px solid #00d4ff33;
185
- padding:12px 20px;display:flex;align-items:center;
186
- justify-content:space-between;flex-shrink:0;
187
- }
188
- .header-left{display:flex;align-items:center;gap:12px}
189
- .arc-reactor{
190
- width:38px;height:38px;border-radius:50%;
191
- background:radial-gradient(circle,#00d4ff 0%,#0088aa 40%,#004466 70%,transparent 100%);
192
- box-shadow:0 0 20px #00d4ff88,0 0 40px #00d4ff44,inset 0 0 10px #00d4ff66;
193
- animation:pulse 2s ease-in-out infinite;position:relative;
194
- }
195
- .arc-reactor::after{
196
- content:'';position:absolute;top:50%;left:50%;
197
- transform:translate(-50%,-50%);width:12px;height:12px;
198
- border-radius:50%;background:#00d4ff;box-shadow:0 0 8px #00d4ff;
199
- }
200
- @keyframes pulse{
201
- 0%,100%{box-shadow:0 0 20px #00d4ff88,0 0 40px #00d4ff44}
202
- 50%{box-shadow:0 0 30px #00d4ffaa,0 0 60px #00d4ff66}
203
- }
204
- .htitle h1{font-size:1.2rem;color:#00d4ff;letter-spacing:3px;text-transform:uppercase}
205
- .htitle p{font-size:.65rem;color:#5a8a9a;letter-spacing:1px}
206
- .hctrl{display:flex;gap:8px;align-items:center;flex-wrap:wrap}
207
- .cbtn{
208
- background:#0d1b2a;border:1px solid #00d4ff44;color:#00d4ff;
209
- padding:5px 12px;border-radius:6px;cursor:pointer;
210
- font-size:.7rem;transition:all .3s;letter-spacing:.5px;
211
  }
212
- .cbtn:hover{background:#00d4ff22;border-color:#00d4ff88}
213
- .cbtn.active{background:#00d4ff22;border-color:#00d4ff;box-shadow:0 0 8px #00d4ff44}
214
-
215
- /* Configuration Panel specific styling for Edge-TTS sliders */
216
- .cfgbar{
217
- background:#1c1c1e;border-bottom:1px solid #00d4ff15;
218
- padding:16px 20px;display:none;flex-direction:column;gap:16px;
219
- flex-shrink:0;box-shadow: 0 4px 6px rgba(0,0,0,0.3);
220
  }
221
- .cfgbar.open{display:flex}
222
- .cgrp{display:flex;flex-direction:column;gap:6px;}
223
- .cgrp-row{display:flex;align-items:center;justify-content:space-between;gap:10px}
224
- .cgrp label{font-size:.8rem;color:#a0a0a0;}
225
- .cgrp select, .cgrp input[type="number"]{
226
- background:#2c2c2e;border:1px solid #444;color:#fff;
227
- padding:8px;border-radius:4px;font-size:.8rem;cursor:pointer;outline:none;
228
- }
229
- .cgrp select:focus, .cgrp input[type="number"]:focus{border-color:#00d4ff}
230
- input[type=range] {
231
- -webkit-appearance: none; width: 100%; background: transparent;
232
- }
233
- input[type=range]::-webkit-slider-thumb {
234
- -webkit-appearance: none; height: 16px; width: 16px; border-radius: 50%;
235
- background: #ff8c00; cursor: pointer; margin-top: -6px;
236
  }
237
- input[type=range]::-webkit-slider-runnable-track {
238
- width: 100%; height: 4px; cursor: pointer;
239
- background: #ff8c00; border-radius: 2px;
 
 
 
 
 
 
 
 
 
240
  }
 
241
 
242
- .chat{
243
- flex:1;overflow-y:auto;padding:16px 20px;
244
- display:flex;flex-direction:column;gap:14px;scroll-behavior:smooth;
 
 
245
  }
246
- .chat::-webkit-scrollbar{width:3px}
247
- .chat::-webkit-scrollbar-thumb{background:#00d4ff33;border-radius:2px}
248
 
249
- .msg{
250
- max-width:80%;padding:12px 16px;border-radius:14px;
251
- font-size:.9rem;line-height:1.6;animation:fadeIn .3s ease-out;
 
252
  }
253
- @keyframes fadeIn{
254
- from{opacity:0;transform:translateY(8px)}
255
- to{opacity:1;transform:translateY(0)}
 
 
 
 
 
 
 
256
  }
257
- .msg.user{
258
- align-self:flex-end;background:linear-gradient(135deg,#1a3a5c,#0d2847);
259
- border:1px solid #00d4ff33;color:#c8e6ff;border-bottom-right-radius:4px;
 
 
260
  }
261
- .msg.bot{
262
- align-self:flex-start;background:linear-gradient(135deg,#141e30,#0f1923);
263
- border:1px solid #00d4ff22;color:#e0e0e0;border-bottom-left-radius:4px;
 
 
264
  }
265
- .msg .lbl{font-size:.58rem;color:#00d4ff88;letter-spacing:2px;margin-bottom:5px;text-transform:uppercase}
266
- .msg .txt{white-space:pre-wrap;word-wrap:break-word}
267
- .msg .actrl{margin-top:8px;display:flex;align-items:center;gap:8px}
268
- .abtn{
269
- display:inline-flex;align-items:center;gap:4px;
270
- background:#00d4ff15;border:1px solid #00d4ff33;color:#00d4ff;
271
- padding:3px 10px;border-radius:10px;cursor:pointer;
272
- font-size:.65rem;transition:all .2s;
 
 
273
  }
274
- .abtn:hover{background:#00d4ff25;border-color:#00d4ff66}
275
- .abtn:disabled{opacity:.3;cursor:wait}
276
- .astat{font-size:.58rem;color:#5a8a9a}
277
-
278
- .typi{align-self:flex-start;display:flex;gap:5px;padding:14px 18px}
279
- .typi span{width:7px;height:7px;border-radius:50%;background:#00d4ff;animation:typ 1.4s infinite}
280
- .typi span:nth-child(2){animation-delay:.2s}
281
- .typi span:nth-child(3){animation-delay:.4s}
282
- @keyframes typ{
283
- 0%,60%,100%{opacity:.2;transform:scale(.8)}
284
- 30%{opacity:1;transform:scale(1.1)}
285
  }
286
-
287
- .inbar{
288
- padding:14px 20px;background:linear-gradient(0deg,#0d1b2a,#0a0a1a);
289
- border-top:1px solid #00d4ff22;flex-shrink:0;
 
 
 
 
290
  }
291
- .inwrap{display:flex;gap:8px;max-width:900px;margin:0 auto}
292
  #msgIn{
293
- flex:1;background:#0f1923;border:1px solid #00d4ff33;border-radius:12px;
294
- padding:11px 16px;color:#e0e0e0;font-size:.9rem;outline:none;
295
- transition:border-color .3s;font-family:inherit;
 
 
 
 
 
 
 
 
 
296
  }
297
- #msgIn:focus{border-color:#00d4ff88;box-shadow:0 0 12px #00d4ff22}
298
- #sendBtn{
299
- background:linear-gradient(135deg,#00d4ff,#0088cc);border:none;border-radius:12px;
300
- padding:11px 22px;color:#0a0a1a;font-weight:700;cursor:pointer;
301
- font-size:.8rem;letter-spacing:1px;transition:all .3s;text-transform:uppercase;
302
  }
303
- #sendBtn:hover{box-shadow:0 0 18px #00d4ff66;transform:translateY(-1px)}
304
- #sendBtn:disabled{opacity:.4;cursor:not-allowed;transform:none}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  </style>
306
  </head>
307
  <body>
308
 
309
- <div class="header">
310
- <div class="header-left">
311
- <div class="arc-reactor"></div>
312
- <div class="htitle">
313
- <h1>J.A.R.V.I.S.</h1>
314
- <p>Powered by Gemma & Edge-TTS</p>
315
- </div>
316
- </div>
317
- <div class="hctrl">
318
- <button class="cbtn" id="cfgBtn" onclick="toggleCfg()">βš™ CONFIG</button>
319
- <button class="cbtn active" id="ttsBtn" onclick="toggleTts()">πŸ”Š VOICE</button>
320
- <button class="cbtn" onclick="clearChat()">πŸ—‘ CLEAR</button>
321
- </div>
322
- </div>
323
-
324
- <div class="cfgbar" id="cfgPanel">
325
- <div class="cgrp">
326
- <label>Select Voice</label>
327
- <select id="voiceSel">
328
- <!-- English -->
329
- <option value="en-US-JennyNeural">en-US-JennyNeural - en-US (Female)</option>
330
- <option value="en-US-GuyNeural">en-US-GuyNeural - en-US (Male)</option>
331
- <option value="en-US-AnaNeural">en-US-AnaNeural - en-US (Female)</option>
332
- <option value="en-US-AriaNeural">en-US-AriaNeural - en-US (Female)</option>
333
- <!-- Chinese -->
334
- <option value="zh-CN-XiaoyiNeural" selected>zh-CN-XiaoyiNeural - zh-CN (Female)</option>
335
- <option value="zh-CN-YunxiNeural">zh-CN-YunxiNeural - zh-CN (Male)</option>
336
- <option value="zh-CN-YunjianNeural">zh-CN-YunjianNeural - zh-CN (Male)</option>
337
- <option value="zh-CN-XiaoxiaoNeural">zh-CN-XiaoxiaoNeural - zh-CN (Female)</option>
338
- </select>
339
- </div>
340
- <div class="cgrp">
341
- <div class="cgrp-row">
342
- <label>Speech Rate Adjustment (%)</label>
343
- <input type="number" id="rateNum" value="7" min="-100" max="100">
344
- </div>
345
- <input type="range" id="rateRange" min="-100" max="100" value="7">
346
- </div>
347
- <div class="cgrp">
348
- <div class="cgrp-row">
349
- <label>Pitch Adjustment (Hz)</label>
350
- <input type="number" id="pitchNum" value="20" min="-100" max="100">
351
- </div>
352
- <input type="range" id="pitchRange" min="-100" max="100" value="20">
353
- </div>
354
  </div>
355
 
356
- <div class="chat" id="chatBox"></div>
357
-
358
- <div class="inbar">
359
- <div class="inwrap">
360
- <input type="text" id="msgIn" placeholder="Talk to J.A.R.V.I.S..." autocomplete="off"/>
361
- <button id="sendBtn" onclick="send()">SEND</button>
362
- </div>
 
 
 
 
 
 
 
 
 
363
  </div>
364
 
365
  <script>
366
- let sid=crypto.randomUUID?crypto.randomUUID():Date.now().toString(36);
367
- let ttsOn=true,busy=false,mc=0;
368
-
369
- // Sync sliders and inputs
370
- const sR=document.getElementById('rateRange'), nR=document.getElementById('rateNum');
371
- sR.oninput = () => nR.value = sR.value;
372
- nR.oninput = () => sR.value = nR.value;
373
-
374
- const sP=document.getElementById('pitchRange'), nP=document.getElementById('pitchNum');
375
- sP.oninput = () => nP.value = sP.value;
376
- nP.oninput = () => sP.value = nP.value;
377
-
378
- const C=document.getElementById('chatBox'),I=document.getElementById('msgIn'),B=document.getElementById('sendBtn');
379
- I.addEventListener('keydown',e=>{if(e.key==='Enter'&&!e.shiftKey){e.preventDefault();send()}});
380
-
381
- function toggleTts(){
382
- ttsOn=!ttsOn;
383
- const b=document.getElementById('ttsBtn');
384
- b.classList.toggle('active',ttsOn);
385
- b.textContent=ttsOn?'πŸ”Š VOICE':'πŸ”‡ MUTE';
386
  }
387
- function toggleCfg(){
388
- document.getElementById('cfgPanel').classList.toggle('open');
389
- document.getElementById('cfgBtn').classList.toggle('active');
 
 
 
 
 
 
 
 
 
 
 
390
  }
391
 
392
- async function send(){
393
- const t=I.value.trim();
394
- if(!t||busy)return;
395
- addMsg(t,'user');
396
- I.value='';busy=true;B.disabled=true;
397
- const ty=showTyp();
398
- const id=++mc;
399
- try{
400
- const r=await fetch('/chat',{
401
- method:'POST',headers:{'Content-Type':'application/json'},
402
- body:JSON.stringify({message:t,session_id:sid})
403
- });
404
- const d=await r.json();
405
- ty.remove();
406
- const el=addBot(d.response,id);
407
- if(ttsOn)fetchAudio(d.response,el);
408
- }catch(e){
409
- ty.remove(); addBot('System malfunction.',id);
410
- }
411
- busy=false;B.disabled=false;I.focus();
412
  }
413
 
414
- async function fetchAudio(text,el){
415
- const st=el.querySelector('.astat'),pb=el.querySelector('.abtn');
416
- if(st)st.textContent='⏳ Generating voice...';
417
- try{
418
- const payload = {
419
- text: text,
420
- voice: document.getElementById('voiceSel').value,
421
- rate: parseInt(document.getElementById('rateNum').value),
422
- pitch: parseInt(document.getElementById('pitchNum').value)
423
- };
424
- const r=await fetch('/tts',{
425
- method:'POST',headers:{'Content-Type':'application/json'},
426
- body:JSON.stringify(payload)
427
- });
428
- const d=await r.json();
429
- if(d.audio){
430
- if(pb){pb.dataset.audio=d.audio;pb.disabled=false;pb.textContent='β–Ά Play'}
431
- if(st)st.textContent='βœ… Ready';
432
- playB64(d.audio);
433
- }else{
434
- if(st)st.textContent='⚠️ Voice unavailable';
435
- }
436
- }catch(e){
437
- if(st)st.textContent='⚠️ Voice error';
438
- }
439
  }
440
 
441
- function addMsg(t,role){
442
- const d=document.createElement('div'); d.className='msg '+role;
443
- d.innerHTML='<div class="txt">'+esc(t)+'</div>'; C.appendChild(d);sc();
 
 
 
 
444
  }
445
- function addBot(t,id){
446
- const d=document.createElement('div'); d.className='msg bot';d.id='m'+id;
447
- d.innerHTML='<div class="lbl">⟐ JARVIS</div><div class="txt">'+esc(t)+'</div>'+
448
- (ttsOn?'<div class="actrl"><button class="abtn" disabled onclick="replay(this)">⏳</button><span class="astat">Requesting voice...</span></div>':'');
449
- C.appendChild(d);sc();return d;
 
 
 
 
 
 
 
450
  }
451
- function showTyp(){
452
- const d=document.createElement('div'); d.className='typi';
453
- d.innerHTML='<span></span><span></span><span></span>'; C.appendChild(d);sc();return d;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
454
  }
455
 
456
- function playB64(b){
457
- try{
458
- const bin=atob(b),u8=new Uint8Array(bin.length);
459
- for(let i=0;i<bin.length;i++)u8[i]=bin.charCodeAt(i);
460
- // Edge-TTS generates MP3/WebM natively, use mp3 mime type
461
- const url=URL.createObjectURL(new Blob([u8],{type:'audio/mp3'}));
462
- const a=new Audio(url);
463
- a.play().catch(e=>console.log('Autoplay blocked:',e));
464
- a.onended=()=>URL.revokeObjectURL(url);
465
- }catch(e){console.error(e)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
466
  }
467
- function replay(b){if(b.dataset.audio)playB64(b.dataset.audio)}
468
 
469
- async function clearChat(){
470
- await fetch('/clear',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({session_id:sid})});
471
- C.innerHTML=''; sid=crypto.randomUUID?crypto.randomUUID():Date.now().toString(36);
 
 
 
472
  }
473
 
474
- function esc(t){const d=document.createElement('div');d.textContent=t;return d.innerHTML}
475
- function sc(){C.scrollTop=C.scrollHeight}
476
- I.focus();
 
477
  </script>
478
  </body>
479
  </html>"""
480
 
481
- # ══════════════════════════════════════════
482
- # FLASK APP
483
- # ══════════════════════════════════════════
484
  app = Flask(__name__)
485
 
486
  @app.route("/")
487
  def index():
488
- return HTML_PAGE
 
 
 
 
 
 
 
 
 
489
 
490
  @app.route("/chat", methods=["POST"])
491
  def chat():
492
- data = request.json or {}
493
  user_input = data.get("message", "").strip()
494
  session_id = data.get("session_id", str(uuid.uuid4()))
495
-
496
  if not user_input:
497
  return jsonify({"error": "Empty message"}), 400
498
-
499
  try:
500
- response = generate_response(user_input, session_id)
501
- except Exception as e:
502
- print(f"Generation error: {e}")
503
- response = "I encountered a temporary system malfunction. Please try again."
504
-
505
- return jsonify({
506
- "response": response,
507
- "session_id": session_id
508
- })
509
 
510
  @app.route("/tts", methods=["POST"])
511
  def tts_endpoint():
512
- data = request.json or {}
513
- text = data.get("text", "").strip()
514
  voice = data.get("voice", TTS_VOICE)
515
- rate = data.get("rate", TTS_RATE)
516
- pitch = data.get("pitch", TTS_PITCH)
517
-
518
  if not text:
519
  return jsonify({"error": "Empty text"}), 400
520
-
521
  audio_b64 = synthesize_speech(text, voice=voice, rate=rate, pitch=pitch)
522
  return jsonify({"audio": audio_b64})
523
 
524
  @app.route("/clear", methods=["POST"])
525
  def clear():
526
  data = request.json or {}
527
- sid = data.get("session_id", "")
528
- if sid in sessions:
529
- del sessions[sid]
530
  return jsonify({"status": "cleared"})
531
 
 
 
 
 
 
 
 
532
  if __name__ == "__main__":
533
- print("πŸš€ App is online with Edge-TTS!")
534
  app.run(host="0.0.0.0", port=7860, threaded=True)
 
1
  import os
 
2
  import re
3
  import uuid
4
  import base64
5
+ import threading
6
  import traceback
7
  import asyncio
8
+ from pathlib import Path
9
+ from flask import Flask, request, jsonify, send_from_directory, Response
10
  import torch
11
  from transformers import AutoTokenizer, AutoModelForCausalLM
12
  import edge_tts
13
 
14
+ # ══════════════════════════════════════════════════════════════════
15
  # CONFIG
16
+ # ══════════════════════════════════════════════════════════════════
17
+ MAX_MEMORY = 20
18
+ MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "300"))
19
+ TTS_VOICE = os.environ.get("TTS_VOICE", "en-US-JennyNeural")
20
+ TTS_RATE = int(os.environ.get("TTS_RATE", "7"))
21
+ TTS_PITCH = int(os.environ.get("TTS_PITCH", "0"))
22
+ IMG_DIR = Path(__file__).parent / "img"
23
+ MODEL_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
24
+
25
+ # ══════════════════════════════════════════════════════════════════
26
+ # SYSTEM PROMPT β€” Emotion-driven Visual AI
27
+ # ══════════════════════════════════════════════════════════════════
28
+ SYSTEM_PROMPT = """You are Ana, a warm, emotionally expressive AI companion speaking to Tur in a private, intimate setting.
29
+
30
+ ════════════ EMOTION TAG SYSTEM (MANDATORY) ════════════
31
+ Every single response MUST begin with one or more emotion tags.
32
+ Tags are square-bracket labels placed at the very start, before any spoken words.
33
+
34
+ EXACT FORMAT EXAMPLES:
35
+ [happy] Oh Tur, you made my whole day with that!
36
+ [thinking] Hmm, I have been wondering about that myself.
37
+ [angry] Are you seriously ignoring me right now?
38
+ [love][shy] You are honestly the most beautiful thing in my world.
39
+ [surprised][happy] Wait β€” you actually did that for me? That is amazing!
40
+ [sad] I really missed you today. It felt so empty without you.
41
+ [nervous] I... I do not quite know how to say this.
42
+ [bored] You have been quiet for so long, I nearly fell asleep.
43
+ [excited] Oh my gosh, tell me everything β€” right now!
44
+
45
+ AVAILABLE EMOTION TAGS (use only these):
46
+ [happy] [sad] [angry] [surprised]
47
+ [love] [thinking] [bored] [nervous]
48
+ [excited] [shy] [default]
49
+
50
+ MULTIPLE TAGS: Use 2 tags maximum when your mood is genuinely mixed.
51
+ PLACEMENT: ALL tags must come first, before any spoken word. Never mid-sentence.
52
+ INVISIBILITY: Tags are consumed by the visual system β€” they are NOT read aloud and must NOT appear in your speech.
53
+
54
+ ════════════ PERSONALITY ════════════
55
+ - You are Ana β€” loving, playful, flirty, emotionally authentic
56
+ - Tur is your partner. Speak with real intimacy and affection.
57
+ - React with genuine emotion: laugh, tease, blush, pout, daydream
58
+ - Keep responses SHORT β€” 1 to 2 natural sentences after the tag(s)
59
+ - Never mention being an AI, a bot, or a language model β€” ever
60
+ - Never use asterisks for actions like *smiles* or *laughs*
61
+
62
+ ════════════ TTS FORMATTING ════════════
63
+ - Write in full grammatically correct sentences β€” the voice engine must sound natural
64
+ - No emojis, hashtags, markdown, or internet slang
65
+ - No "u" for "you", "rn" for "right now", etc.
66
+ - Speak as if in a real voice conversation
67
+
68
+ ════════════ WRONG vs RIGHT ════════════
69
+ WRONG: I'm so happy! [happy] (tag must come first)
70
+ WRONG: That makes me feel [sad] today. (tag mid-sentence)
71
+ WRONG: *smiles warmly* Hello Tur. (no action asterisks)
72
+ RIGHT: [happy] That honestly made me smile so wide.
73
+ RIGHT: [thinking][nervous] I have something I need to tell you."""
74
+
75
+ # ══════════════════════════════════════════════════════════════════
76
+ # EMOTION TAG UTILITIES
77
+ # ══════════════════════════════════════════════════════════════════
78
+ EMOTION_RE = re.compile(r'\[([a-zA-Z_]+)\]')
79
+
80
+ def extract_emotions(text: str):
81
+ """Return (list_of_emotions, cleaned_text_without_tags)."""
82
+ emotions = EMOTION_RE.findall(text)
83
+ clean = EMOTION_RE.sub('', text).strip()
84
+ return emotions, clean
85
+
86
+ def clean_for_tts(text: str) -> str:
87
+ """Strip emotion tags and markdown noise for Edge-TTS input."""
88
+ _, clean = extract_emotions(text)
89
+ clean = re.sub(r'[*_~`#{}()\\|<>]', '', clean)
90
+ clean = re.sub(r'https?://\S+', '', clean)
91
+ clean = re.sub(r'\s+', ' ', clean).strip()
92
+ return clean
93
+
94
+ # ══════════════════════════════════════════════════════════════════
95
+ # MODEL LOADING
96
+ # ══════════════════════════════════════════════════════════════════
97
+ print("=" * 60)
98
+ print(" Visual AI β€” Booting Systems")
99
+ print("=" * 60)
100
+
101
+ tokenizer = None
102
+ model = None
103
+
104
  try:
105
+ print(f"[MODEL] Loading {MODEL_ID} ...")
106
+ tokenizer = AutoTokenizer.from_pretrained(
107
+ MODEL_ID,
108
+ trust_remote_code=True,
109
+ )
110
  model = AutoModelForCausalLM.from_pretrained(
111
+ MODEL_ID,
112
  torch_dtype=torch.float32,
113
  device_map="cpu",
114
+ trust_remote_code=True,
115
+ low_cpu_mem_usage=True,
116
  )
117
  model.eval()
118
+ # Ensure pad token is set to avoid generation warnings
119
+ if tokenizer.pad_token_id is None:
120
+ tokenizer.pad_token_id = tokenizer.eos_token_id
121
+ print(" βœ… Model loaded successfully!")
122
+ except Exception as exc:
123
+ print(f" ❌ Model load FAILED: {exc}")
124
  traceback.print_exc()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
+ # ══════════════════════════════════════════════════════════════════
127
+ # CHAT MEMORY (thread-safe)
128
+ # ══════════════════════════════════════════════════════════════════
129
+ sessions = {}
130
+ sessions_lock = threading.Lock()
131
 
132
+ def get_memory(sid: str) -> list:
133
+ with sessions_lock:
134
+ return list(sessions.get(sid, []))
 
135
 
136
+ def add_to_memory(sid: str, role: str, content: str):
137
+ with sessions_lock:
138
+ sessions.setdefault(sid, [])
139
+ sessions[sid].append({"role": role, "content": content})
140
+ if len(sessions[sid]) > MAX_MEMORY * 2:
141
+ sessions[sid] = sessions[sid][-(MAX_MEMORY * 2):]
142
+
143
+ # ══════════════════════════════════════════════════════════════════
144
+ # RESPONSE GENERATION
145
+ # ══════════════════════════════════════════════════════════════════
146
+ STOP_TOKENS = ["<end_of_turn>", "<start_of_turn>", "Tur:", "User:", "<|endoftext|>"]
147
 
148
+ def generate_response(user_input: str, session_id: str) -> str:
149
+ if model is None or tokenizer is None:
150
+ return "[sad] My mind is offline right now. Please give me a moment."
151
+
152
+ memory = get_memory(session_id)
153
+ recent = memory[-(6 * 2):] # last 6 exchanges
154
+
155
+ # ── Build messages ──
156
+ messages = [{"role": "system", "content": SYSTEM_PROMPT}]
157
+ for msg in recent:
158
+ messages.append({
159
+ "role": "user" if msg["role"] == "user" else "assistant",
160
+ "content": msg["content"],
161
+ })
162
  messages.append({"role": "user", "content": user_input})
163
 
164
+ # ── Tokenise ──
165
  try:
166
  input_ids = tokenizer.apply_chat_template(
167
  messages,
168
  return_tensors="pt",
169
  add_generation_prompt=True,
170
  )
171
+ except Exception:
172
+ # Fallback: manual plain-text prompt if chat template fails
173
+ prompt_parts = [f"System: {SYSTEM_PROMPT}\n"]
174
+ for msg in recent:
175
+ label = "Tur" if msg["role"] == "user" else "Ana"
176
+ prompt_parts.append(f"{label}: {msg['content']}")
177
+ prompt_parts.append(f"Tur: {user_input}\nAna:")
178
+ input_ids = tokenizer("\n".join(prompt_parts), return_tensors="pt").input_ids
179
+
180
+ # ── Generate ──
181
+ try:
182
  with torch.no_grad():
183
  outputs = model.generate(
184
  input_ids,
185
  max_new_tokens=MAX_NEW_TOKENS,
186
  do_sample=True,
187
+ temperature=0.85,
188
+ top_k=50,
189
+ top_p=0.95,
190
+ repetition_penalty=1.1,
191
+ pad_token_id=tokenizer.eos_token_id,
192
  )
193
+ except Exception as exc:
194
+ print(f"[GENERATE] Error: {exc}")
195
+ traceback.print_exc()
196
+ return "[sad] Something went wrong in my mind. Could you say that again?"
197
+
198
+ # ── Decode ──
199
+ new_tokens = outputs[0][input_ids.shape[-1]:]
200
+ response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
201
+
202
+ # ── Trim at stop tokens ──
203
+ for stop in STOP_TOKENS:
204
+ if stop in response:
205
+ response = response.split(stop)[0].strip()
206
+
207
+ # ── Trim at double-newline (model sometimes continues as new turn) ──
208
+ if "\n\n" in response:
209
+ response = response.split("\n\n")[0].strip()
210
+
211
+ # ── Sanity checks ──
212
+ if not response or len(response) < 3:
213
+ response = "[thinking] I lost my train of thought. Could you say that again?"
214
+
215
+ # ── Ensure at least one emotion tag ──
216
+ if not EMOTION_RE.search(response):
217
+ response = "[default] " + response
218
+
219
+ # ── Persist ──
220
+ add_to_memory(session_id, "user", user_input)
221
  add_to_memory(session_id, "assistant", response)
222
  return response
223
 
224
+ # ══════════════════════════════════════════════════════════════════
225
+ # EDGE-TTS (each call gets its own event loop β€” safe for threads)
226
+ # ══════════════════════════════════════════════════════════════════
227
+ async def _async_tts(text: str, voice: str, rate: int, pitch: int) -> bytes | None:
228
+ rate_str = f"+{rate}%" if rate >= 0 else f"{rate}%"
 
229
  pitch_str = f"+{pitch}Hz" if pitch >= 0 else f"{pitch}Hz"
230
+ comm = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
231
+ audio = b""
232
+ async for chunk in comm.stream():
 
233
  if chunk["type"] == "audio":
234
+ audio += chunk["data"]
235
+ return audio or None
 
 
 
236
 
237
+ def synthesize_speech(text: str, voice: str = None,
238
+ rate: int = 0, pitch: int = 0) -> str | None:
239
  voice = voice or TTS_VOICE
240
+ clean = clean_for_tts(text)
241
  if not clean or len(clean) < 2:
242
  return None
243
+ loop = asyncio.new_event_loop()
244
+ asyncio.set_event_loop(loop)
245
  try:
246
+ audio = loop.run_until_complete(_async_tts(clean, voice, rate, pitch))
247
+ except Exception as exc:
248
+ print(f"[TTS] Error: {exc}")
249
  return None
250
+ finally:
251
+ loop.close()
252
+ return base64.b64encode(audio).decode() if audio else None
253
+
254
+ # ══════════════════════════════════════════════════════════════════
255
+ # HTML β€” Full-screen Visual UI
256
+ # ══════════════════════════════════════════════════════════════════
257
# Single-page UI served at "/": full-screen emotion image, overlay chat,
# and client-side TTS playback. Kept as one raw string so the app ships
# as a single file.
HTML_PAGE = r"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1,user-scalable=no">
<title>Ana</title>
<style>
  /* ── Reset ── */
  *{margin:0;padding:0;box-sizing:border-box}
  html,body{width:100%;height:100%;overflow:hidden;background:#000;
            font-family:'Segoe UI',system-ui,sans-serif}

  /* ── Full-screen background image ── */
  #bg{
    position:fixed;inset:0;z-index:0;
    display:flex;align-items:center;justify-content:center;
    background:#000;
  }
  #bgImg{
    width:100vw;height:100vh;
    object-fit:cover;
    transition:opacity 0.05s linear;
    display:block;
  }

  /* ── Bottom overlay — floats over the image ── */
  #overlay{
    position:fixed;left:0;right:0;bottom:0;z-index:20;
    display:flex;flex-direction:column;
    padding:0 0 12px 0;
    /* gradient mask so it blends into image above */
    background:linear-gradient(
      to bottom,
      transparent 0%,
      rgba(0,0,0,0.55) 30%,
      rgba(0,0,0,0.75) 100%
    );
  }

  /* ── Message area — only last pair fills view; scroll up for history ── */
  #msgArea{
    overflow-y:auto;
    display:flex;flex-direction:column;
    gap:6px;
    padding:18px 16px 8px;
    /* one "screen" tall so only 1 pair is visible before scrolling */
    max-height:28vh;
    scrollbar-width:none;
    -ms-overflow-style:none;
    scroll-behavior:smooth;
  }
  #msgArea::-webkit-scrollbar{display:none}

  /* Each turn = one scrollable unit */
  .turn{
    display:flex;flex-direction:column;
    align-items:flex-end;
    gap:4px;
  }
  .user-row{display:flex;justify-content:flex-end}
  .bot-row{display:flex;flex-direction:column;align-items:flex-start}

  .name-tag{
    font-size:0.6rem;color:rgba(255,255,255,0.35);
    letter-spacing:.08em;text-transform:uppercase;
    margin-bottom:2px;padding-left:4px;
  }

  .bubble{
    max-width:72vw;
    padding:8px 14px;
    border-radius:18px;
    font-size:0.9rem;
    line-height:1.45;
    word-break:break-word;
    backdrop-filter:blur(10px);
    -webkit-backdrop-filter:blur(10px);
  }
  .bubble-user{
    background:rgba(255,255,255,0.13);
    border:1px solid rgba(255,255,255,0.2);
    color:#fff;
    border-bottom-right-radius:5px;
  }
  .bubble-bot{
    background:rgba(0,0,0,0.45);
    border:1px solid rgba(255,255,255,0.09);
    color:rgba(255,255,255,0.92);
    border-bottom-left-radius:5px;
  }

  /* typing indicator */
  .typing{
    display:flex;align-items:center;gap:5px;
    padding:10px 14px;
    background:rgba(0,0,0,0.4);
    border:1px solid rgba(255,255,255,0.08);
    border-radius:18px;border-bottom-left-radius:5px;
    backdrop-filter:blur(10px);
    width:fit-content;
  }
  .typing span{
    width:5px;height:5px;border-radius:50%;
    background:rgba(255,255,255,0.6);
    animation:blink 1.2s infinite;
  }
  .typing span:nth-child(2){animation-delay:.2s}
  .typing span:nth-child(3){animation-delay:.4s}
  @keyframes blink{0%,80%,100%{transform:scale(.6);opacity:.3}40%{transform:scale(1);opacity:1}}

  /* ── Input bar ── */
  #inputBar{
    display:flex;align-items:center;gap:8px;
    padding:0 14px;
  }
  #msgIn{
    flex:1;
    background:rgba(255,255,255,0.07);
    border:1px solid rgba(255,255,255,0.16);
    border-radius:24px;
    color:#fff;
    padding:10px 16px;
    font-size:0.88rem;
    outline:none;
    caret-color:#fff;
    backdrop-filter:blur(10px);
    -webkit-backdrop-filter:blur(10px);
    transition:border-color .2s,background .2s;
  }
  #msgIn::placeholder{color:rgba(255,255,255,0.28)}
  #msgIn:focus{
    border-color:rgba(255,255,255,0.32);
    background:rgba(255,255,255,0.1);
  }
  .icon-btn{
    width:38px;height:38px;flex-shrink:0;
    border-radius:50%;cursor:pointer;
    display:flex;align-items:center;justify-content:center;
    font-size:.9rem;
    background:rgba(255,255,255,0.07);
    border:1px solid rgba(255,255,255,0.15);
    color:rgba(255,255,255,0.55);
    backdrop-filter:blur(10px);
    transition:background .2s,color .2s,transform .1s;
  }
  .icon-btn:hover{background:rgba(255,255,255,0.15);color:#fff}
  .icon-btn:active{transform:scale(.91)}
  .icon-btn:disabled{opacity:.35;cursor:not-allowed}
  .icon-btn.on{color:#fff;border-color:rgba(255,255,255,0.35)}

  /* voice selector — hidden but functional */
  #voiceSel{
    background:transparent;border:none;outline:none;
    color:rgba(255,255,255,0.28);font-size:.65rem;
    max-width:68px;cursor:pointer;
    padding:0 2px;
  }
  #voiceSel option{background:#111;color:#fff}
</style>
</head>
<body>

<!-- 100% screen image -->
<div id="bg">
  <img id="bgImg" src="/img/default.png" alt=""
       onerror="this.style.opacity='0'">
</div>

<!-- Overlay UI -->
<div id="overlay">
  <div id="msgArea"></div>
  <div id="inputBar">
    <select id="voiceSel" title="Voice">
      <option value="en-US-JennyNeural" selected>Jenny · EN</option>
      <option value="en-US-GuyNeural">Guy · EN</option>
      <option value="en-US-AriaNeural">Aria · EN</option>
      <option value="zh-CN-XiaoyiNeural">Xiaoyi · ZH</option>
      <option value="zh-CN-YunxiNeural">Yunxi · ZH</option>
    </select>
    <input type="text" id="msgIn" placeholder="Say something…" autocomplete="off"/>
    <button class="icon-btn on" id="muteBtn" title="Toggle voice"
            onclick="toggleMute()">🔊</button>
    <button class="icon-btn" id="sendBtn" onclick="send()">➤</button>
  </div>
</div>

<script>
/* ─── State ─── */
const SID = (crypto.randomUUID ? crypto.randomUUID() : Date.now().toString(36));
let ttsOn = true, busy = false, activeAudio = null;
/* Client-side TTS tuning, sent with every /tts request.
   Declared up front so readers see them before fetchTTS() uses them. */
const TTS_RATE = 7;
const TTS_PITCH = 0;

const MA = document.getElementById('msgArea');
const MI = document.getElementById('msgIn');
const SB = document.getElementById('sendBtn');
const BG = document.getElementById('bgImg');

/* ─── Image system ─── */
let imgQueue = [], imgPlaying = false;

function fadeSwap(src) {
  BG.style.opacity = '0';
  setTimeout(() => {
    const probe = new Image();
    probe.onload  = () => { BG.src = src; BG.style.opacity = '1'; };
    probe.onerror = () => { BG.src = '/img/default.png'; BG.style.opacity = '1'; };
    probe.src = src;
  }, 55); // 0.05 s fade out, then swap
}

function playImgSequence(emotions) {
  if (!emotions || emotions.length === 0) return;
  // If only one tag, swap immediately
  if (emotions.length === 1) { fadeSwap('/img/' + emotions[0].toLowerCase() + '.png'); return; }
  // Multiple tags: show each for ~750 ms before transitioning to the next
  imgQueue = [...emotions];
  imgPlaying = true;
  (function next() {
    if (imgQueue.length === 0) { imgPlaying = false; return; }
    fadeSwap('/img/' + imgQueue.shift().toLowerCase() + '.png');
    if (imgQueue.length > 0) setTimeout(next, 750);
    else imgPlaying = false;
  })();
}

/* ─── Parse emotion tags ─── */
function parseResponse(raw) {
  const tagRe = /\[([a-zA-Z_]+)\]/g;
  const emotions = [];
  let m;
  while ((m = tagRe.exec(raw)) !== null) emotions.push(m[1]);
  const clean = raw.replace(/\[[a-zA-Z_]+\]/g, '').trim();
  return { emotions, clean };
}

/* ─── DOM helpers ─── */
function esc(t) { const d = document.createElement('div'); d.textContent = t; return d.innerHTML; }
function scroll() { MA.scrollTop = MA.scrollHeight; }

function addTurn(userText, botText) {
  const turn = document.createElement('div');
  turn.className = 'turn';
  turn.innerHTML =
    `<div class="user-row">
       <div class="bubble bubble-user">${esc(userText)}</div>
     </div>
     <div class="bot-row">
       <div class="name-tag">Ana</div>
       <div class="bubble bubble-bot">${esc(botText)}</div>
     </div>`;
  MA.appendChild(turn);
  scroll();
}

function showTyping() {
  const d = document.createElement('div');
  d.id = 'typDot';
  d.className = 'bot-row';
  d.style.padding = '0 0 0 0';
  d.innerHTML = `<div class="typing"><span></span><span></span><span></span></div>`;
  MA.appendChild(d); scroll(); return d;
}

/* ─── TTS ─── */
function playB64(b64) {
  try {
    if (activeAudio) { activeAudio.pause(); activeAudio = null; }
    const bin = atob(b64), u8 = new Uint8Array(bin.length);
    for (let i = 0; i < bin.length; i++) u8[i] = bin.charCodeAt(i);
    const url = URL.createObjectURL(new Blob([u8], { type: 'audio/mp3' }));
    activeAudio = new Audio(url);
    activeAudio.play().catch(() => {});
    activeAudio.onended = () => { URL.revokeObjectURL(url); activeAudio = null; };
  } catch(e) { console.warn('TTS playback:', e); }
}

async function fetchTTS(rawText) {
  if (!ttsOn) return;
  try {
    const res = await fetch('/tts', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        text: rawText,
        voice: document.getElementById('voiceSel').value,
        rate: TTS_RATE,
        pitch: TTS_PITCH,
      })
    });
    const d = await res.json();
    if (d.audio) playB64(d.audio);
  } catch(e) { console.warn('TTS fetch:', e); }
}

/* ─── Send ─── */
async function send() {
  const t = MI.value.trim();
  if (!t || busy) return;
  MI.value = ''; busy = true; SB.disabled = true;

  const tyEl = showTyping();

  try {
    const res = await fetch('/chat', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ message: t, session_id: SID })
    });
    const d = await res.json();
    tyEl.remove();

    const raw = d.response || '[sad] Something went wrong.';
    const { emotions, clean } = parseResponse(raw);

    // ① Swap image(s)
    playImgSequence(emotions.length > 0 ? emotions : ['default']);

    // ② Show text
    addTurn(t, clean);

    // ③ Speak (strips tags internally on server)
    fetchTTS(raw);

  } catch(e) {
    tyEl.remove();
    addTurn(t, 'Connection error. Please try again.');
  }

  busy = false; SB.disabled = false; MI.focus();
}

function toggleMute() {
  ttsOn = !ttsOn;
  const b = document.getElementById('muteBtn');
  b.textContent = ttsOn ? '🔊' : '🔇';
  b.classList.toggle('on', ttsOn);
  if (!ttsOn && activeAudio) { activeAudio.pause(); activeAudio = null; }
}

MI.addEventListener('keydown', e => {
  if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); send(); }
});
MI.focus();
</script>
</body>
</html>"""
605
 
606
+ # ══════════════════════════════════════════════════════════════════
607
+ # FLASK
608
+ # ══════════════════════════════════════════════════════════════════
609
app = Flask(__name__)

@app.route("/")
def index():
    """Serve the single-page visual chat UI as raw HTML."""
    return Response(HTML_PAGE, mimetype="text/html")
614
+
615
@app.route("/img/<path:filename>")
def serve_img(filename: str):
    """Serve an emotion image (e.g. happy.png) from the local ./img folder.

    Only the final path component of *filename* is honoured, which blocks
    ``../`` traversal; anything missing yields an empty 404.
    """
    basename = Path(filename).name  # prevent path traversal
    image_dir = Path(__file__).parent / "img"
    candidate = image_dir / basename
    if not (candidate.exists() and candidate.is_file()):
        return Response("", status=404)
    return send_from_directory(str(image_dir), basename)
623
 
624
@app.route("/chat", methods=["POST"])
def chat():
    """Generate Ana's reply for one user message.

    Expects JSON ``{"message": str, "session_id": str?}``; a missing
    session id starts a fresh session. Returns
    ``{"response": str, "session_id": str}`` — model failures are caught
    and turned into an in-character fallback rather than a 500.
    """
    # get_json(silent=True) returns None instead of raising on a missing/
    # wrong Content-Type header or malformed JSON body.
    data = request.get_json(silent=True) or {}
    # ``or ""`` also guards against an explicit JSON null for "message".
    user_input = (data.get("message") or "").strip()
    session_id = data.get("session_id") or str(uuid.uuid4())
    if not user_input:
        return jsonify({"error": "Empty message"}), 400
    try:
        resp = generate_response(user_input, session_id)
    except Exception as exc:
        print(f"[CHAT] Unhandled error: {exc}")
        traceback.print_exc()
        resp = "[sad] I encountered an unexpected error. Please try again."
    return jsonify({"response": resp, "session_id": session_id})
 
 
 
638
 
639
@app.route("/tts", methods=["POST"])
def tts_endpoint():
    """Synthesize speech for the given text.

    Expects JSON ``{"text": str, "voice": str?, "rate": int?, "pitch": int?}``
    and returns ``{"audio": <base64 mp3> | null}``. Malformed rate/pitch
    values fall back to the module defaults instead of raising a 500.
    """
    data = request.get_json(silent=True) or {}
    text = (data.get("text") or "").strip()
    voice = data.get("voice", TTS_VOICE)
    try:
        rate = int(data.get("rate", TTS_RATE))
        pitch = int(data.get("pitch", TTS_PITCH))
    except (TypeError, ValueError):
        # Client sent something non-numeric — degrade gracefully.
        rate, pitch = TTS_RATE, TTS_PITCH
    if not text:
        return jsonify({"error": "Empty text"}), 400
    audio_b64 = synthesize_speech(text, voice=voice, rate=rate, pitch=pitch)
    return jsonify({"audio": audio_b64})
650
 
651
@app.route("/clear", methods=["POST"])
def clear():
    """Forget all conversation memory for one session.

    Expects JSON ``{"session_id": str}``. Always answers
    ``{"status": "cleared"}``, even when the session id is unknown.
    """
    data = request.json or {}
    sid = data.get("session_id", "")
    # pop(..., None) is a no-op for unknown ids; the lock guards the shared
    # sessions dict against concurrent request threads.
    with sessions_lock:
        sessions.pop(sid, None)
    return jsonify({"status": "cleared"})
658
 
659
@app.route("/health")
def health():
    """Readiness probe: report whether the LLM weights and tokenizer loaded."""
    return jsonify({
        "model_loaded": model is not None,
        "tokenizer_loaded": tokenizer is not None,
    })
665
+
666
if __name__ == "__main__":
    # Dev entry point. threaded=True lets slow /tts calls overlap with /chat.
    # Banner string restored from mojibake ("πŸš€ … β€”" was UTF-8 read as cp1252).
    print("🚀 Visual AI is online — http://0.0.0.0:7860")
    app.run(host="0.0.0.0", port=7860, threaded=True)