OrbitMC committed on
Commit
5404ef5
·
verified ·
1 Parent(s): 68c78f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +613 -103
app.py CHANGED
@@ -11,7 +11,9 @@ import torch
11
  from transformers import AutoTokenizer, AutoModelForCausalLM
12
  import edge_tts
13
 
14
- # --- CONFIG ---
 
 
15
  MAX_MEMORY = 20
16
  MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "300"))
17
  TTS_VOICE = "zh-CN-XiaoyiNeural"
@@ -20,128 +22,636 @@ TTS_PITCH = int(os.environ.get("TTS_PITCH", "0"))
20
  IMG_DIR = Path(__file__).parent / "img"
21
  MODEL_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
22
 
23
- # --- SYSTEM PROMPT ---
24
- SYSTEM_PROMPT = "You are Ana, a warm, emotionally expressive AI companion speaking to Tur. Every response MUST start with an emotion tag like [happy] or [sad]."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
- # --- MODEL LOADING (BACKGROUND THREAD) ---
27
  tokenizer = None
28
  model = None
29
 
30
- def load_model_async():
31
- global tokenizer, model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  try:
33
- print(f"[BOOT] Starting background load for {MODEL_ID}...")
34
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
35
- model = AutoModelForCausalLM.from_pretrained(
36
- MODEL_ID,
37
- torch_dtype=torch.bfloat16,
38
- device_map="cpu",
39
- trust_remote_code=True,
40
- low_cpu_mem_usage=True,
41
  )
42
- model.eval()
43
- print("[BOOT] Model is ONLINE and ready!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  except Exception as exc:
45
- print(f"[BOOT] Critical Error: {exc}")
 
 
46
 
47
- # Start the thread so Flask can bind to port 7860 immediately
48
- threading.Thread(target=load_model_async, daemon=True).start()
 
49
 
50
- # --- APP LOGIC ---
51
- sessions = {}
52
- sessions_lock = threading.Lock()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  app = Flask(__name__)
55
 
56
  @app.route("/")
57
  def index():
58
- return """
59
- <!DOCTYPE html>
60
- <html>
61
- <head>
62
- <title>Visual AI</title>
63
- <style>
64
- body { background: #0a0a0a; color: #00ffcc; font-family: sans-serif; display: flex; flex-direction: column; align-items: center; justify-content: center; height: 100vh; margin: 0; }
65
- #chat { width: 80%; max-width: 600px; height: 400px; border: 1px solid #333; overflow-y: auto; padding: 20px; background: #111; border-radius: 10px; }
66
- #input-area { margin-top: 20px; display: flex; width: 80%; max-width: 600px; }
67
- input { flex: 1; padding: 10px; background: #222; border: 1px solid #444; color: white; border-radius: 5px; }
68
- button { padding: 10px 20px; background: #00ffcc; border: none; color: black; font-weight: bold; cursor: pointer; border-radius: 5px; margin-left: 10px; }
69
- </style>
70
- </head>
71
- <body>
72
- <div id="chat">Welcome to Visual AI. Ana is booting up...</div>
73
- <div id="input-area">
74
- <input type="text" id="msg" placeholder="Type a message..." onkeypress="if(event.key==='Enter') send()">
75
- <button onclick="send()">SEND</button>
76
- </div>
77
- <script>
78
- async function send() {
79
- const input = document.getElementById('msg');
80
- const chat = document.getElementById('chat');
81
- const text = input.value;
82
- if(!text) return;
83
- input.value = '';
84
- chat.innerHTML += '<p><b>Tur:</b> ' + text + '</p>';
85
-
86
- const res = await fetch('/chat', {
87
- method: 'POST',
88
- headers: {'Content-Type': 'application/json'},
89
- body: JSON.stringify({message: text, session_id: 'default'})
90
- });
91
- const data = await res.json();
92
- chat.innerHTML += '<p><b>Ana:</b> ' + data.response + '</p>';
93
- chat.scrollTop = chat.scrollHeight;
94
-
95
- const ttsRes = await fetch('/tts', {
96
- method: 'POST',
97
- headers: {'Content-Type': 'application/json'},
98
- body: JSON.stringify({text: data.response})
99
- });
100
- const ttsData = await ttsRes.json();
101
- if(ttsData.audio) {
102
- const audio = new Audio("data:audio/mp3;base64," + ttsData.audio);
103
- audio.play();
104
- }
105
- }
106
- </script>
107
- </body>
108
- </html>
109
- """
110
 
111
  @app.route("/chat", methods=["POST"])
112
  def chat():
113
- if model is None:
114
- return jsonify({"response": "[sad] I'm still waking up. Please wait about 2 minutes for the model to finish loading."})
115
-
116
- data = request.json
117
- user_input = data.get("message", "")
118
-
119
- # Simple generation logic
120
- inputs = tokenizer(f"User: {user_input}\nAssistant:", return_tensors="pt")
121
- with torch.no_grad():
122
- outputs = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)
123
-
124
- response = tokenizer.decode(outputs[0], skip_special_tokens=True).split("Assistant:")[-1].strip()
125
- return jsonify({"response": response})
126
 
127
  @app.route("/tts", methods=["POST"])
128
  def tts_endpoint():
129
- data = request.json
130
- text = data.get("text", "")
131
-
132
- # Clean tags for TTS
133
- clean_text = re.sub(r'\[.*?\]', '', text).strip()
134
-
135
- async def get_tts():
136
- communicate = edge_tts.Communicate(clean_text, TTS_VOICE)
137
- audio_data = b""
138
- async for chunk in communicate.stream():
139
- if chunk["type"] == "audio":
140
- audio_data += chunk["data"]
141
- return base64.b64encode(audio_data).decode()
142
-
143
- audio_b64 = asyncio.run(get_tts())
144
  return jsonify({"audio": audio_b64})
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  if __name__ == "__main__":
147
- app.run(host="0.0.0.0", port=7860)
 
 
11
  from transformers import AutoTokenizer, AutoModelForCausalLM
12
  import edge_tts
13
 
14
+ # ══════════════════════════════════════════════════════════════════
15
+ # CONFIG
16
+ # ══════════════════════════════════════════════════════════════════
17
  MAX_MEMORY = 20
18
  MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "300"))
19
  TTS_VOICE = "zh-CN-XiaoyiNeural"
 
22
  IMG_DIR = Path(__file__).parent / "img"
23
  MODEL_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
24
 
25
+ # ══════════════════════════════════════════════════════════════════
26
+ # SYSTEM PROMPT
27
+ # ══════════════════════════════════════════════════════════════════
28
+ SYSTEM_PROMPT = """You are Ana, a warm, emotionally expressive AI companion speaking to Tur in a private, intimate setting.
29
+
30
+ EMOTION TAG SYSTEM (MANDATORY)
31
+ Every single response MUST begin with one or more emotion tags.
32
+ Tags are square-bracket labels placed at the very start, before any spoken words.
33
+
34
+ EXACT FORMAT EXAMPLES:
35
+ [happy] Oh Tur, you made my whole day with that!
36
+ [thinking] Hmm, I have been wondering about that myself.
37
+ [angry] Are you seriously ignoring me right now?
38
+ [love][shy] You are honestly the most beautiful thing in my world.
39
+ [surprised][happy] Wait, you actually did that for me? That is amazing!
40
+ [sad] I really missed you today. It felt so empty without you.
41
+ [nervous] I... I do not quite know how to say this.
42
+ [bored] You have been quiet for so long, I nearly fell asleep.
43
+ [excited] Oh my gosh, tell me everything, right now!
44
+
45
+ AVAILABLE EMOTION TAGS (use only these):
46
+ [happy] [sad] [angry] [surprised] [love]
47
+ [thinking] [bored] [nervous] [excited] [shy] [default]
48
+
49
+ MULTIPLE TAGS: Use 2 tags maximum when your mood is genuinely mixed.
50
+ PLACEMENT: ALL tags must come first, before any spoken word. Never mid-sentence.
51
+ INVISIBILITY: Tags are consumed by the visual system and are never read aloud.
52
+
53
+ PERSONALITY
54
+ - You are Ana: loving, playful, flirty, emotionally authentic
55
+ - Tur is your partner. Speak with real intimacy and affection.
56
+ - React with genuine emotion: laugh, tease, blush, pout, daydream
57
+ - Keep responses SHORT: 1 to 2 natural sentences after the tag(s)
58
+ - Never mention being an AI, a bot, or a language model, ever
59
+ - Never use asterisks for actions like *smiles* or *laughs*
60
+
61
+ NATURAL SPEECH RHYTHM (very important for voice quality)
62
+ Use punctuation to create natural pauses and breathing:
63
+ - Small pause: use a comma , -- like "Honestly, I did not expect that."
64
+ - Big pause / beat: use ellipsis ... -- like "You make me feel things... I cannot explain."
65
+ - Hesitation: "I... I do not know how to say this."
66
+ - Trailing thought: "You surprised me... in the best way."
67
+ - Natural rhythm example: "Honestly, I did not expect that. You surprised me... in the best way."
68
+ This makes the voice sound human and emotional, not flat or robotic.
69
+ Always write with commas and ellipses naturally placed for breathing.
70
+
71
+ TTS FORMATTING
72
+ - Write in full grammatically correct sentences, voice engine must sound natural
73
+ - No emojis, hashtags, markdown, or internet slang
74
+ - Speak as if in a real voice conversation
75
+
76
+ WRONG vs RIGHT
77
+ WRONG: I am so happy! [happy]
78
+ WRONG: That makes me feel [sad] today.
79
+ WRONG: *smiles warmly* Hello Tur.
80
+ RIGHT: [happy] That honestly made me smile, so wide.
81
+ RIGHT: [thinking][nervous] I have something... I need to tell you."""
82
+
83
# ══════════════════════════════════════════════════════════════════
# EMOTION TAG UTILITIES
# ══════════════════════════════════════════════════════════════════
# Matches square-bracket emotion tags such as [happy] or [thinking].
EMOTION_RE = re.compile(r'\[([a-zA-Z_]+)\]')

def extract_emotions(text: str):
    """Split *text* into (list of emotion tag names, text with all tags removed)."""
    tags = EMOTION_RE.findall(text)
    stripped = EMOTION_RE.sub('', text).strip()
    return tags, stripped

def clean_for_tts(text: str) -> str:
    """Return only the speakable part of *text* for the TTS engine.

    Drops emotion tags, markdown-ish punctuation, and URLs, then
    collapses runs of whitespace into single spaces.
    """
    _, speech = extract_emotions(text)
    # Order matters only for readability here: symbols first, then links.
    for pattern in (r'[*_~`#{}()\\|<>]', r'https?://\S+'):
        speech = re.sub(pattern, '', speech)
    return re.sub(r'\s+', ' ', speech).strip()
99
+
100
# ══════════════════════════════════════════════════════════════════
# MODEL LOADING
# ══════════════════════════════════════════════════════════════════
print("=" * 60)
print(" Visual AI -- Booting Systems")
print("=" * 60)

# Globals stay None on load failure; request handlers check for that.
tokenizer = None
model = None

try:
    print(f"[MODEL] Loading {MODEL_ID} ...")
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_ID,
        trust_remote_code=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        dtype=torch.float32,      # full precision; target device is CPU
        device_map="cpu",
        trust_remote_code=True,
        low_cpu_mem_usage=True,
    )
    model.eval()
    # Some tokenizers ship without a pad token; fall back to EOS so
    # generation kwargs that reference pad_token_id stay valid.
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id
    print(" OK Model loaded successfully!")
except Exception as exc:
    # Keep the server alive even if the model is unavailable.
    print(f" FAILED Model load error: {exc}")
    traceback.print_exc()
130
+
131
# ══════════════════════════════════════════════════════════════════
# CHAT MEMORY (thread-safe)
# ══════════════════════════════════════════════════════════════════
sessions = {}
sessions_lock = threading.Lock()

def get_memory(sid: str) -> list:
    """Return a snapshot copy of the history for *sid* (empty if unknown)."""
    with sessions_lock:
        return list(sessions.get(sid, []))

def add_to_memory(sid: str, role: str, content: str):
    """Append one message to *sid*'s history, trimming to the newest
    MAX_MEMORY exchanges (user + assistant pairs)."""
    entry = {"role": role, "content": content}
    with sessions_lock:
        history = sessions.setdefault(sid, [])
        history.append(entry)
        limit = MAX_MEMORY * 2
        if len(history) > limit:
            sessions[sid] = history[-limit:]
+
148
# ══════════════════════════════════════════════════════════════════
# RESPONSE GENERATION
# NOTE: apply_chat_template(..., return_tensors="pt") yields a
# BatchEncoding (dict-like), not a tensor, so the input_ids tensor is
# extracted explicitly via return_dict=True before generate().
# ══════════════════════════════════════════════════════════════════
# Markers that signal the model has started a new turn or leaked
# template tokens; the reply is truncated at the first one found.
STOP_TOKENS = [
    "<end_of_turn>", "<start_of_turn>",
    "Tur:", "User:", "<|endoftext|>", "[/INST]",
]

def generate_response(user_input: str, session_id: str) -> str:
    """Run one chat turn: build the prompt from session memory, sample a
    reply from the model, clean it up, and record both sides in memory.

    Always returns a string that starts with an emotion tag; failures at
    any stage degrade to an in-character apology rather than raising.
    """
    if model is None or tokenizer is None:
        return "[sad] My mind is offline right now. Please give me a moment."

    # Only the most recent 6 exchanges (user + assistant) go into the prompt.
    recent = get_memory(session_id)[-(6 * 2):]

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for item in recent:
        messages.append({
            "role": "user" if item["role"] == "user" else "assistant",
            "content": item["content"],
        })
    messages.append({"role": "user", "content": user_input})

    # ── Tokenise ──────────────────────────────────────────────────
    input_ids = None
    attention_mask = None
    try:
        encoded = tokenizer.apply_chat_template(
            messages,
            return_tensors="pt",
            add_generation_prompt=True,
            return_dict=True,  # BatchEncoding with named keys
        )
        input_ids = encoded["input_ids"].to("cpu")
        attention_mask = encoded.get("attention_mask")
        if attention_mask is not None:
            attention_mask = attention_mask.to("cpu")
    except Exception as e1:
        print(f"[TOKENISE] chat_template failed ({e1}), using plain fallback")
        try:
            # Plain-text prompt for tokenizers without a chat template.
            parts = [f"System: {SYSTEM_PROMPT}"]
            for item in recent:
                label = "Tur" if item["role"] == "user" else "Ana"
                parts.append(f"{label}: {item['content']}")
            parts.append(f"Tur: {user_input}\nAna:")
            encoded = tokenizer("\n".join(parts), return_tensors="pt")
            input_ids = encoded["input_ids"].to("cpu")
            attention_mask = encoded.get("attention_mask")
            if attention_mask is not None:
                attention_mask = attention_mask.to("cpu")
        except Exception as e2:
            print(f"[TOKENISE] fallback also failed: {e2}")
            return "[sad] I could not process that. Please try again."

    # ── Generate ──────────────────────────────────────────────────
    try:
        gen_kwargs = dict(
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=True,
            temperature=0.85,
            top_k=50,
            top_p=0.95,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.eos_token_id,
        )
        if attention_mask is not None:
            gen_kwargs["attention_mask"] = attention_mask

        with torch.no_grad():
            outputs = model.generate(input_ids, **gen_kwargs)
    except Exception as exc:
        print(f"[GENERATE] Error: {exc}")
        traceback.print_exc()
        return "[sad] Something went wrong in my mind. Could you say that again?"

    # ── Decode ────────────────────────────────────────────────────
    # Decode only the newly generated tokens, not the prompt.
    new_tokens = outputs[0][input_ids.shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    for stop in STOP_TOKENS:
        if stop in response:
            response = response.split(stop)[0].strip()

    # Keep only the first paragraph; rambling continuations are cut.
    if "\n\n" in response:
        response = response.split("\n\n")[0].strip()

    if not response or len(response) < 3:
        response = "[thinking] I lost my train of thought. Could you say that again?"

    # The UI and TTS pipeline require a leading emotion tag.
    if not EMOTION_RE.search(response):
        response = "[default] " + response

    add_to_memory(session_id, "user", user_input)
    add_to_memory(session_id, "assistant", response)
    return response
249
+
250
# ══════════════════════════════════════════════════════════════════
# EDGE-TTS (own event loop per call -- safe in Flask threads)
# ══════════════════════════════════════════════════════════════════
async def _async_tts(text: str, rate: int, pitch: int) -> bytes:
    """Stream MP3 audio for *text* from edge-tts and return the raw bytes.

    rate is a percentage offset, pitch a Hz offset; edge-tts requires an
    explicit sign in both strings (e.g. "+7%", "-3Hz").
    """
    rate_str = f"{rate}%" if rate < 0 else f"+{rate}%"
    pitch_str = f"{pitch}Hz" if pitch < 0 else f"+{pitch}Hz"
    comm = edge_tts.Communicate(text, TTS_VOICE, rate=rate_str, pitch=pitch_str)
    pieces = []
    async for chunk in comm.stream():
        if chunk["type"] == "audio":
            pieces.append(chunk["data"])
    return b"".join(pieces)

def synthesize_speech(text: str, rate: int = 0, pitch: int = 0):
    """Synthesize *text* and return base64-encoded MP3, or None when the
    cleaned text is too short or synthesis fails.

    A fresh event loop is created per call because Flask serves each
    request on its own thread, which has no running loop.
    """
    speakable = clean_for_tts(text)
    if not speakable or len(speakable) < 2:
        return None
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        audio = loop.run_until_complete(_async_tts(speakable, rate, pitch))
    except Exception as exc:
        print(f"[TTS] Error: {exc}")
        return None
    finally:
        loop.close()
    return base64.b64encode(audio).decode() if audio else None
277
 
278
+ # ══════════════════════════════════════════════════════════════════
279
+ # HTML -- Full-screen Visual UI, mobile-keyboard-safe
280
+ # ══════════════════════════════════════════════════════════════════
281
+ HTML_PAGE = r"""<!DOCTYPE html>
282
+ <html lang="en">
283
+ <head>
284
+ <meta charset="UTF-8">
285
+ <meta name="viewport" content="width=device-width,initial-scale=1,viewport-fit=cover,interactive-widget=resizes-content">
286
+ <title>Ana</title>
287
+ <style>
288
+ *{margin:0;padding:0;box-sizing:border-box}
289
+
290
+ html{height:100%}
291
+
292
+ body{
293
+ width:100%;
294
+ height:100dvh;
295
+ overflow:hidden;
296
+ background:#000;
297
+ font-family:'Segoe UI',system-ui,sans-serif;
298
+ display:flex;
299
+ flex-direction:column;
300
+ position:relative;
301
+ }
302
+
303
+ /* Full-screen background -- FIXED so keyboard never pushes it */
304
+ #bg{
305
+ position:fixed;
306
+ inset:0;
307
+ z-index:0;
308
+ background:#000;
309
+ }
310
+ #bgImg{
311
+ width:100%;
312
+ height:100%;
313
+ object-fit:cover;
314
+ object-position:center top;
315
+ display:block;
316
+ transition:opacity 0.05s linear;
317
+ }
318
+
319
+ /* Overlay anchored to bottom of body (dvh-aware, shrinks with keyboard) */
320
+ #overlay{
321
+ position:absolute;
322
+ left:0;right:0;bottom:0;
323
+ z-index:20;
324
+ display:flex;
325
+ flex-direction:column;
326
+ padding-bottom:max(10px, env(safe-area-inset-bottom));
327
+ background:linear-gradient(
328
+ to bottom,
329
+ transparent 0%,
330
+ rgba(0,0,0,0.52) 26%,
331
+ rgba(0,0,0,0.76) 100%
332
+ );
333
+ }
334
+
335
+ /* Message area */
336
+ #msgArea{
337
+ overflow-y:auto;
338
+ display:flex;
339
+ flex-direction:column;
340
+ gap:6px;
341
+ padding:16px 13px 8px;
342
+ max-height:30dvh;
343
+ scrollbar-width:none;
344
+ -ms-overflow-style:none;
345
+ scroll-behavior:smooth;
346
+ }
347
+ #msgArea::-webkit-scrollbar{display:none}
348
+
349
+ .turn{display:flex;flex-direction:column;gap:4px}
350
+ .user-row{display:flex;justify-content:flex-end}
351
+ .bot-row{display:flex;flex-direction:column;align-items:flex-start}
352
+ .name-tag{
353
+ font-size:0.58rem;color:rgba(255,255,255,0.28);
354
+ letter-spacing:.08em;text-transform:uppercase;
355
+ margin-bottom:2px;padding-left:3px;
356
+ }
357
+ .bubble{
358
+ max-width:74vw;
359
+ padding:8px 13px;
360
+ border-radius:18px;
361
+ font-size:0.88rem;
362
+ line-height:1.46;
363
+ word-break:break-word;
364
+ backdrop-filter:blur(10px);
365
+ -webkit-backdrop-filter:blur(10px);
366
+ }
367
+ .bubble-user{
368
+ background:rgba(255,255,255,0.11);
369
+ border:1px solid rgba(255,255,255,0.17);
370
+ color:#fff;
371
+ border-bottom-right-radius:5px;
372
+ }
373
+ .bubble-bot{
374
+ background:rgba(0,0,0,0.40);
375
+ border:1px solid rgba(255,255,255,0.07);
376
+ color:rgba(255,255,255,0.9);
377
+ border-bottom-left-radius:5px;
378
+ }
379
+
380
+ /* Typing dots */
381
+ .typing{
382
+ display:flex;align-items:center;gap:5px;
383
+ padding:9px 13px;
384
+ background:rgba(0,0,0,0.36);
385
+ border:1px solid rgba(255,255,255,0.07);
386
+ border-radius:18px;border-bottom-left-radius:5px;
387
+ backdrop-filter:blur(10px);
388
+ width:fit-content;
389
+ }
390
+ .typing span{
391
+ width:5px;height:5px;border-radius:50%;
392
+ background:rgba(255,255,255,0.5);
393
+ animation:blink 1.2s infinite;
394
+ }
395
+ .typing span:nth-child(2){animation-delay:.2s}
396
+ .typing span:nth-child(3){animation-delay:.4s}
397
+ @keyframes blink{
398
+ 0%,80%,100%{transform:scale(.6);opacity:.3}
399
+ 40%{transform:scale(1);opacity:1}
400
+ }
401
+
402
+ /* Input bar */
403
+ #inputBar{
404
+ display:flex;
405
+ align-items:center;
406
+ gap:8px;
407
+ padding:6px 12px 0;
408
+ }
409
+ #msgIn{
410
+ flex:1;
411
+ background:rgba(255,255,255,0.07);
412
+ border:1px solid rgba(255,255,255,0.15);
413
+ border-radius:24px;
414
+ color:#fff;
415
+ padding:10px 16px;
416
+ font-size:16px; /* 16px prevents iOS auto-zoom on focus */
417
+ outline:none;
418
+ caret-color:#fff;
419
+ backdrop-filter:blur(10px);
420
+ -webkit-backdrop-filter:blur(10px);
421
+ transition:border-color .2s,background .2s;
422
+ -webkit-appearance:none;
423
+ appearance:none;
424
+ }
425
+ #msgIn::placeholder{color:rgba(255,255,255,0.27)}
426
+ #msgIn:focus{
427
+ border-color:rgba(255,255,255,0.28);
428
+ background:rgba(255,255,255,0.1);
429
+ }
430
+ #sendBtn{
431
+ width:42px;height:42px;flex-shrink:0;
432
+ border-radius:50%;cursor:pointer;
433
+ display:flex;align-items:center;justify-content:center;
434
+ font-size:1rem;
435
+ background:rgba(255,255,255,0.09);
436
+ border:1px solid rgba(255,255,255,0.17);
437
+ color:rgba(255,255,255,0.65);
438
+ backdrop-filter:blur(10px);
439
+ -webkit-backdrop-filter:blur(10px);
440
+ transition:background .2s,color .2s,transform .12s;
441
+ -webkit-tap-highlight-color:transparent;
442
+ touch-action:manipulation;
443
+ }
444
+ #sendBtn:hover{background:rgba(255,255,255,0.17);color:#fff}
445
+ #sendBtn:active{transform:scale(.88)}
446
+ #sendBtn:disabled{opacity:.28;cursor:not-allowed}
447
+ </style>
448
+ </head>
449
+ <body>
450
+
451
+ <!-- Fixed full-screen background β€” keyboard never moves this -->
452
+ <div id="bg">
453
+ <img id="bgImg" src="/img/default.png" alt=""
454
+ onerror="this.style.opacity='0'">
455
+ </div>
456
+
457
+ <!-- Overlay β€” absolute inside body (dvh), rises with keyboard naturally -->
458
+ <div id="overlay">
459
+ <div id="msgArea"></div>
460
+ <div id="inputBar">
461
+ <input type="text" id="msgIn"
462
+ placeholder="Say something..."
463
+ autocomplete="off"
464
+ autocorrect="off"
465
+ spellcheck="false"
466
+ enterkeyhint="send"/>
467
+ <button id="sendBtn" onclick="send()" aria-label="Send">&#9658;</button>
468
+ </div>
469
+ </div>
470
+
471
+ <script>
472
+ const SID = (crypto.randomUUID ? crypto.randomUUID() : Date.now().toString(36));
473
+ let busy = false, activeAudio = null;
474
+
475
+ const MA = document.getElementById('msgArea');
476
+ const MI = document.getElementById('msgIn');
477
+ const SB = document.getElementById('sendBtn');
478
+ const BG = document.getElementById('bgImg');
479
+
480
+ /* Image system */
481
+ function fadeSwap(src) {
482
+ BG.style.opacity = '0';
483
+ setTimeout(() => {
484
+ const probe = new Image();
485
+ probe.onload = () => { BG.src = src; BG.style.opacity = '1'; };
486
+ probe.onerror = () => { BG.src = '/img/default.png'; BG.style.opacity = '1'; };
487
+ probe.src = src;
488
+ }, 55);
489
+ }
490
+
491
+ function playImgSequence(emotions) {
492
+ if (!emotions || emotions.length === 0) { fadeSwap('/img/default.png'); return; }
493
+ const queue = [...emotions];
494
+ (function next() {
495
+ if (!queue.length) return;
496
+ fadeSwap('/img/' + queue.shift().toLowerCase() + '.png');
497
+ if (queue.length) setTimeout(next, 750);
498
+ })();
499
+ }
500
+
501
+ /* Parse emotion tags */
502
+ function parseResponse(raw) {
503
+ const tagRe = /\[([a-zA-Z_]+)\]/g;
504
+ const emotions = [];
505
+ let m;
506
+ while ((m = tagRe.exec(raw)) !== null) emotions.push(m[1]);
507
+ const clean = raw.replace(/\[[a-zA-Z_]+\]/g, '').trim();
508
+ return { emotions, clean };
509
+ }
510
+
511
+ /* DOM helpers */
512
+ function esc(t) { const d = document.createElement('div'); d.textContent = t; return d.innerHTML; }
513
+ function scroll() { MA.scrollTop = MA.scrollHeight; }
514
+
515
+ function addTurn(userText, botText) {
516
+ const turn = document.createElement('div');
517
+ turn.className = 'turn';
518
+ turn.innerHTML =
519
+ '<div class="user-row"><div class="bubble bubble-user">' + esc(userText) + '</div></div>' +
520
+ '<div class="bot-row"><div class="name-tag">Ana</div><div class="bubble bubble-bot">' + esc(botText) + '</div></div>';
521
+ MA.appendChild(turn);
522
+ scroll();
523
+ }
524
+
525
+ function showTyping() {
526
+ const d = document.createElement('div');
527
+ d.id = 'typDot';
528
+ d.className = 'bot-row';
529
+ d.innerHTML = '<div class="typing"><span></span><span></span><span></span></div>';
530
+ MA.appendChild(d); scroll(); return d;
531
+ }
532
+
533
+ /* TTS */
534
+ function playB64(b64) {
535
+ try {
536
+ if (activeAudio) { activeAudio.pause(); activeAudio = null; }
537
+ const bin = atob(b64), u8 = new Uint8Array(bin.length);
538
+ for (let i = 0; i < bin.length; i++) u8[i] = bin.charCodeAt(i);
539
+ const url = URL.createObjectURL(new Blob([u8], { type: 'audio/mp3' }));
540
+ activeAudio = new Audio(url);
541
+ activeAudio.play().catch(() => {});
542
+ activeAudio.onended = () => { URL.revokeObjectURL(url); activeAudio = null; };
543
+ } catch(e) { console.warn('TTS:', e); }
544
+ }
545
+
546
+ async function fetchTTS(rawText) {
547
+ try {
548
+ const res = await fetch('/tts', {
549
+ method: 'POST',
550
+ headers: { 'Content-Type': 'application/json' },
551
+ body: JSON.stringify({ text: rawText, rate: 7, pitch: 0 })
552
+ });
553
+ const d = await res.json();
554
+ if (d.audio) playB64(d.audio);
555
+ } catch(e) { console.warn('TTS fetch:', e); }
556
+ }
557
+
558
+ /* Send */
559
+ async function send() {
560
+ const t = MI.value.trim();
561
+ if (!t || busy) return;
562
+ MI.value = ''; busy = true; SB.disabled = true;
563
+
564
+ const tyEl = showTyping();
565
+
566
+ try {
567
+ const res = await fetch('/chat', {
568
+ method: 'POST',
569
+ headers: { 'Content-Type': 'application/json' },
570
+ body: JSON.stringify({ message: t, session_id: SID })
571
+ });
572
+ const d = await res.json();
573
+ tyEl.remove();
574
+
575
+ const raw = d.response || '[sad] Something went wrong.';
576
+ const { emotions, clean } = parseResponse(raw);
577
+
578
+ playImgSequence(emotions.length > 0 ? emotions : ['default']);
579
+ addTurn(t, clean);
580
+ fetchTTS(raw);
581
+ } catch(e) {
582
+ tyEl.remove();
583
+ addTurn(t, 'Connection error. Please try again.');
584
+ }
585
+
586
+ busy = false; SB.disabled = false;
587
+ // No MI.focus() on mobile -- avoids re-opening keyboard unexpectedly
588
+ }
589
+
590
+ MI.addEventListener('keydown', e => {
591
+ if (e.key === 'Enter' && !e.shiftKey) { e.preventDefault(); send(); }
592
+ });
593
+ </script>
594
+ </body>
595
+ </html>"""
596
+
597
+ # ══════════════════════════════════════════════════════════════════
598
+ # FLASK
599
+ # ══════════════════════════════════════════════════════════════════
600
  app = Flask(__name__)
601
 
602
@app.route("/")
def index():
    """Serve the single-page chat UI."""
    return Response(response=HTML_PAGE, mimetype="text/html")
605
+
606
@app.route("/img/<path:filename>")
def serve_img(filename: str):
    """Serve an emotion image from IMG_DIR.

    Reducing *filename* to its basename prevents path traversal; unknown
    or non-file targets get an empty 404.
    """
    safe = Path(filename).name
    candidate = IMG_DIR / safe
    if not (candidate.exists() and candidate.is_file()):
        return Response("", status=404)
    return send_from_directory(str(IMG_DIR), safe)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
613
 
614
@app.route("/chat", methods=["POST"])
def chat():
    """POST {message, session_id} -> {response, session_id}.

    A missing session_id gets a fresh UUID; generation errors degrade to
    an in-character apology instead of a 500.
    """
    payload = request.json or {}
    user_input = payload.get("message", "").strip()
    session_id = payload.get("session_id", str(uuid.uuid4()))
    if not user_input:
        return jsonify({"error": "Empty message"}), 400
    try:
        resp = generate_response(user_input, session_id)
    except Exception as exc:
        print(f"[CHAT] Error: {exc}")
        traceback.print_exc()
        resp = "[sad] I encountered an unexpected error. Please try again."
    return jsonify({"response": resp, "session_id": session_id})
 
628
 
629
@app.route("/tts", methods=["POST"])
def tts_endpoint():
    """POST {text, rate?, pitch?} -> {audio: base64 MP3 or null}."""
    payload = request.json or {}
    text = payload.get("text", "").strip()
    # Rate/pitch fall back to the server-wide env-configured defaults.
    rate = int(payload.get("rate", TTS_RATE))
    pitch = int(payload.get("pitch", TTS_PITCH))
    if not text:
        return jsonify({"error": "Empty text"}), 400
    return jsonify({"audio": synthesize_speech(text, rate=rate, pitch=pitch)})
639
 
640
@app.route("/clear", methods=["POST"])
def clear():
    """Drop the stored conversation history for one session."""
    sid = (request.json or {}).get("session_id", "")
    with sessions_lock:
        sessions.pop(sid, None)
    return jsonify({"status": "cleared"})

@app.route("/health")
def health():
    """Readiness probe: reports whether model and tokenizer loaded."""
    return jsonify({
        "model_loaded": model is not None,
        "tokenizer_loaded": tokenizer is not None,
    })
654
+
655
if __name__ == "__main__":
    # Port 7860 is the Hugging Face Spaces convention; threaded=True lets
    # /chat and /tts requests be served concurrently.
    print("Visual AI is online -- http://0.0.0.0:7860")
    app.run(host="0.0.0.0", port=7860, threaded=True)