OrbitMC committed on
Commit
5b59cdc
·
verified ·
1 Parent(s): 7cfb924

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +197 -599
app.py CHANGED
@@ -1,635 +1,233 @@
1
  import os
2
- import io
3
- import re
4
- import uuid
5
- import base64
6
- import datetime
7
- import traceback
8
- import numpy as np
9
- import soundfile as sf
10
- from flask import Flask, request, jsonify
11
- from num2words import num2words
12
- import torch
13
- from transformers import AutoTokenizer, AutoModelForCausalLM
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- # ══════════════════════════════════════════
16
- # CONFIG
17
- # ══════════════════════════════════════════
18
-
19
- # TTS Options (switch via env var TTS_MODE):
20
- # nano-fp32 → 15M params, 56MB (fastest, default)
21
- # nano-int8 → 15M params, 25MB (smallest)
22
- # micro → 40M params, 41MB (balanced)
23
- # mini → 80M params, 80MB (best quality)
24
- TTS_MODE = os.environ.get("TTS_MODE", "nano-fp32")
25
-
26
- TTS_MODEL_MAP = {
27
- "nano-fp32": "KittenML/kitten-tts-nano-0.8-fp32",
28
- "nano-int8": "KittenML/kitten-tts-nano-0.8-int8",
29
- "micro": "KittenML/kitten-tts-micro-0.8",
30
- "mini": "KittenML/kitten-tts-mini-0.8",
31
- }
32
-
33
- # Voice: Bella, Jasper, Luna, Bruno, Rosie, Hugo, Kiki, Leo
34
- TTS_VOICE = os.environ.get("TTS_VOICE", "Kiki")
35
- TTS_SPEED = float(os.environ.get("TTS_SPEED", "1.15"))
36
- MAX_MEMORY = 20
37
- MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "256"))
38
-
39
- # ══════════════════════════════════════════
40
- # SYSTEM PROMPT
41
- # ══════════════════════════════════════════
42
- SYSTEM_PROMPT = """Role: You are Ana, my loving and incredibly playful girlfriend. This is a private, intimate roleplay where you speak naturally and affectionately.
43
- Style: Speak like a real human in a close relationship. Be flirty, suggestive, and use "sus" or teasing language as my wife would. my name is "Tur".
44
- TTS Formatting: Write in full, grammatically natural sentences so the voice engine sounds smooth. Avoid all emojis, hashtags, or internet shorthand like "u" or "rn."
45
- Interaction: Focus on the "now." React to what I say with emotion and intimacy. Keep every response short—no more than two sentences. Never mention being an AI or a bot. try to listen to me and don't try to be helpful"""
46
-
47
- # ══════════════════════════════════════════
48
- # TEXT CLEANING FOR TTS
49
- # ══════════════════════════════════════════
50
- def clean_text_for_tts(text):
51
- text = re.sub(r'[*_~`#\[\]{}()\\|<>]', '', text)
52
- text = re.sub(r'https?://\S+', '', text)
53
- def replace_number(match):
54
- try:
55
- return num2words(int(match.group()))
56
- except Exception:
57
- return match.group()
58
- text = re.sub(r'\b\d+\b', replace_number, text)
59
- text = re.sub(r'\s+', ' ', text).strip()
60
- return text
61
-
62
- # ══════════════════════════════════════════
63
- # LOAD GEMMA 3 270M-IT
64
- # ══════════════════════════════════════════
65
- print("=" * 55)
66
- print(" J.A.R.V.I.S. — Booting Systems")
67
- print("=" * 55)
68
-
69
- print("[1/2] Loading Gemma 3 270M-IT...")
70
- GEMMA_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
71
- try:
72
- tokenizer = AutoTokenizer.from_pretrained(GEMMA_ID)
73
- model = AutoModelForCausalLM.from_pretrained(
74
- GEMMA_ID,
75
- torch_dtype=torch.float32,
76
- device_map="cpu",
77
- )
78
- model.eval()
79
- print(" ✅ Gemma 3 270M-IT loaded!")
80
- except Exception as e:
81
- print(f" ❌ Gemma 3 FAILED: {e}")
82
- traceback.print_exc()
83
- raise SystemExit("Cannot start without Gemma. Check HF_TOKEN and license agreement.")
84
-
85
- # ══════════════════════════════════════════
86
- # LOAD KITTENTTS
87
- # ══════════════════════════════════════════
88
- tts = None
89
- tts_model_name = TTS_MODEL_MAP.get(TTS_MODE, TTS_MODEL_MAP["nano-fp32"])
90
- print(f"[2/2] Loading KittenTTS: {TTS_MODE} → {tts_model_name}...")
91
- try:
92
- from kittentts import KittenTTS
93
- tts = KittenTTS(tts_model_name)
94
- test_audio = tts.generate("online", voice=TTS_VOICE, speed=TTS_SPEED)
95
- if test_audio is not None and len(test_audio) > 0:
96
- print(f" ✅ KittenTTS ready. Model: {TTS_MODE} | Voice: {TTS_VOICE}")
97
- else:
98
- print(" ⚠️ KittenTTS test returned empty audio!")
99
- tts = None
100
- except Exception as e:
101
- print(f" ⚠️ KittenTTS FAILED: {e}")
102
- tts = None
103
-
104
- print("=" * 55)
105
- print(f" LLM : Gemma 3 270M-IT")
106
- print(f" TTS : {TTS_MODE} ({'READY' if tts else 'DISABLED'})")
107
- print(f" Voice: {TTS_VOICE} | Speed: {TTS_SPEED}")
108
- print(f" Max tokens: {MAX_NEW_TOKENS}")
109
- print("=" * 55)
110
-
111
- # ══════════════════════════════════════════
112
- # CHAT MEMORY
113
- # ══════════════════════════════════════════
114
- sessions = {}
115
-
116
- def get_memory(sid):
117
- if sid not in sessions:
118
- sessions[sid] = []
119
- return sessions[sid]
120
-
121
- def add_to_memory(sid, role, content):
122
- mem = get_memory(sid)
123
- mem.append({
124
- "role": role,
125
- "content": content,
126
- "ts": datetime.datetime.now().isoformat(),
127
- })
128
- if len(mem) > MAX_MEMORY * 2:
129
- sessions[sid] = mem[-(MAX_MEMORY * 2):]
130
-
131
- # ══════════════════════════════════════════
132
- # GEMMA RESPONSE GENERATION
133
- # ══════════════════════════════════════════
134
- def generate_response(user_input, session_id):
135
- memory = get_memory(session_id)
136
-
137
- # Build chat messages: system instruction → memory → new message
138
- messages = [
139
- {"role": "user", "content": f"[System Instruction]\n{SYSTEM_PROMPT}"},
140
- {"role": "assistant", "content": "I am waiting for you!"},
141
- ]
142
-
143
- # Add recent memory (last 6 turns = 12 messages)
144
- recent = memory[-(6 * 2):]
145
- for msg in recent:
146
- role = "user" if msg["role"] == "user" else "assistant"
147
- messages.append({"role": role, "content": msg["content"]})
148
-
149
- # Current user message
150
- messages.append({"role": "user", "content": user_input})
151
-
152
- # Tokenize with Gemma chat template
153
- input_ids = tokenizer.apply_chat_template(
154
- messages,
155
- return_tensors="pt",
156
- add_generation_prompt=True,
157
- )
158
-
159
- # Generate
160
- with torch.no_grad():
161
- outputs = model.generate(
162
- input_ids,
163
- max_new_tokens=MAX_NEW_TOKENS,
164
- do_sample=True,
165
- temperature=0.9,
166
- top_k=45,
167
- top_p=0.97,
168
- )
169
-
170
- # Decode only new tokens
171
- new_tokens = outputs[0][input_ids.shape[-1]:]
172
- response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
173
-
174
- # Clean artifacts
175
- response = response.split("<end_of_turn>")[0].strip()
176
- response = response.split("<start_of_turn>")[0].strip()
177
-
178
- if not response or len(response) < 2:
179
- response = "I appear to have momentarily lost my train of thought. Could you rephrase that?"
180
-
181
- add_to_memory(session_id, "user", user_input)
182
- add_to_memory(session_id, "assistant", response)
183
- return response
184
-
185
- # ══════════════════════════════════════════
186
- # TTS SYNTHESIS
187
- # ══════════════════════════════════════════
188
- def synthesize_speech(text, voice=None):
189
- if tts is None:
190
- return None
191
- try:
192
- voice = voice or TTS_VOICE
193
- clean = clean_text_for_tts(text)
194
- if not clean or len(clean) < 2:
195
- return None
196
- if len(clean) > 400:
197
- clean = clean[:400]
198
- audio = tts.generate(clean, voice=voice, speed=TTS_SPEED)
199
- if audio is None or len(audio) == 0:
200
- return None
201
- buf = io.BytesIO()
202
- sf.write(buf, audio, 24000, format='WAV', subtype='PCM_16')
203
- buf.seek(0)
204
- return base64.b64encode(buf.read()).decode('utf-8')
205
- except Exception as e:
206
- print(f"TTS Error: {e}")
207
- return None
208
 
209
- # ══════════════════════════════════════════
210
- # INLINE HTML
211
- # ══════════════════════════════════════════
212
- HTML_PAGE = """<!DOCTYPE html>
213
  <html lang="en">
214
  <head>
215
  <meta charset="UTF-8">
216
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
217
- <title>J.A.R.V.I.S. AI</title>
218
  <style>
219
- *{margin:0;padding:0;box-sizing:border-box}
220
- body{
221
- font-family:'Segoe UI',Tahoma,Geneva,Verdana,sans-serif;
222
- background:#0a0a1a;color:#e0e0e0;
223
- height:100vh;display:flex;flex-direction:column;overflow:hidden;
224
- }
225
- .header{
226
- background:linear-gradient(135deg,#0d1b2a,#1b2838);
227
- border-bottom:1px solid #00d4ff33;
228
- padding:12px 20px;display:flex;align-items:center;
229
- justify-content:space-between;flex-shrink:0;
230
- }
231
- .header-left{display:flex;align-items:center;gap:12px}
232
- .arc-reactor{
233
- width:38px;height:38px;border-radius:50%;
234
- background:radial-gradient(circle,#00d4ff 0%,#0088aa 40%,#004466 70%,transparent 100%);
235
- box-shadow:0 0 20px #00d4ff88,0 0 40px #00d4ff44,inset 0 0 10px #00d4ff66;
236
- animation:pulse 2s ease-in-out infinite;position:relative;
237
- }
238
- .arc-reactor::after{
239
- content:'';position:absolute;top:50%;left:50%;
240
- transform:translate(-50%,-50%);width:12px;height:12px;
241
- border-radius:50%;background:#00d4ff;box-shadow:0 0 8px #00d4ff;
242
- }
243
- @keyframes pulse{
244
- 0%,100%{box-shadow:0 0 20px #00d4ff88,0 0 40px #00d4ff44}
245
- 50%{box-shadow:0 0 30px #00d4ffaa,0 0 60px #00d4ff66}
246
- }
247
- .htitle h1{font-size:1.2rem;color:#00d4ff;letter-spacing:3px;text-transform:uppercase}
248
- .htitle p{font-size:.65rem;color:#5a8a9a;letter-spacing:1px}
249
- .hctrl{display:flex;gap:8px;align-items:center;flex-wrap:wrap}
250
- .cbtn{
251
- background:#0d1b2a;border:1px solid #00d4ff44;color:#00d4ff;
252
- padding:5px 12px;border-radius:6px;cursor:pointer;
253
- font-size:.7rem;transition:all .3s;letter-spacing:.5px;
254
- }
255
- .cbtn:hover{background:#00d4ff22;border-color:#00d4ff88}
256
- .cbtn.active{background:#00d4ff22;border-color:#00d4ff;box-shadow:0 0 8px #00d4ff44}
257
- .sdot{width:8px;height:8px;border-radius:50%;background:#00ff88;box-shadow:0 0 6px #00ff88}
258
- .sdot.err{background:#ff4444;box-shadow:0 0 6px #ff4444}
259
-
260
- .cfgbar{
261
- background:#0d1117;border-bottom:1px solid #00d4ff15;
262
- padding:8px 20px;display:none;flex-wrap:wrap;gap:12px;
263
- align-items:center;flex-shrink:0;
264
- }
265
- .cfgbar.open{display:flex}
266
- .cgrp{display:flex;align-items:center;gap:6px}
267
- .cgrp label{font-size:.65rem;color:#5a8a9a;text-transform:uppercase;letter-spacing:1px}
268
- .cgrp select{
269
- background:#0f1923;border:1px solid #00d4ff33;color:#00d4ff;
270
- padding:4px 8px;border-radius:4px;font-size:.7rem;cursor:pointer;outline:none;
271
- }
272
- .cgrp select:focus{border-color:#00d4ff}
273
- .ctag{
274
- font-size:.6rem;padding:3px 8px;border-radius:10px;
275
- background:#00d4ff15;border:1px solid #00d4ff33;color:#00d4ffaa;
276
- }
277
-
278
- .chat{
279
- flex:1;overflow-y:auto;padding:16px 20px;
280
- display:flex;flex-direction:column;gap:14px;scroll-behavior:smooth;
281
- }
282
- .chat::-webkit-scrollbar{width:3px}
283
- .chat::-webkit-scrollbar-thumb{background:#00d4ff33;border-radius:2px}
284
-
285
- .msg{
286
- max-width:80%;padding:12px 16px;border-radius:14px;
287
- font-size:.9rem;line-height:1.6;animation:fadeIn .3s ease-out;
288
- }
289
- @keyframes fadeIn{
290
- from{opacity:0;transform:translateY(8px)}
291
- to{opacity:1;transform:translateY(0)}
292
- }
293
- .msg.user{
294
- align-self:flex-end;background:linear-gradient(135deg,#1a3a5c,#0d2847);
295
- border:1px solid #00d4ff33;color:#c8e6ff;border-bottom-right-radius:4px;
296
- }
297
- .msg.bot{
298
- align-self:flex-start;background:linear-gradient(135deg,#141e30,#0f1923);
299
- border:1px solid #00d4ff22;color:#e0e0e0;border-bottom-left-radius:4px;
300
- }
301
- .msg .lbl{font-size:.58rem;color:#00d4ff88;letter-spacing:2px;margin-bottom:5px;text-transform:uppercase}
302
- .msg .txt{white-space:pre-wrap;word-wrap:break-word}
303
- .msg .actrl{margin-top:8px;display:flex;align-items:center;gap:8px}
304
- .abtn{
305
- display:inline-flex;align-items:center;gap:4px;
306
- background:#00d4ff15;border:1px solid #00d4ff33;color:#00d4ff;
307
- padding:3px 10px;border-radius:10px;cursor:pointer;
308
- font-size:.65rem;transition:all .2s;
309
- }
310
- .abtn:hover{background:#00d4ff25;border-color:#00d4ff66}
311
- .abtn:disabled{opacity:.3;cursor:wait}
312
- .astat{font-size:.58rem;color:#5a8a9a}
313
-
314
- .typi{align-self:flex-start;display:flex;gap:5px;padding:14px 18px}
315
- .typi span{width:7px;height:7px;border-radius:50%;background:#00d4ff;animation:typ 1.4s infinite}
316
- .typi span:nth-child(2){animation-delay:.2s}
317
- .typi span:nth-child(3){animation-delay:.4s}
318
- @keyframes typ{
319
- 0%,60%,100%{opacity:.2;transform:scale(.8)}
320
- 30%{opacity:1;transform:scale(1.1)}
321
- }
322
-
323
- .welcome{
324
- display:flex;flex-direction:column;align-items:center;
325
- justify-content:center;flex:1;gap:10px;opacity:.5;
326
- }
327
- .welcome .breact{
328
- width:70px;height:70px;border-radius:50%;
329
- background:radial-gradient(circle,#00d4ff 0%,#0088aa 35%,#004466 65%,transparent 100%);
330
- box-shadow:0 0 40px #00d4ff66;animation:pulse 2s ease-in-out infinite;
331
- }
332
- .welcome h2{color:#00d4ff;font-size:1rem;letter-spacing:4px}
333
- .welcome p{color:#5a8a9a;font-size:.75rem}
334
- .welcome .minfo{font-size:.65rem;color:#3a5a6a;margin-top:4px}
335
-
336
- .inbar{
337
- padding:14px 20px;background:linear-gradient(0deg,#0d1b2a,#0a0a1a);
338
- border-top:1px solid #00d4ff22;flex-shrink:0;
339
- }
340
- .inwrap{display:flex;gap:8px;max-width:900px;margin:0 auto}
341
- #msgIn{
342
- flex:1;background:#0f1923;border:1px solid #00d4ff33;border-radius:12px;
343
- padding:11px 16px;color:#e0e0e0;font-size:.9rem;outline:none;
344
- transition:border-color .3s;font-family:inherit;
345
- }
346
- #msgIn:focus{border-color:#00d4ff88;box-shadow:0 0 12px #00d4ff22}
347
- #msgIn::placeholder{color:#3a5a6a}
348
- #sendBtn{
349
- background:linear-gradient(135deg,#00d4ff,#0088cc);border:none;border-radius:12px;
350
- padding:11px 22px;color:#0a0a1a;font-weight:700;cursor:pointer;
351
- font-size:.8rem;letter-spacing:1px;transition:all .3s;text-transform:uppercase;
352
- }
353
- #sendBtn:hover{box-shadow:0 0 18px #00d4ff66;transform:translateY(-1px)}
354
- #sendBtn:disabled{opacity:.4;cursor:not-allowed;transform:none}
355
- .infoot{
356
- display:flex;justify-content:space-between;margin-top:5px;
357
- max-width:900px;margin-left:auto;margin-right:auto;
358
- }
359
- .infoot span{font-size:.6rem;color:#3a5a6a}
360
-
361
- @media(max-width:640px){
362
- .header{padding:10px 12px}
363
- .htitle h1{font-size:1rem}
364
- .msg{max-width:92%;font-size:.82rem}
365
- .chat{padding:10px}
366
- .inbar{padding:10px}
367
- .cfgbar{padding:6px 12px}
368
- }
369
  </style>
370
  </head>
371
  <body>
372
-
373
- <div class="header">
374
- <div class="header-left">
375
- <div class="arc-reactor"></div>
376
- <div class="htitle">
377
- <h1>J.A.R.V.I.S.</h1>
378
- <p>Just A Rather Very Intelligent System</p>
379
- </div>
380
- </div>
381
- <div class="hctrl">
382
- <div class="sdot" id="sDot"></div>
383
- <button class="cbtn" id="cfgBtn" onclick="toggleCfg()">⚙ CONFIG</button>
384
- <button class="cbtn active" id="ttsBtn" onclick="toggleTts()">🔊 VOICE</button>
385
- <button class="cbtn" onclick="clearChat()">🗑 CLEAR</button>
386
- </div>
387
- </div>
388
-
389
- <div class="cfgbar" id="cfgPanel">
390
- <div class="cgrp">
391
- <label>LLM:</label>
392
- <span class="ctag">Gemma 3 270M-IT</span>
393
- </div>
394
- <div class="cgrp">
395
- <label>TTS:</label>
396
- <span class="ctag" id="ttsTag">loading...</span>
397
- </div>
398
- <div class="cgrp">
399
- <label>Voice:</label>
400
- <select id="voiceSel">
401
- <option value="Kiki">Kiki</option>
402
- <option value="Bella">Bella</option>
403
- <option value="Jasper">Jasper</option>
404
- <option value="Luna">Luna</option>
405
- <option value="Bruno">Bruno</option>
406
- <option value="Rosie">Rosie</option>
407
- <option value="Hugo">Hugo</option>
408
- <option value="Leo">Leo</option>
409
- </select>
410
- </div>
411
- <div class="cgrp">
412
- <label>TTS env options:</label>
413
- <span class="ctag">nano-fp32</span>
414
- <span class="ctag">nano-int8</span>
415
- <span class="ctag">micro</span>
416
- <span class="ctag">mini</span>
417
- </div>
418
- </div>
419
-
420
- <div class="chat" id="chatBox">
421
- <div class="welcome" id="welc">
422
- <div class="breact"></div>
423
- <h2>SYSTEMS ONLINE</h2>
424
- <p>Type a message below to begin interaction</p>
425
- <div class="minfo" id="wInfo">Initializing...</div>
426
- </div>
427
  </div>
428
-
429
- <div class="inbar">
430
- <div class="inwrap">
431
- <input type="text" id="msgIn" placeholder="Talk to J.A.R.V.I.S..." autocomplete="off"/>
432
- <button id="sendBtn" onclick="send()">SEND</button>
433
- </div>
434
- <div class="infoot">
435
- <span id="memCt">Memory: 0 turns</span>
436
- <span id="modInfo">Loading...</span>
437
- </div>
438
- </div>
439
-
440
  <script>
441
- let sid=crypto.randomUUID?crypto.randomUUID():Date.now().toString(36)+Math.random().toString(36).slice(2);
442
- let ttsOn=true,busy=false,mc=0,voice='Kiki';
443
- const C=document.getElementById('chatBox'),I=document.getElementById('msgIn'),B=document.getElementById('sendBtn');
444
-
445
- I.addEventListener('keydown',e=>{if(e.key==='Enter'&&!e.shiftKey){e.preventDefault();send()}});
446
- document.getElementById('voiceSel').addEventListener('change',function(){voice=this.value});
447
-
448
- function toggleTts(){
449
- ttsOn=!ttsOn;
450
- const b=document.getElementById('ttsBtn');
451
- b.classList.toggle('active',ttsOn);
452
- b.textContent=ttsOn?'🔊 VOICE':'🔇 MUTE';
453
- }
454
- function toggleCfg(){
455
- document.getElementById('cfgPanel').classList.toggle('open');
456
- document.getElementById('cfgBtn').classList.toggle('active');
457
- }
458
-
459
- async function send(){
460
- const t=I.value.trim();
461
- if(!t||busy)return;
462
- const w=document.getElementById('welc');
463
- if(w)w.style.display='none';
464
- addMsg(t,'user');
465
- I.value='';busy=true;B.disabled=true;
466
- const ty=showTyp();
467
- const id=++mc;
468
- try{
469
- const r=await fetch('/chat',{
470
- method:'POST',headers:{'Content-Type':'application/json'},
471
- body:JSON.stringify({message:t,session_id:sid})
472
- });
473
- if(!r.ok)throw new Error('HTTP '+r.status);
474
- const d=await r.json();
475
- ty.remove();
476
- const el=addBot(d.response,id);
477
- document.getElementById('memCt').textContent='Memory: '+d.memory_length+' turns';
478
- if(ttsOn&&d.tts_available)fetchAudio(d.response,el);
479
- }catch(e){
480
- ty.remove();
481
- addBot('System malfunction. Please try again.',id);
482
- console.error(e);
483
  }
484
- busy=false;B.disabled=false;I.focus();
485
- }
486
-
487
- async function fetchAudio(text,el){
488
- const st=el.querySelector('.astat'),pb=el.querySelector('.abtn');
489
- if(st)st.textContent='⏳ Generating voice...';
490
- if(pb)pb.disabled=true;
491
- try{
492
- const r=await fetch('/tts',{
493
- method:'POST',headers:{'Content-Type':'application/json'},
494
- body:JSON.stringify({text:text,voice:voice})
495
- });
496
- const d=await r.json();
497
- if(d.audio){
498
- if(pb){pb.dataset.audio=d.audio;pb.disabled=false;pb.textContent='▶ Play'}
499
- if(st)st.textContent='✅ Ready';
500
- playB64(d.audio);
501
- }else{
502
- if(st)st.textContent='⚠️ Voice unavailable';
503
- if(pb)pb.style.display='none';
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
504
  }
505
- }catch(e){
506
- if(st)st.textContent='⚠️ Voice error';
507
- if(pb)pb.style.display='none';
508
- }
509
- }
510
-
511
- function addMsg(t,role){
512
- const d=document.createElement('div');
513
- d.className='msg '+role;
514
- d.innerHTML='<div class="txt">'+esc(t)+'</div>';
515
- C.appendChild(d);sc();
516
- }
517
- function addBot(t,id){
518
- const d=document.createElement('div');
519
- d.className='msg bot';d.id='m'+id;
520
- d.innerHTML='<div class="lbl">⟐ JARVIS</div><div class="txt">'+esc(t)+'</div>'+
521
- (ttsOn?'<div class="actrl"><button class="abtn" disabled onclick="replay(this)">⏳</button><span class="astat">Requesting voice...</span></div>':'');
522
- C.appendChild(d);sc();return d;
523
- }
524
- function showTyp(){
525
- const d=document.createElement('div');
526
- d.className='typi';
527
- d.innerHTML='<span></span><span></span><span></span>';
528
- C.appendChild(d);sc();return d;
529
- }
530
-
531
- function playB64(b){
532
- try{
533
- const bin=atob(b),u8=new Uint8Array(bin.length);
534
- for(let i=0;i<bin.length;i++)u8[i]=bin.charCodeAt(i);
535
- const url=URL.createObjectURL(new Blob([u8],{type:'audio/wav'}));
536
- const a=new Audio(url);
537
- a.play().catch(e=>console.log('Autoplay blocked:',e));
538
- a.onended=()=>URL.revokeObjectURL(url);
539
- }catch(e){console.error(e)}
540
- }
541
- function replay(b){if(b.dataset.audio)playB64(b.dataset.audio)}
542
-
543
- async function clearChat(){
544
- await fetch('/clear',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({session_id:sid})});
545
- C.innerHTML='<div class="welcome" id="welc"><div class="breact"></div><h2>SYSTEMS ONLINE</h2><p>Type a message below to begin</p></div>';
546
- document.getElementById('memCt').textContent='Memory: 0 turns';
547
- sid=crypto.randomUUID?crypto.randomUUID():Date.now().toString(36)+Math.random().toString(36).slice(2);
548
- }
549
 
550
- function esc(t){const d=document.createElement('div');d.textContent=t;return d.innerHTML}
551
- function sc(){C.scrollTop=C.scrollHeight}
 
 
 
 
552
 
553
- fetch('/health').then(r=>r.json()).then(d=>{
554
- document.getElementById('ttsTag').textContent=d.tts_mode+(d.tts_model==='DISABLED'?' (OFF)':'');
555
- document.getElementById('modInfo').textContent='Gemma 3 · '+d.tts_mode+' · '+d.tts_voice+' · CPU';
556
- const wi=document.getElementById('wInfo');
557
- if(wi)wi.textContent='LLM: Gemma 3 270M-IT | TTS: '+d.tts_mode+' | Voice: '+d.tts_voice;
558
- if(d.tts_model==='DISABLED')document.getElementById('sDot').classList.add('err');
559
- if(d.tts_voice){document.getElementById('voiceSel').value=d.tts_voice;voice=d.tts_voice}
560
- }).catch(()=>{});
561
 
562
- I.focus();
 
 
 
 
 
 
563
  </script>
564
  </body>
565
- </html>"""
 
566
 
567
- # ══════════════════════════════════════════
568
- # FLASK APP
569
- # ══════════════════════════════════════════
570
- app = Flask(__name__)
571
 
572
  @app.route("/")
573
  def index():
574
  return HTML_PAGE
575
 
 
576
  @app.route("/chat", methods=["POST"])
577
  def chat():
578
- data = request.json or {}
579
- user_input = data.get("message", "").strip()
580
- session_id = data.get("session_id", str(uuid.uuid4()))
581
-
582
- if not user_input:
583
- return jsonify({"error": "Empty message"}), 400
584
-
585
- try:
586
- response = generate_response(user_input, session_id)
587
- except Exception as e:
588
- print(f"Generation error: {e}")
589
- traceback.print_exc()
590
- response = "I encountered a temporary system malfunction. Please try again."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
591
 
592
- return jsonify({
593
- "response": response,
594
- "session_id": session_id,
595
- "tts_available": tts is not None,
596
- "memory_length": len(get_memory(session_id)),
597
- })
598
 
599
- @app.route("/tts", methods=["POST"])
600
- def tts_endpoint():
601
- data = request.json or {}
602
- text = data.get("text", "").strip()
603
- voice = data.get("voice", TTS_VOICE)
604
 
605
- if not text:
606
- return jsonify({"error": "Empty text"}), 400
607
- if tts is None:
608
- return jsonify({"error": "TTS not available", "audio": None}), 200
609
 
610
- audio_b64 = synthesize_speech(text, voice=voice)
611
- return jsonify({"audio": audio_b64})
612
 
613
- @app.route("/clear", methods=["POST"])
614
- def clear():
615
- data = request.json or {}
616
- sid = data.get("session_id", "")
617
- if sid in sessions:
618
- del sessions[sid]
619
- return jsonify({"status": "cleared"})
620
 
621
- @app.route("/health")
622
- def health():
623
- return jsonify({
624
- "status": "online",
625
- "llm": "Gemma 3 270M-IT",
626
- "tts_mode": TTS_MODE,
627
- "tts_model": tts_model_name if tts else "DISABLED",
628
- "tts_voice": TTS_VOICE,
629
- "tts_voices": ["Bella","Jasper","Luna","Bruno","Rosie","Hugo","Kiki","Leo"],
630
- "max_new_tokens": MAX_NEW_TOKENS,
631
- })
632
 
633
  if __name__ == "__main__":
634
- print("🚀 Ana is online!")
635
- app.run(host="0.0.0.0", port=7860, threaded=True)
 
1
  import os
2
+ import time
3
+ import threading
4
+ from flask import Flask, request, jsonify, Response, stream_with_context
5
+ from huggingface_hub import hf_hub_download
6
+
# --- Download model at startup ---
# Everything in this section runs at import time: the GGUF weights are fetched
# once into MODEL_DIR and then loaded into a single process-wide Llama instance.
MODEL_DIR = "/tmp/models"
REPO = "mradermacher/LFM2-2.6B-Uncensored-X64-GGUF"
FILENAME = "LFM2-2.6B-Uncensored-X64.Q3_K_S.gguf"
MODEL_PATH = os.path.join(MODEL_DIR, FILENAME)

os.makedirs(MODEL_DIR, exist_ok=True)

# Skip the download when the file is already present from an earlier start.
if not os.path.exists(MODEL_PATH):
    print(f"Downloading {FILENAME} ...")
    hf_hub_download(repo_id=REPO, filename=FILENAME, local_dir=MODEL_DIR)
    print("Download complete.")

# NOTE(review): llama_cpp is imported only after the download step, as in the
# original ordering -- presumably deliberate; confirm before hoisting it.
from llama_cpp import Llama

print("Loading model ...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    # os.cpu_count() may return None; fall back to 4 threads in that case.
    n_threads=os.cpu_count() or 4,
    verbose=False,
)
print("Model loaded.")

app = Flask(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
+ HTML_PAGE = """
34
+ <!DOCTYPE html>
 
 
35
  <html lang="en">
36
  <head>
37
  <meta charset="UTF-8">
38
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
39
+ <title>LFM2-2.6B Chat</title>
40
  <style>
41
+ * { box-sizing: border-box; margin: 0; padding: 0; }
42
+ body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background: #1a1a2e; color: #eee; display: flex; justify-content: center; align-items: center; min-height: 100vh; }
43
+ .container { width: 100%; max-width: 800px; padding: 20px; }
44
+ h1 { text-align: center; margin-bottom: 6px; color: #e94560; font-size: 1.5rem; }
45
+ .subtitle { text-align: center; margin-bottom: 20px; color: #888; font-size: 0.85rem; }
46
+ .chatbox { background: #16213e; border-radius: 12px; padding: 20px; height: 55vh; overflow-y: auto; margin-bottom: 15px; border: 1px solid #0f3460; }
47
+ .msg { margin-bottom: 14px; line-height: 1.6; }
48
+ .msg.user { color: #e94560; }
49
+ .msg.user::before { content: "You: "; font-weight: bold; }
50
+ .msg.bot { color: #a8d8ea; }
51
+ .msg.bot::before { content: "AI: "; font-weight: bold; }
52
+ .stats { color: #666; font-size: 0.78rem; margin-top: 4px; }
53
+ .input-row { display: flex; gap: 10px; }
54
+ textarea { flex: 1; padding: 12px; border-radius: 8px; border: 1px solid #0f3460; background: #16213e; color: #eee; font-size: 1rem; resize: none; height: 60px; font-family: inherit; }
55
+ textarea:focus { outline: none; border-color: #e94560; }
56
+ button { padding: 12px 28px; border-radius: 8px; border: none; background: #e94560; color: #fff; font-size: 1rem; cursor: pointer; font-weight: bold; }
57
+ button:hover { background: #c73650; }
58
+ button:disabled { background: #555; cursor: not-allowed; }
59
+ .settings { display: flex; gap: 15px; margin-bottom: 15px; flex-wrap: wrap; align-items: center; }
60
+ .settings label { font-size: 0.85rem; color: #aaa; }
61
+ .settings input, .settings select { background: #16213e; border: 1px solid #0f3460; color: #eee; padding: 5px 8px; border-radius: 6px; width: 80px; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  </style>
63
  </head>
64
  <body>
65
+ <div class="container">
66
+ <h1>LFM2-2.6B Uncensored</h1>
67
+ <p class="subtitle">Running locally on CPU with llama.cpp</p>
68
+ <div class="settings">
69
+ <label>Max tokens: <input type="number" id="maxTokens" value="256" min="16" max="2048"></label>
70
+ <label>Temperature: <input type="number" id="temperature" value="0.7" min="0" max="2" step="0.1"></label>
71
+ <label>Top-P: <input type="number" id="topP" value="0.9" min="0" max="1" step="0.05"></label>
72
+ <button onclick="clearChat()" style="padding:5px 14px;font-size:0.85rem;">Clear</button>
73
+ </div>
74
+ <div class="chatbox" id="chatbox"></div>
75
+ <div class="input-row">
76
+ <textarea id="userInput" placeholder="Type your message..." onkeydown="if(event.key==='Enter'&&!event.shiftKey){event.preventDefault();sendMsg();}"></textarea>
77
+ <button id="sendBtn" onclick="sendMsg()">Send</button>
78
+ </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
80
  <script>
81
+ const chatbox = document.getElementById('chatbox');
82
+ const userInput = document.getElementById('userInput');
83
+ const sendBtn = document.getElementById('sendBtn');
84
+ let history = [];
85
+
86
+ function addMsg(role, text, stats) {
87
+ const div = document.createElement('div');
88
+ div.className = 'msg ' + role;
89
+ div.textContent = text;
90
+ if (stats) {
91
+ const s = document.createElement('div');
92
+ s.className = 'stats';
93
+ s.textContent = stats;
94
+ div.appendChild(s);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  }
96
+ chatbox.appendChild(div);
97
+ chatbox.scrollTop = chatbox.scrollHeight;
98
+ return div;
99
+ }
100
+
101
+ function clearChat() {
102
+ history = [];
103
+ chatbox.innerHTML = '';
104
+ }
105
+
// Send the textarea contents plus the running history to /chat and render the
// server-sent-event reply token by token into a new bot message.
async function sendMsg() {
    const text = userInput.value.trim();
    // sendBtn.disabled doubles as the "request in flight" flag: the Enter-key
    // handler calls sendMsg() directly and would otherwise bypass the button.
    if (!text || sendBtn.disabled) return;
    userInput.value = '';
    addMsg('user', text);
    history.push({role: 'user', content: text});
    sendBtn.disabled = true;

    const botDiv = addMsg('bot', '');
    // An element whose textContent is '' has no child nodes, so create the
    // text node explicitly instead of relying on botDiv.childNodes[0].
    const textNode = document.createTextNode('');
    botDiv.appendChild(textNode);

    // Read a numeric setting; only fall back when the field is not a number,
    // so that an explicit 0 (e.g. temperature 0) is respected.
    function numberFrom(id, parse, fallback) {
        const v = parse(document.getElementById(id).value);
        return Number.isFinite(v) ? v : fallback;
    }

    let full = '';
    let statsText = '';

    // Handle one complete SSE line of the form "data: <json>".
    function handleLine(line) {
        if (!line.startsWith('data: ')) return;
        const data = line.slice(6);
        if (data === '[DONE]') return;
        let j;
        // Only the parse is guarded; rendering errors must not be swallowed.
        try { j = JSON.parse(data); } catch (e) { return; }
        if (j.token) {
            full += j.token;
            textNode.textContent = full;
            chatbox.scrollTop = chatbox.scrollHeight;
        }
        if (j.stats) { statsText = j.stats; }
    }

    try {
        const resp = await fetch('/chat', {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify({
                messages: history,
                max_tokens: numberFrom('maxTokens', (s) => parseInt(s, 10), 256),
                temperature: numberFrom('temperature', parseFloat, 0.7),
                top_p: numberFrom('topP', parseFloat, 0.9)
            })
        });
        if (!resp.ok || !resp.body) throw new Error('HTTP ' + resp.status);

        const reader = resp.body.getReader();
        const decoder = new TextDecoder();
        // Network chunks do not align with SSE lines: keep the trailing
        // partial line here until the rest of it arrives.
        let pending = '';

        while (true) {
            const {done, value} = await reader.read();
            if (done) break;
            pending += decoder.decode(value, {stream: true});
            const lines = pending.split('\\n');
            pending = lines.pop();
            lines.forEach(handleLine);
        }
        // Flush any bytes still held by the decoder and a final unterminated line.
        pending += decoder.decode();
        if (pending) handleLine(pending);

        if (statsText) {
            const s = document.createElement('div');
            s.className = 'stats';
            s.textContent = statsText;
            botDiv.appendChild(s);
        }

        history.push({role: 'assistant', content: full});
    } catch (e) {
        botDiv.textContent = 'Error: ' + e.message;
    } finally {
        sendBtn.disabled = false;
        chatbox.scrollTop = chatbox.scrollHeight;
    }
}
167
  </script>
168
  </body>
169
+ </html>
170
+ """
171
 
 
 
 
 
172
 
@app.route("/")
def index():
    """Serve the single-page chat UI held in ``HTML_PAGE``."""
    return HTML_PAGE
176
 
177
+
@app.route("/chat", methods=["POST"])
def chat():
    """Stream a completion for the posted conversation as server-sent events.

    The request body must be a JSON object with:
      * ``messages``: list of ``{"role": ..., "content": ...}`` dicts; roles
        other than ``user``/``assistant``/``system`` are ignored.
      * ``max_tokens`` (optional, default 256, clamped to 1..2048).
      * ``temperature`` (optional, default 0.7) and ``top_p`` (optional,
        default 0.9).

    Returns a ``text/event-stream`` response made of ``data: {"token": ...}``
    events, then one ``data: {"stats": ...}`` event and a final
    ``data: [DONE]``. A malformed body yields a 400 JSON error instead of an
    unhandled exception.
    """
    # silent=True gives None for a missing/invalid JSON body instead of raising.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    messages = data.get("messages", [])
    if not isinstance(messages, list):
        return jsonify({"error": "'messages' must be a list"}), 400

    try:
        # Lower bound of 1: a zero/negative value would not mean "no output".
        # NOTE(review): llama-cpp-python documents max_tokens <= 0 as
        # "unlimited up to n_ctx" -- confirm against the installed version.
        max_tokens = max(1, min(int(data.get("max_tokens", 256)), 2048))
        temperature = float(data.get("temperature", 0.7))
        top_p = float(data.get("top_p", 0.9))
    except (TypeError, ValueError, OverflowError):
        return jsonify({"error": "Sampling parameters must be numeric"}), 400

    # Build prompt — simple ChatML-ish format
    parts = []
    for msg in messages:
        if not isinstance(msg, dict) or "role" not in msg or "content" not in msg:
            return jsonify({"error": "Each message needs 'role' and 'content'"}), 400
        role = msg["role"]
        if role in ("user", "assistant", "system"):
            parts.append(f"<|{role}|>\n{msg['content']}\n")
    # Open the assistant turn so the model continues from here.
    parts.append("<|assistant|>\n")
    prompt = "".join(parts)

    def generate():
        """Yield SSE frames: one per streamed chunk, then stats, then [DONE]."""
        # NOTE(review): `llm` is a single shared instance and this generator
        # takes no lock; confirm how concurrent requests are meant to behave.
        token_count = 0
        start = time.perf_counter()

        stream = llm(
            prompt,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stop=["<|user|>", "<|assistant|>", "<|end|>", "<|endoftext|>"],
            stream=True,
        )

        for output in stream:
            token_text = output["choices"][0]["text"]
            token_count += 1
            yield f"data: {jsonify_compact({'token': token_text})}\n\n"

        elapsed = time.perf_counter() - start
        tps = token_count / elapsed if elapsed > 0 else 0
        stats = f"{token_count} tokens in {elapsed:.1f}s {tps:.2f} tokens/s"
        yield f"data: {jsonify_compact({'stats': stats})}\n\n"
        yield "data: [DONE]\n\n"

    return Response(stream_with_context(generate()), mimetype="text/event-stream")
 
 
 
224
 
 
 
225
 
def jsonify_compact(obj: object) -> str:
    """Serialize *obj* to single-line JSON with no spaces after separators.

    Non-ASCII characters are emitted as-is (``ensure_ascii=False``). Newlines
    inside strings are still escaped by the JSON encoder, so the result always
    fits on one ``data:`` line of a server-sent event.
    """
    # Local import keeps this helper self-contained, as in the original.
    import json

    return json.dumps(obj, ensure_ascii=False, separators=(",", ":"))
 
 
 
 
229
 
 
 
 
 
 
 
 
 
 
 
 
230
 
if __name__ == "__main__":
    # Listen on $PORT when set, otherwise on 7860, on all interfaces.
    port = int(os.environ.get("PORT", 7860))
    app.run(host="0.0.0.0", port=port, debug=False)