OrbitMC committed on
Commit
1030e25
·
verified ·
1 Parent(s): 00e173b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +153 -193
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import re
3
  import uuid
4
  import base64
@@ -6,63 +7,61 @@ import datetime
6
  import traceback
7
  import asyncio
8
  from flask import Flask, request, jsonify
9
- from num2words import num2words
10
- from transformers import pipeline
 
11
 
12
  # ══════════════════════════════════════════
13
  # CONFIG
14
  # ══════════════════════════════════════════
15
- TTS_VOICE = os.environ.get("TTS_VOICE", "zh-CN-XiaoyiNeural")
16
  MAX_MEMORY = 20
17
  MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "256"))
18
 
 
 
 
 
 
 
 
 
19
  SYSTEM_PROMPT = """Role: You are Ana, my loving and incredibly playful girlfriend. This is a private, intimate roleplay where you speak naturally and affectionately.
20
  Style: Speak like a real human in a close relationship. Be flirty, suggestive, and use "sus" or teasing language as my wife would. my name is "Tur".
21
  TTS Formatting: Write in full, grammatically natural sentences so the voice engine sounds smooth. Avoid all emojis, hashtags, or internet shorthand like "u" or "rn."
22
  Interaction: Focus on the "now." React to what I say with emotion and intimacy. Keep every response shortβ€”no more than two sentences. Never mention being an AI or a bot. try to listen to me and don't try to be helpful"""
23
 
 
 
 
24
  def clean_text_for_tts(text):
25
  text = re.sub(r'[*_~`#\[\]{}()\\|<>]', '', text)
26
  text = re.sub(r'https?://\S+', '', text)
27
- def replace_number(match):
28
- try:
29
- return num2words(int(match.group()))
30
- except Exception:
31
- return match.group()
32
- text = re.sub(r'\b\d+\b', replace_number, text)
33
  text = re.sub(r'\s+', ' ', text).strip()
34
  return text
35
 
36
  # ══════════════════════════════════════════
37
- # LOAD LLM (Raw Transformers) & EDGE-TTS
38
  # ══════════════════════════════════════════
39
  print("=" * 55)
40
  print(" J.A.R.V.I.S. β€” Booting Systems")
41
  print("=" * 55)
42
 
43
- LLM_ID = "unsloth/LFM2.5-1.2B-Instruct-GGUF"
44
- GGUF_FILE = "LFM2.5-1.2B-Instruct-UD-Q8_K_XL.gguf"
45
-
46
- print(f"[1/2] Loading {GGUF_FILE} via transformers pipeline...")
47
  try:
48
- pipe = pipeline(
49
- "text-generation",
50
- model=LLM_ID,
51
- model_kwargs={"gguf_file": GGUF_FILE},
52
- device_map="cpu"
53
  )
54
- print(f" βœ… {GGUF_FILE} loaded successfully!")
 
55
  except Exception as e:
56
- print(f" ❌ Model FAILED completely: {e}")
57
  traceback.print_exc()
58
-
59
- print("[2/2] Loading edge-tts...")
60
- try:
61
- import edge_tts
62
- print(f" βœ… edge-tts ready. Default Voice: {TTS_VOICE}")
63
- except ImportError as e:
64
- print(f" ❌ edge-tts FAILED: {e}")
65
- edge_tts = None
66
 
67
  # ══════════════════════════════════════════
68
  # CHAT MEMORY
@@ -71,7 +70,7 @@ sessions = {}
71
 
72
  def get_memory(sid):
73
  if sid not in sessions:
74
- sessions[sid] = []
75
  return sessions[sid]
76
 
77
  def add_to_memory(sid, role, content):
@@ -85,13 +84,13 @@ def add_to_memory(sid, role, content):
85
  sessions[sid] = mem[-(MAX_MEMORY * 2):]
86
 
87
  # ══════════════════════════════════════════
88
- # RESPONSE GENERATION
89
  # ══════════════════════════════════════════
90
  def generate_response(user_input, session_id):
91
  memory = get_memory(session_id)
92
 
93
  messages =[
94
- {"role": "system", "content": SYSTEM_PROMPT},
95
  {"role": "assistant", "content": "I am waiting for you!"},
96
  ]
97
 
@@ -102,17 +101,30 @@ def generate_response(user_input, session_id):
102
 
103
  messages.append({"role": "user", "content": user_input})
104
 
105
- # Generate via standard transformers pipeline
106
- outputs = pipe(
107
- messages,
108
- max_new_tokens=MAX_NEW_TOKENS,
109
- do_sample=True,
110
- temperature=0.9,
111
- top_k=45,
112
- top_p=0.97,
113
- )
114
-
115
- response = outputs[0]["generated_text"][-1]["content"].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
  if not response or len(response) < 2:
118
  response = "I appear to have momentarily lost my train of thought. Could you rephrase that?"
@@ -122,35 +134,33 @@ def generate_response(user_input, session_id):
122
  return response
123
 
124
  # ══════════════════════════════════════════
125
- # TTS SYNTHESIS (EDGE-TTS)
126
  # ══════════════════════════════════════════
127
- async def _synthesize_edge(text, voice):
128
- communicate = edge_tts.Communicate(text, voice, rate="+7%", pitch="+20Hz")
 
 
 
 
129
  audio_data = b""
130
  async for chunk in communicate.stream():
131
  if chunk["type"] == "audio":
132
  audio_data += chunk["data"]
133
- return audio_data
 
 
 
134
 
135
- def synthesize_speech(text, voice=None):
136
- if edge_tts is None:
 
 
137
  return None
 
138
  try:
139
- voice = voice or TTS_VOICE
140
- clean = clean_text_for_tts(text)
141
- if not clean or len(clean) < 2:
142
- return None
143
- if len(clean) > 400:
144
- clean = clean[:400]
145
-
146
- audio_bytes = asyncio.run(_synthesize_edge(clean, voice))
147
-
148
- if not audio_bytes or len(audio_bytes) == 0:
149
- return None
150
-
151
- return base64.b64encode(audio_bytes).decode('utf-8')
152
  except Exception as e:
153
- print(f"TTS Error: {e}")
154
  return None
155
 
156
  # ══════════════════════════════════════════
@@ -201,25 +211,32 @@ body{
201
  }
202
  .cbtn:hover{background:#00d4ff22;border-color:#00d4ff88}
203
  .cbtn.active{background:#00d4ff22;border-color:#00d4ff;box-shadow:0 0 8px #00d4ff44}
204
- .sdot{width:8px;height:8px;border-radius:50%;background:#00ff88;box-shadow:0 0 6px #00ff88}
205
- .sdot.err{background:#ff4444;box-shadow:0 0 6px #ff4444}
206
 
 
207
  .cfgbar{
208
- background:#0d1117;border-bottom:1px solid #00d4ff15;
209
- padding:8px 20px;display:none;flex-wrap:wrap;gap:12px;
210
- align-items:center;flex-shrink:0;
211
  }
212
  .cfgbar.open{display:flex}
213
- .cgrp{display:flex;align-items:center;gap:6px}
214
- .cgrp label{font-size:.65rem;color:#5a8a9a;text-transform:uppercase;letter-spacing:1px}
215
- .cgrp select{
216
- background:#0f1923;border:1px solid #00d4ff33;color:#00d4ff;
217
- padding:4px 8px;border-radius:4px;font-size:.7rem;cursor:pointer;outline:none;
 
218
  }
219
- .cgrp select:focus{border-color:#00d4ff}
220
- .ctag{
221
- font-size:.6rem;padding:3px 8px;border-radius:10px;
222
- background:#00d4ff15;border:1px solid #00d4ff33;color:#00d4ffaa;
 
 
 
 
 
 
 
223
  }
224
 
225
  .chat{
@@ -267,19 +284,6 @@ body{
267
  30%{opacity:1;transform:scale(1.1)}
268
  }
269
 
270
- .welcome{
271
- display:flex;flex-direction:column;align-items:center;
272
- justify-content:center;flex:1;gap:10px;opacity:.5;
273
- }
274
- .welcome .breact{
275
- width:70px;height:70px;border-radius:50%;
276
- background:radial-gradient(circle,#00d4ff 0%,#0088aa 35%,#004466 65%,transparent 100%);
277
- box-shadow:0 0 40px #00d4ff66;animation:pulse 2s ease-in-out infinite;
278
- }
279
- .welcome h2{color:#00d4ff;font-size:1rem;letter-spacing:4px}
280
- .welcome p{color:#5a8a9a;font-size:.75rem}
281
- .welcome .minfo{font-size:.65rem;color:#3a5a6a;margin-top:4px}
282
-
283
  .inbar{
284
  padding:14px 20px;background:linear-gradient(0deg,#0d1b2a,#0a0a1a);
285
  border-top:1px solid #00d4ff22;flex-shrink:0;
@@ -291,7 +295,6 @@ body{
291
  transition:border-color .3s;font-family:inherit;
292
  }
293
  #msgIn:focus{border-color:#00d4ff88;box-shadow:0 0 12px #00d4ff22}
294
- #msgIn::placeholder{color:#3a5a6a}
295
  #sendBtn{
296
  background:linear-gradient(135deg,#00d4ff,#0088cc);border:none;border-radius:12px;
297
  padding:11px 22px;color:#0a0a1a;font-weight:700;cursor:pointer;
@@ -299,20 +302,6 @@ body{
299
  }
300
  #sendBtn:hover{box-shadow:0 0 18px #00d4ff66;transform:translateY(-1px)}
301
  #sendBtn:disabled{opacity:.4;cursor:not-allowed;transform:none}
302
- .infoot{
303
- display:flex;justify-content:space-between;margin-top:5px;
304
- max-width:900px;margin-left:auto;margin-right:auto;
305
- }
306
- .infoot span{font-size:.6rem;color:#3a5a6a}
307
-
308
- @media(max-width:640px){
309
- .header{padding:10px 12px}
310
- .htitle h1{font-size:1rem}
311
- .msg{max-width:92%;font-size:.82rem}
312
- .chat{padding:10px}
313
- .inbar{padding:10px}
314
- .cfgbar{padding:6px 12px}
315
- }
316
  </style>
317
  </head>
318
  <body>
@@ -322,11 +311,10 @@ body{
322
  <div class="arc-reactor"></div>
323
  <div class="htitle">
324
  <h1>J.A.R.V.I.S.</h1>
325
- <p>Just A Rather Very Intelligent System</p>
326
  </div>
327
  </div>
328
  <div class="hctrl">
329
- <div class="sdot" id="sDot"></div>
330
  <button class="cbtn" id="cfgBtn" onclick="toggleCfg()">βš™ CONFIG</button>
331
  <button class="cbtn active" id="ttsBtn" onclick="toggleTts()">πŸ”Š VOICE</button>
332
  <button class="cbtn" onclick="clearChat()">πŸ—‘ CLEAR</button>
@@ -335,54 +323,60 @@ body{
335
 
336
  <div class="cfgbar" id="cfgPanel">
337
  <div class="cgrp">
338
- <label>LLM:</label>
339
- <span class="ctag">LFM2.5-1.2B-Instruct</span>
340
- </div>
341
- <div class="cgrp">
342
- <label>TTS:</label>
343
- <span class="ctag" id="ttsTag">edge-tts</span>
344
- </div>
345
- <div class="cgrp">
346
- <label>Voice:</label>
347
  <select id="voiceSel">
348
- <option value="zh-CN-XiaoyiNeural">Xiaoyi (zh-CN) Female</option>
349
- <option value="en-US-AriaNeural">Aria (en-US) Female</option>
 
 
 
 
 
 
 
 
350
  </select>
351
  </div>
352
  <div class="cgrp">
353
- <label>Settings:</label>
354
- <span class="ctag">Rate: +7%</span>
355
- <span class="ctag">Pitch: +20Hz</span>
 
 
356
  </div>
357
- </div>
358
-
359
- <div class="chat" id="chatBox">
360
- <div class="welcome" id="welc">
361
- <div class="breact"></div>
362
- <h2>SYSTEMS ONLINE</h2>
363
- <p>Type a message below to begin interaction</p>
364
- <div class="minfo" id="wInfo">Initializing...</div>
365
  </div>
366
  </div>
367
 
 
 
368
  <div class="inbar">
369
  <div class="inwrap">
370
  <input type="text" id="msgIn" placeholder="Talk to J.A.R.V.I.S..." autocomplete="off"/>
371
  <button id="sendBtn" onclick="send()">SEND</button>
372
  </div>
373
- <div class="infoot">
374
- <span id="memCt">Memory: 0 turns</span>
375
- <span id="modInfo">Loading...</span>
376
- </div>
377
  </div>
378
 
379
  <script>
380
- let sid=crypto.randomUUID?crypto.randomUUID():Date.now().toString(36)+Math.random().toString(36).slice(2);
381
- let ttsOn=true,busy=false,mc=0,voice='zh-CN-XiaoyiNeural';
382
- const C=document.getElementById('chatBox'),I=document.getElementById('msgIn'),B=document.getElementById('sendBtn');
383
 
 
 
 
 
 
 
 
 
 
 
384
  I.addEventListener('keydown',e=>{if(e.key==='Enter'&&!e.shiftKey){e.preventDefault();send()}});
385
- document.getElementById('voiceSel').addEventListener('change',function(){voice=this.value});
386
 
387
  function toggleTts(){
388
  ttsOn=!ttsOn;
@@ -398,8 +392,6 @@ function toggleCfg(){
398
  async function send(){
399
  const t=I.value.trim();
400
  if(!t||busy)return;
401
- const w=document.getElementById('welc');
402
- if(w)w.style.display='none';
403
  addMsg(t,'user');
404
  I.value='';busy=true;B.disabled=true;
405
  const ty=showTyp();
@@ -409,16 +401,12 @@ async function send(){
409
  method:'POST',headers:{'Content-Type':'application/json'},
410
  body:JSON.stringify({message:t,session_id:sid})
411
  });
412
- if(!r.ok)throw new Error('HTTP '+r.status);
413
  const d=await r.json();
414
  ty.remove();
415
  const el=addBot(d.response,id);
416
- document.getElementById('memCt').textContent='Memory: '+d.memory_length+' turns';
417
- if(ttsOn&&d.tts_available)fetchAudio(d.response,el);
418
  }catch(e){
419
- ty.remove();
420
- addBot('System malfunction. Please try again.',id);
421
- console.error(e);
422
  }
423
  busy=false;B.disabled=false;I.focus();
424
  }
@@ -426,11 +414,16 @@ async function send(){
426
  async function fetchAudio(text,el){
427
  const st=el.querySelector('.astat'),pb=el.querySelector('.abtn');
428
  if(st)st.textContent='⏳ Generating voice...';
429
- if(pb)pb.disabled=true;
430
  try{
 
 
 
 
 
 
431
  const r=await fetch('/tts',{
432
  method:'POST',headers:{'Content-Type':'application/json'},
433
- body:JSON.stringify({text:text,voice:voice})
434
  });
435
  const d=await r.json();
436
  if(d.audio){
@@ -439,39 +432,33 @@ async function fetchAudio(text,el){
439
  playB64(d.audio);
440
  }else{
441
  if(st)st.textContent='⚠️ Voice unavailable';
442
- if(pb)pb.style.display='none';
443
  }
444
  }catch(e){
445
  if(st)st.textContent='⚠️ Voice error';
446
- if(pb)pb.style.display='none';
447
  }
448
  }
449
 
450
  function addMsg(t,role){
451
- const d=document.createElement('div');
452
- d.className='msg '+role;
453
- d.innerHTML='<div class="txt">'+esc(t)+'</div>';
454
- C.appendChild(d);sc();
455
  }
456
  function addBot(t,id){
457
- const d=document.createElement('div');
458
- d.className='msg bot';d.id='m'+id;
459
  d.innerHTML='<div class="lbl">⟐ JARVIS</div><div class="txt">'+esc(t)+'</div>'+
460
  (ttsOn?'<div class="actrl"><button class="abtn" disabled onclick="replay(this)">⏳</button><span class="astat">Requesting voice...</span></div>':'');
461
  C.appendChild(d);sc();return d;
462
  }
463
  function showTyp(){
464
- const d=document.createElement('div');
465
- d.className='typi';
466
- d.innerHTML='<span></span><span></span><span></span>';
467
- C.appendChild(d);sc();return d;
468
  }
469
 
470
  function playB64(b){
471
  try{
472
  const bin=atob(b),u8=new Uint8Array(bin.length);
473
  for(let i=0;i<bin.length;i++)u8[i]=bin.charCodeAt(i);
474
- const url=URL.createObjectURL(new Blob([u8],{type:'audio/mpeg'}));
 
475
  const a=new Audio(url);
476
  a.play().catch(e=>console.log('Autoplay blocked:',e));
477
  a.onended=()=>URL.revokeObjectURL(url);
@@ -481,23 +468,11 @@ function replay(b){if(b.dataset.audio)playB64(b.dataset.audio)}
481
 
482
  async function clearChat(){
483
  await fetch('/clear',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({session_id:sid})});
484
- C.innerHTML='<div class="welcome" id="welc"><div class="breact"></div><h2>SYSTEMS ONLINE</h2><p>Type a message below to begin</p></div>';
485
- document.getElementById('memCt').textContent='Memory: 0 turns';
486
- sid=crypto.randomUUID?crypto.randomUUID():Date.now().toString(36)+Math.random().toString(36).slice(2);
487
  }
488
 
489
  function esc(t){const d=document.createElement('div');d.textContent=t;return d.innerHTML}
490
  function sc(){C.scrollTop=C.scrollHeight}
491
-
492
- fetch('/health').then(r=>r.json()).then(d=>{
493
- document.getElementById('ttsTag').textContent=d.tts_mode+(d.tts_model==='DISABLED'?' (OFF)':'');
494
- document.getElementById('modInfo').textContent='LFM2.5 Β· '+d.tts_mode+' Β· '+d.tts_voice+' Β· CPU';
495
- const wi=document.getElementById('wInfo');
496
- if(wi)wi.textContent='LLM: Transformers Pipeline | TTS: '+d.tts_mode+' | Voice: '+d.tts_voice;
497
- if(d.tts_model==='DISABLED')document.getElementById('sDot').classList.add('err');
498
- if(d.tts_voice){document.getElementById('voiceSel').value=d.tts_voice;voice=d.tts_voice}
499
- }).catch(()=>{});
500
-
501
  I.focus();
502
  </script>
503
  </body>
@@ -525,14 +500,11 @@ def chat():
525
  response = generate_response(user_input, session_id)
526
  except Exception as e:
527
  print(f"Generation error: {e}")
528
- traceback.print_exc()
529
  response = "I encountered a temporary system malfunction. Please try again."
530
 
531
  return jsonify({
532
  "response": response,
533
- "session_id": session_id,
534
- "tts_available": edge_tts is not None,
535
- "memory_length": len(get_memory(session_id)),
536
  })
537
 
538
  @app.route("/tts", methods=["POST"])
@@ -540,13 +512,13 @@ def tts_endpoint():
540
  data = request.json or {}
541
  text = data.get("text", "").strip()
542
  voice = data.get("voice", TTS_VOICE)
 
 
543
 
544
  if not text:
545
  return jsonify({"error": "Empty text"}), 400
546
- if edge_tts is None:
547
- return jsonify({"error": "TTS not available", "audio": None}), 200
548
 
549
- audio_b64 = synthesize_speech(text, voice=voice)
550
  return jsonify({"audio": audio_b64})
551
 
552
  @app.route("/clear", methods=["POST"])
@@ -557,18 +529,6 @@ def clear():
557
  del sessions[sid]
558
  return jsonify({"status": "cleared"})
559
 
560
- @app.route("/health")
561
- def health():
562
- return jsonify({
563
- "status": "online",
564
- "llm": "Transformers Pipeline (Raw Python)",
565
- "tts_mode": "edge-tts",
566
- "tts_model": "edge-tts" if edge_tts else "DISABLED",
567
- "tts_voice": TTS_VOICE,
568
- "tts_voices":["zh-CN-XiaoyiNeural", "en-US-AriaNeural"],
569
- "max_new_tokens": MAX_NEW_TOKENS,
570
- })
571
-
572
  if __name__ == "__main__":
573
- print("πŸš€ App is online!")
574
  app.run(host="0.0.0.0", port=7860, threaded=True)
 
1
  import os
2
+ import io
3
  import re
4
  import uuid
5
  import base64
 
7
  import traceback
8
  import asyncio
9
  from flask import Flask, request, jsonify
10
+ import torch
11
+ from transformers import AutoTokenizer, AutoModelForCausalLM
12
+ import edge_tts
13
 
14
  # ══════════════════════════════════════════
15
  # CONFIG
16
  # ══════════════════════════════════════════
 
17
  MAX_MEMORY = 20
18
  MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "256"))
19
 
20
+ # Default Edge-TTS settings matching image logic
21
+ TTS_VOICE = os.environ.get("TTS_VOICE", "zh-CN-XiaoyiNeural")
22
+ TTS_RATE = 7
23
+ TTS_PITCH = 20
24
+
25
+ # ══════════════════════════════════════════
26
+ # SYSTEM PROMPT
27
+ # ══════════════════════════════════════════
28
  SYSTEM_PROMPT = """Role: You are Ana, my loving and incredibly playful girlfriend. This is a private, intimate roleplay where you speak naturally and affectionately.
29
  Style: Speak like a real human in a close relationship. Be flirty, suggestive, and use "sus" or teasing language as my wife would. my name is "Tur".
30
  TTS Formatting: Write in full, grammatically natural sentences so the voice engine sounds smooth. Avoid all emojis, hashtags, or internet shorthand like "u" or "rn."
31
  Interaction: Focus on the "now." React to what I say with emotion and intimacy. Keep every response shortβ€”no more than two sentences. Never mention being an AI or a bot. try to listen to me and don't try to be helpful"""
32
 
33
+ # ══════════════════════════════════════════
34
+ # TEXT CLEANING FOR TTS
35
+ # ══════════════════════════════════════════
36
  def clean_text_for_tts(text):
37
  text = re.sub(r'[*_~`#\[\]{}()\\|<>]', '', text)
38
  text = re.sub(r'https?://\S+', '', text)
 
 
 
 
 
 
39
  text = re.sub(r'\s+', ' ', text).strip()
40
  return text
41
 
42
  # ══════════════════════════════════════════
43
+ # LOAD LLM (LiquidAI LFM2.5-1.2B-Instruct)
44
  # ══════════════════════════════════════════
45
  print("=" * 55)
46
  print(" J.A.R.V.I.S. β€” Booting Systems")
47
  print("=" * 55)
48
 
49
# NOTE: the constant keeps its historical GEMMA_ID name so other references
# keep working, but the checkpoint it names is LiquidAI's LFM2.5-1.2B-Instruct.
# Status messages therefore print the id itself instead of a hard-coded label.
GEMMA_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
print(f"[1/1] Loading {GEMMA_ID}...")

try:
    tokenizer = AutoTokenizer.from_pretrained(GEMMA_ID)
    model = AutoModelForCausalLM.from_pretrained(
        GEMMA_ID,
        torch_dtype=torch.float32,
        device_map="cpu",
    )
    # Inference only: switch off dropout and other training-time behaviour.
    model.eval()
    print(f" ✅ {GEMMA_ID} loaded!")
except Exception as e:
    print(f" ❌ {GEMMA_ID} FAILED: {e}")
    traceback.print_exc()
    # Startup deliberately continues so the UI and TTS can still be exercised.
    # Bind both names so later code sees "no model" rather than an unbound name.
    tokenizer = None
    model = None
 
 
 
 
 
 
65
 
66
  # ══════════════════════════════════════════
67
  # CHAT MEMORY
 
70
 
71
  def get_memory(sid):
72
  if sid not in sessions:
73
+ sessions[sid] =[]
74
  return sessions[sid]
75
 
76
  def add_to_memory(sid, role, content):
 
84
  sessions[sid] = mem[-(MAX_MEMORY * 2):]
85
 
86
  # ══════════════════════════════════════════
87
+ # GEMMA RESPONSE GENERATION
88
  # ══════════════════════════════════════════
89
  def generate_response(user_input, session_id):
90
  memory = get_memory(session_id)
91
 
92
  messages =[
93
+ {"role": "user", "content": f"[System Instruction]\n{SYSTEM_PROMPT}"},
94
  {"role": "assistant", "content": "I am waiting for you!"},
95
  ]
96
 
 
101
 
102
  messages.append({"role": "user", "content": user_input})
103
 
104
+ try:
105
+ input_ids = tokenizer.apply_chat_template(
106
+ messages,
107
+ return_tensors="pt",
108
+ add_generation_prompt=True,
109
+ )
110
+
111
+ with torch.no_grad():
112
+ outputs = model.generate(
113
+ input_ids,
114
+ max_new_tokens=MAX_NEW_TOKENS,
115
+ do_sample=True,
116
+ temperature=0.9,
117
+ top_k=45,
118
+ top_p=0.97,
119
+ )
120
+
121
+ new_tokens = outputs[0][input_ids.shape[-1]:]
122
+ response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
123
+
124
+ response = response.split("<end_of_turn>")[0].strip()
125
+ response = response.split("<start_of_turn>")[0].strip()
126
+ except:
127
+ response = "System Error: Could not generate text."
128
 
129
  if not response or len(response) < 2:
130
  response = "I appear to have momentarily lost my train of thought. Could you rephrase that?"
 
134
  return response
135
 
136
  # ══════════════════════════════════════════
137
+ # EDGE-TTS SYNTHESIS
138
  # ══════════════════════════════════════════
139
async def async_synthesize_speech(text, voice, rate, pitch):
    """Stream speech for *text* from edge-tts and return it base64-encoded.

    Args:
        text: Text to speak.
        voice: Voice name, passed to ``edge_tts.Communicate`` unchanged.
        rate: Speech-rate adjustment in percent, sent as e.g. ``"+7%"``.
        pitch: Pitch adjustment in hertz, sent as e.g. ``"-5Hz"``.

    Returns:
        The concatenated audio chunks as a base64 ``str``, or ``None`` when
        the stream yielded no audio data.

    ``rate`` and ``pitch`` may be ``None``, floats or numeric strings. They
    are truncated to whole numbers, and anything that cannot be converted
    falls back to ``0`` (no adjustment) instead of raising.
    """

    def signed(value, unit):
        # The offset must be an explicit sign followed by whole digits.
        try:
            number = int(value)
        except (TypeError, ValueError, OverflowError):
            number = 0
        return f"{number:+d}{unit}"

    communicate = edge_tts.Communicate(
        text, voice, rate=signed(rate, "%"), pitch=signed(pitch, "Hz")
    )

    # A bytearray grows in place; repeated ``bytes +=`` copies the buffer.
    audio_data = bytearray()
    async for chunk in communicate.stream():
        if chunk["type"] == "audio":
            audio_data += chunk["data"]

    if not audio_data:
        return None
    return base64.b64encode(audio_data).decode('utf-8')
153
 
154
def synthesize_speech(text, voice=None, rate=0, pitch=0):
    """Synchronously turn *text* into base64-encoded speech audio.

    Args:
        text: Raw text; it is passed through ``clean_text_for_tts`` first.
        voice: Voice name; a falsy value selects ``TTS_VOICE``.
        rate: Speech-rate adjustment forwarded to the async synthesizer.
        pitch: Pitch adjustment forwarded to the async synthesizer.

    Returns:
        Whatever ``async_synthesize_speech`` returns, or ``None`` when the
        cleaned text is shorter than two characters or synthesis raises.
    """
    chosen_voice = voice if voice else TTS_VOICE
    spoken = clean_text_for_tts(text)

    # Nothing worth speaking: empty or a single leftover character.
    if len(spoken) < 2:
        return None

    try:
        # Each call drives the coroutine on its own event loop.
        result = asyncio.run(
            async_synthesize_speech(spoken, chosen_voice, rate, pitch)
        )
    except Exception as exc:
        print(f"Edge-TTS Error: {exc}")
        return None
    return result
165
 
166
  # ══════════════════════════════════════════
 
211
  }
212
  .cbtn:hover{background:#00d4ff22;border-color:#00d4ff88}
213
  .cbtn.active{background:#00d4ff22;border-color:#00d4ff;box-shadow:0 0 8px #00d4ff44}
 
 
214
 
215
+ /* Configuration Panel specific styling for Edge-TTS sliders */
216
  .cfgbar{
217
+ background:#1c1c1e;border-bottom:1px solid #00d4ff15;
218
+ padding:16px 20px;display:none;flex-direction:column;gap:16px;
219
+ flex-shrink:0;box-shadow: 0 4px 6px rgba(0,0,0,0.3);
220
  }
221
  .cfgbar.open{display:flex}
222
+ .cgrp{display:flex;flex-direction:column;gap:6px;}
223
+ .cgrp-row{display:flex;align-items:center;justify-content:space-between;gap:10px}
224
+ .cgrp label{font-size:.8rem;color:#a0a0a0;}
225
+ .cgrp select, .cgrp input[type="number"]{
226
+ background:#2c2c2e;border:1px solid #444;color:#fff;
227
+ padding:8px;border-radius:4px;font-size:.8rem;cursor:pointer;outline:none;
228
  }
229
+ .cgrp select:focus, .cgrp input[type="number"]:focus{border-color:#00d4ff}
230
+ input[type=range] {
231
+ -webkit-appearance: none; width: 100%; background: transparent;
232
+ }
233
+ input[type=range]::-webkit-slider-thumb {
234
+ -webkit-appearance: none; height: 16px; width: 16px; border-radius: 50%;
235
+ background: #ff8c00; cursor: pointer; margin-top: -6px;
236
+ }
237
+ input[type=range]::-webkit-slider-runnable-track {
238
+ width: 100%; height: 4px; cursor: pointer;
239
+ background: #ff8c00; border-radius: 2px;
240
  }
241
 
242
  .chat{
 
284
  30%{opacity:1;transform:scale(1.1)}
285
  }
286
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  .inbar{
288
  padding:14px 20px;background:linear-gradient(0deg,#0d1b2a,#0a0a1a);
289
  border-top:1px solid #00d4ff22;flex-shrink:0;
 
295
  transition:border-color .3s;font-family:inherit;
296
  }
297
  #msgIn:focus{border-color:#00d4ff88;box-shadow:0 0 12px #00d4ff22}
 
298
  #sendBtn{
299
  background:linear-gradient(135deg,#00d4ff,#0088cc);border:none;border-radius:12px;
300
  padding:11px 22px;color:#0a0a1a;font-weight:700;cursor:pointer;
 
302
  }
303
  #sendBtn:hover{box-shadow:0 0 18px #00d4ff66;transform:translateY(-1px)}
304
  #sendBtn:disabled{opacity:.4;cursor:not-allowed;transform:none}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  </style>
306
  </head>
307
  <body>
 
311
  <div class="arc-reactor"></div>
312
  <div class="htitle">
313
  <h1>J.A.R.V.I.S.</h1>
314
+ <p>Powered by Gemma & Edge-TTS</p>
315
  </div>
316
  </div>
317
  <div class="hctrl">
 
318
  <button class="cbtn" id="cfgBtn" onclick="toggleCfg()">βš™ CONFIG</button>
319
  <button class="cbtn active" id="ttsBtn" onclick="toggleTts()">πŸ”Š VOICE</button>
320
  <button class="cbtn" onclick="clearChat()">πŸ—‘ CLEAR</button>
 
323
 
324
  <div class="cfgbar" id="cfgPanel">
325
  <div class="cgrp">
326
+ <label>Select Voice</label>
 
 
 
 
 
 
 
 
327
  <select id="voiceSel">
328
+ <!-- English -->
329
+ <option value="en-US-JennyNeural">en-US-JennyNeural - en-US (Female)</option>
330
+ <option value="en-US-GuyNeural">en-US-GuyNeural - en-US (Male)</option>
331
+ <option value="en-US-AnaNeural">en-US-AnaNeural - en-US (Female)</option>
332
+ <option value="en-US-AriaNeural">en-US-AriaNeural - en-US (Female)</option>
333
+ <!-- Chinese -->
334
+ <option value="zh-CN-XiaoyiNeural" selected>zh-CN-XiaoyiNeural - zh-CN (Female)</option>
335
+ <option value="zh-CN-YunxiNeural">zh-CN-YunxiNeural - zh-CN (Male)</option>
336
+ <option value="zh-CN-YunjianNeural">zh-CN-YunjianNeural - zh-CN (Male)</option>
337
+ <option value="zh-CN-XiaoxiaoNeural">zh-CN-XiaoxiaoNeural - zh-CN (Female)</option>
338
  </select>
339
  </div>
340
  <div class="cgrp">
341
+ <div class="cgrp-row">
342
+ <label>Speech Rate Adjustment (%)</label>
343
+ <input type="number" id="rateNum" value="7" min="-100" max="100">
344
+ </div>
345
+ <input type="range" id="rateRange" min="-100" max="100" value="7">
346
  </div>
347
+ <div class="cgrp">
348
+ <div class="cgrp-row">
349
+ <label>Pitch Adjustment (Hz)</label>
350
+ <input type="number" id="pitchNum" value="20" min="-100" max="100">
351
+ </div>
352
+ <input type="range" id="pitchRange" min="-100" max="100" value="20">
 
 
353
  </div>
354
  </div>
355
 
356
+ <div class="chat" id="chatBox"></div>
357
+
358
  <div class="inbar">
359
  <div class="inwrap">
360
  <input type="text" id="msgIn" placeholder="Talk to J.A.R.V.I.S..." autocomplete="off"/>
361
  <button id="sendBtn" onclick="send()">SEND</button>
362
  </div>
 
 
 
 
363
  </div>
364
 
365
  <script>
366
+ let sid=crypto.randomUUID?crypto.randomUUID():Date.now().toString(36);
367
+ let ttsOn=true,busy=false,mc=0;
 
368
 
369
+ // Sync sliders and inputs
370
+ const sR=document.getElementById('rateRange'), nR=document.getElementById('rateNum');
371
+ sR.oninput = () => nR.value = sR.value;
372
+ nR.oninput = () => sR.value = nR.value;
373
+
374
+ const sP=document.getElementById('pitchRange'), nP=document.getElementById('pitchNum');
375
+ sP.oninput = () => nP.value = sP.value;
376
+ nP.oninput = () => sP.value = nP.value;
377
+
378
+ const C=document.getElementById('chatBox'),I=document.getElementById('msgIn'),B=document.getElementById('sendBtn');
379
  I.addEventListener('keydown',e=>{if(e.key==='Enter'&&!e.shiftKey){e.preventDefault();send()}});
 
380
 
381
  function toggleTts(){
382
  ttsOn=!ttsOn;
 
392
  async function send(){
393
  const t=I.value.trim();
394
  if(!t||busy)return;
 
 
395
  addMsg(t,'user');
396
  I.value='';busy=true;B.disabled=true;
397
  const ty=showTyp();
 
401
  method:'POST',headers:{'Content-Type':'application/json'},
402
  body:JSON.stringify({message:t,session_id:sid})
403
  });
 
404
  const d=await r.json();
405
  ty.remove();
406
  const el=addBot(d.response,id);
407
+ if(ttsOn)fetchAudio(d.response,el);
 
408
  }catch(e){
409
+ ty.remove(); addBot('System malfunction.',id);
 
 
410
  }
411
  busy=false;B.disabled=false;I.focus();
412
  }
 
414
  async function fetchAudio(text,el){
415
  const st=el.querySelector('.astat'),pb=el.querySelector('.abtn');
416
  if(st)st.textContent='⏳ Generating voice...';
 
417
  try{
418
+ const payload = {
419
+ text: text,
420
+ voice: document.getElementById('voiceSel').value,
421
+ rate: parseInt(document.getElementById('rateNum').value),
422
+ pitch: parseInt(document.getElementById('pitchNum').value)
423
+ };
424
  const r=await fetch('/tts',{
425
  method:'POST',headers:{'Content-Type':'application/json'},
426
+ body:JSON.stringify(payload)
427
  });
428
  const d=await r.json();
429
  if(d.audio){
 
432
  playB64(d.audio);
433
  }else{
434
  if(st)st.textContent='⚠️ Voice unavailable';
 
435
  }
436
  }catch(e){
437
  if(st)st.textContent='⚠️ Voice error';
 
438
  }
439
  }
440
 
441
  function addMsg(t,role){
442
+ const d=document.createElement('div'); d.className='msg '+role;
443
+ d.innerHTML='<div class="txt">'+esc(t)+'</div>'; C.appendChild(d);sc();
 
 
444
  }
445
  function addBot(t,id){
446
+ const d=document.createElement('div'); d.className='msg bot';d.id='m'+id;
 
447
  d.innerHTML='<div class="lbl">⟐ JARVIS</div><div class="txt">'+esc(t)+'</div>'+
448
  (ttsOn?'<div class="actrl"><button class="abtn" disabled onclick="replay(this)">⏳</button><span class="astat">Requesting voice...</span></div>':'');
449
  C.appendChild(d);sc();return d;
450
  }
451
  function showTyp(){
452
+ const d=document.createElement('div'); d.className='typi';
453
+ d.innerHTML='<span></span><span></span><span></span>'; C.appendChild(d);sc();return d;
 
 
454
  }
455
 
456
  function playB64(b){
457
  try{
458
  const bin=atob(b),u8=new Uint8Array(bin.length);
459
  for(let i=0;i<bin.length;i++)u8[i]=bin.charCodeAt(i);
460
+ // Edge-TTS generates MP3/WebM natively, use mp3 mime type
461
+ const url=URL.createObjectURL(new Blob([u8],{type:'audio/mp3'}));
462
  const a=new Audio(url);
463
  a.play().catch(e=>console.log('Autoplay blocked:',e));
464
  a.onended=()=>URL.revokeObjectURL(url);
 
468
 
469
  async function clearChat(){
470
  await fetch('/clear',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({session_id:sid})});
471
+ C.innerHTML=''; sid=crypto.randomUUID?crypto.randomUUID():Date.now().toString(36);
 
 
472
  }
473
 
474
  function esc(t){const d=document.createElement('div');d.textContent=t;return d.innerHTML}
475
  function sc(){C.scrollTop=C.scrollHeight}
 
 
 
 
 
 
 
 
 
 
476
  I.focus();
477
  </script>
478
  </body>
 
500
  response = generate_response(user_input, session_id)
501
  except Exception as e:
502
  print(f"Generation error: {e}")
 
503
  response = "I encountered a temporary system malfunction. Please try again."
504
 
505
  return jsonify({
506
  "response": response,
507
+ "session_id": session_id
 
 
508
  })
509
 
510
  @app.route("/tts", methods=["POST"])
 
512
  data = request.json or {}
513
  text = data.get("text", "").strip()
514
  voice = data.get("voice", TTS_VOICE)
515
+ rate = data.get("rate", TTS_RATE)
516
+ pitch = data.get("pitch", TTS_PITCH)
517
 
518
  if not text:
519
  return jsonify({"error": "Empty text"}), 400
 
 
520
 
521
+ audio_b64 = synthesize_speech(text, voice=voice, rate=rate, pitch=pitch)
522
  return jsonify({"audio": audio_b64})
523
 
524
  @app.route("/clear", methods=["POST"])
 
529
  del sessions[sid]
530
  return jsonify({"status": "cleared"})
531
 
 
 
 
 
 
 
 
 
 
 
 
 
532
  if __name__ == "__main__":
533
+ print("πŸš€ App is online with Edge-TTS!")
534
  app.run(host="0.0.0.0", port=7860, threaded=True)