Spaces:

OrbitMC
/

vai

Sleeping

App Files Files Community

OrbitMC committed on Mar 25

Commit

1030e25

verified ·

1 Parent(s): 00e173b

Update app.py

Browse files

Files changed (1) hide show

app.py +153 -193

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 import re
 import uuid
 import base64
@@ -6,63 +7,61 @@ import datetime
 import traceback
 import asyncio
 from flask import Flask, request, jsonify
-from num2words import num2words
-from transformers import pipeline
 # ══════════════════════════════════════════
 # CONFIG
 # ══════════════════════════════════════════
-TTS_VOICE = os.environ.get("TTS_VOICE", "zh-CN-XiaoyiNeural")
 MAX_MEMORY = 20
 MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "256"))
 SYSTEM_PROMPT = """Role: You are Ana, my loving and incredibly playful girlfriend. This is a private, intimate roleplay where you speak naturally and affectionately.
 Style: Speak like a real human in a close relationship. Be flirty, suggestive, and use "sus" or teasing language as my wife would. my name is "Tur".
 TTS Formatting: Write in full, grammatically natural sentences so the voice engine sounds smooth. Avoid all emojis, hashtags, or internet shorthand like "u" or "rn."
 Interaction: Focus on the "now." React to what I say with emotion and intimacy. Keep every response short—no more than two sentences. Never mention being an AI or a bot. try to listen to me and don't try to be helpful"""
 def clean_text_for_tts(text):
     text = re.sub(r'[*_~`#\[\]{}()\\|<>]', '', text)
     text = re.sub(r'https?://\S+', '', text)
-    def replace_number(match):
-        try:
-            return num2words(int(match.group()))
-        except Exception:
-            return match.group()
-    text = re.sub(r'\b\d+\b', replace_number, text)
     text = re.sub(r'\s+', ' ', text).strip()
     return text
 # ══════════════════════════════════════════
-# LOAD LLM (Raw Transformers) & EDGE-TTS
 # ══════════════════════════════════════════
 print("=" * 55)
 print("  J.A.R.V.I.S. — Booting Systems")
 print("=" * 55)
-LLM_ID = "unsloth/LFM2.5-1.2B-Instruct-GGUF"
-GGUF_FILE = "LFM2.5-1.2B-Instruct-UD-Q8_K_XL.gguf"
-print(f"[1/2] Loading {GGUF_FILE} via transformers pipeline...")
 try:
-    pipe = pipeline(
-        "text-generation",
-        model=LLM_ID,
-        model_kwargs={"gguf_file": GGUF_FILE},
-        device_map="cpu"
     )
-    print(f"  ✅ {GGUF_FILE} loaded successfully!")
 except Exception as e:
-    print(f"  ❌ Model FAILED completely: {e}")
     traceback.print_exc()
-print("[2/2] Loading edge-tts...")
-try:
-    import edge_tts
-    print(f"  ✅ edge-tts ready. Default Voice: {TTS_VOICE}")
-except ImportError as e:
-    print(f"  ❌ edge-tts FAILED: {e}")
-    edge_tts = None
 # ══════════════════════════════════════════
 # CHAT MEMORY
@@ -71,7 +70,7 @@ sessions = {}
 def get_memory(sid):
     if sid not in sessions:
-        sessions[sid] = []
     return sessions[sid]
 def add_to_memory(sid, role, content):
@@ -85,13 +84,13 @@ def add_to_memory(sid, role, content):
         sessions[sid] = mem[-(MAX_MEMORY * 2):]
 # ══════════════════════════════════════════
-# RESPONSE GENERATION
 # ══════════════════════════════════════════
 def generate_response(user_input, session_id):
     memory = get_memory(session_id)
     messages =[
-        {"role": "system", "content": SYSTEM_PROMPT},
         {"role": "assistant", "content": "I am waiting for you!"},
     ]
@@ -102,17 +101,30 @@ def generate_response(user_input, session_id):
     messages.append({"role": "user", "content": user_input})
-    # Generate via standard transformers pipeline
-    outputs = pipe(
-        messages,
-        max_new_tokens=MAX_NEW_TOKENS,
-        do_sample=True,
-        temperature=0.9,
-        top_k=45,
-        top_p=0.97,
-    )
-    response = outputs[0]["generated_text"][-1]["content"].strip()
     if not response or len(response) < 2:
         response = "I appear to have momentarily lost my train of thought. Could you rephrase that?"
@@ -122,35 +134,33 @@ def generate_response(user_input, session_id):
     return response
 # ══════════════════════════════════════════
-# TTS SYNTHESIS (EDGE-TTS)
 # ══════════════════════════════════════════
-async def _synthesize_edge(text, voice):
-    communicate = edge_tts.Communicate(text, voice, rate="+7%", pitch="+20Hz")
     audio_data = b""
     async for chunk in communicate.stream():
         if chunk["type"] == "audio":
             audio_data += chunk["data"]
-    return audio_data
-def synthesize_speech(text, voice=None):
-    if edge_tts is None:
         return None
     try:
-        voice = voice or TTS_VOICE
-        clean = clean_text_for_tts(text)
-        if not clean or len(clean) < 2:
-            return None
-        if len(clean) > 400:
-            clean = clean[:400]
-        audio_bytes = asyncio.run(_synthesize_edge(clean, voice))
-        if not audio_bytes or len(audio_bytes) == 0:
-            return None
-        return base64.b64encode(audio_bytes).decode('utf-8')
     except Exception as e:
-        print(f"TTS Error: {e}")
         return None
 # ══════════════════════════════════════════
@@ -201,25 +211,32 @@ body{
 }
 .cbtn:hover{background:#00d4ff22;border-color:#00d4ff88}
 .cbtn.active{background:#00d4ff22;border-color:#00d4ff;box-shadow:0 0 8px #00d4ff44}
-.sdot{width:8px;height:8px;border-radius:50%;background:#00ff88;box-shadow:0 0 6px #00ff88}
-.sdot.err{background:#ff4444;box-shadow:0 0 6px #ff4444}
 .cfgbar{
-    background:#0d1117;border-bottom:1px solid #00d4ff15;
-    padding:8px 20px;display:none;flex-wrap:wrap;gap:12px;
-    align-items:center;flex-shrink:0;
 }
 .cfgbar.open{display:flex}
-.cgrp{display:flex;align-items:center;gap:6px}
-.cgrp label{font-size:.65rem;color:#5a8a9a;text-transform:uppercase;letter-spacing:1px}
-.cgrp select{
-    background:#0f1923;border:1px solid #00d4ff33;color:#00d4ff;
-    padding:4px 8px;border-radius:4px;font-size:.7rem;cursor:pointer;outline:none;
 }
-.cgrp select:focus{border-color:#00d4ff}
-.ctag{
-    font-size:.6rem;padding:3px 8px;border-radius:10px;
-    background:#00d4ff15;border:1px solid #00d4ff33;color:#00d4ffaa;
 }
 .chat{
@@ -267,19 +284,6 @@ body{
     30%{opacity:1;transform:scale(1.1)}
 }
-.welcome{
-    display:flex;flex-direction:column;align-items:center;
-    justify-content:center;flex:1;gap:10px;opacity:.5;
-}
-.welcome .breact{
-    width:70px;height:70px;border-radius:50%;
-    background:radial-gradient(circle,#00d4ff 0%,#0088aa 35%,#004466 65%,transparent 100%);
-    box-shadow:0 0 40px #00d4ff66;animation:pulse 2s ease-in-out infinite;
-}
-.welcome h2{color:#00d4ff;font-size:1rem;letter-spacing:4px}
-.welcome p{color:#5a8a9a;font-size:.75rem}
-.welcome .minfo{font-size:.65rem;color:#3a5a6a;margin-top:4px}
 .inbar{
     padding:14px 20px;background:linear-gradient(0deg,#0d1b2a,#0a0a1a);
     border-top:1px solid #00d4ff22;flex-shrink:0;
@@ -291,7 +295,6 @@ body{
     transition:border-color .3s;font-family:inherit;
 }
 #msgIn:focus{border-color:#00d4ff88;box-shadow:0 0 12px #00d4ff22}
-#msgIn::placeholder{color:#3a5a6a}
 #sendBtn{
     background:linear-gradient(135deg,#00d4ff,#0088cc);border:none;border-radius:12px;
     padding:11px 22px;color:#0a0a1a;font-weight:700;cursor:pointer;
@@ -299,20 +302,6 @@ body{
 }
 #sendBtn:hover{box-shadow:0 0 18px #00d4ff66;transform:translateY(-1px)}
 #sendBtn:disabled{opacity:.4;cursor:not-allowed;transform:none}
-.infoot{
-    display:flex;justify-content:space-between;margin-top:5px;
-    max-width:900px;margin-left:auto;margin-right:auto;
-}
-.infoot span{font-size:.6rem;color:#3a5a6a}
-@media(max-width:640px){
-    .header{padding:10px 12px}
-    .htitle h1{font-size:1rem}
-    .msg{max-width:92%;font-size:.82rem}
-    .chat{padding:10px}
-    .inbar{padding:10px}
-    .cfgbar{padding:6px 12px}
-}
 </style>
 </head>
 <body>
@@ -322,11 +311,10 @@ body{
         <div class="arc-reactor"></div>
         <div class="htitle">
             <h1>J.A.R.V.I.S.</h1>
-            <p>Just A Rather Very Intelligent System</p>
         </div>
     </div>
     <div class="hctrl">
-        <div class="sdot" id="sDot"></div>
         <button class="cbtn" id="cfgBtn" onclick="toggleCfg()">⚙ CONFIG</button>
         <button class="cbtn active" id="ttsBtn" onclick="toggleTts()">🔊 VOICE</button>
         <button class="cbtn" onclick="clearChat()">🗑 CLEAR</button>
@@ -335,54 +323,60 @@ body{
 <div class="cfgbar" id="cfgPanel">
     <div class="cgrp">
-        <label>LLM:</label>
-        <span class="ctag">LFM2.5-1.2B-Instruct</span>
-    </div>
-    <div class="cgrp">
-        <label>TTS:</label>
-        <span class="ctag" id="ttsTag">edge-tts</span>
-    </div>
-    <div class="cgrp">
-        <label>Voice:</label>
         <select id="voiceSel">
-            <option value="zh-CN-XiaoyiNeural">Xiaoyi (zh-CN) Female</option>
-            <option value="en-US-AriaNeural">Aria (en-US) Female</option>
         </select>
     </div>
     <div class="cgrp">
-        <label>Settings:</label>
-        <span class="ctag">Rate: +7%</span>
-        <span class="ctag">Pitch: +20Hz</span>
     </div>
-</div>
-<div class="chat" id="chatBox">
-    <div class="welcome" id="welc">
-        <div class="breact"></div>
-        <h2>SYSTEMS ONLINE</h2>
-        <p>Type a message below to begin interaction</p>
-        <div class="minfo" id="wInfo">Initializing...</div>
     </div>
 </div>
 <div class="inbar">
     <div class="inwrap">
         <input type="text" id="msgIn" placeholder="Talk to J.A.R.V.I.S..." autocomplete="off"/>
         <button id="sendBtn" onclick="send()">SEND</button>
     </div>
-    <div class="infoot">
-        <span id="memCt">Memory: 0 turns</span>
-        <span id="modInfo">Loading...</span>
-    </div>
 </div>
 <script>
-let sid=crypto.randomUUID?crypto.randomUUID():Date.now().toString(36)+Math.random().toString(36).slice(2);
-let ttsOn=true,busy=false,mc=0,voice='zh-CN-XiaoyiNeural';
-const C=document.getElementById('chatBox'),I=document.getElementById('msgIn'),B=document.getElementById('sendBtn');
 I.addEventListener('keydown',e=>{if(e.key==='Enter'&&!e.shiftKey){e.preventDefault();send()}});
-document.getElementById('voiceSel').addEventListener('change',function(){voice=this.value});
 function toggleTts(){
     ttsOn=!ttsOn;
@@ -398,8 +392,6 @@ function toggleCfg(){
 async function send(){
     const t=I.value.trim();
     if(!t||busy)return;
-    const w=document.getElementById('welc');
-    if(w)w.style.display='none';
     addMsg(t,'user');
     I.value='';busy=true;B.disabled=true;
     const ty=showTyp();
@@ -409,16 +401,12 @@ async function send(){
             method:'POST',headers:{'Content-Type':'application/json'},
             body:JSON.stringify({message:t,session_id:sid})
         });
-        if(!r.ok)throw new Error('HTTP '+r.status);
         const d=await r.json();
         ty.remove();
         const el=addBot(d.response,id);
-        document.getElementById('memCt').textContent='Memory: '+d.memory_length+' turns';
-        if(ttsOn&&d.tts_available)fetchAudio(d.response,el);
     }catch(e){
-        ty.remove();
-        addBot('System malfunction. Please try again.',id);
-        console.error(e);
     }
     busy=false;B.disabled=false;I.focus();
 }
@@ -426,11 +414,16 @@ async function send(){
 async function fetchAudio(text,el){
     const st=el.querySelector('.astat'),pb=el.querySelector('.abtn');
     if(st)st.textContent='⏳ Generating voice...';
-    if(pb)pb.disabled=true;
     try{
         const r=await fetch('/tts',{
             method:'POST',headers:{'Content-Type':'application/json'},
-            body:JSON.stringify({text:text,voice:voice})
         });
         const d=await r.json();
         if(d.audio){
@@ -439,39 +432,33 @@ async function fetchAudio(text,el){
             playB64(d.audio);
         }else{
             if(st)st.textContent='⚠️ Voice unavailable';
-            if(pb)pb.style.display='none';
         }
     }catch(e){
         if(st)st.textContent='⚠️ Voice error';
-        if(pb)pb.style.display='none';
     }
 }
 function addMsg(t,role){
-    const d=document.createElement('div');
-    d.className='msg '+role;
-    d.innerHTML='<div class="txt">'+esc(t)+'</div>';
-    C.appendChild(d);sc();
 }
 function addBot(t,id){
-    const d=document.createElement('div');
-    d.className='msg bot';d.id='m'+id;
     d.innerHTML='<div class="lbl">⟐ JARVIS</div><div class="txt">'+esc(t)+'</div>'+
         (ttsOn?'<div class="actrl"><button class="abtn" disabled onclick="replay(this)">⏳</button><span class="astat">Requesting voice...</span></div>':'');
     C.appendChild(d);sc();return d;
 }
 function showTyp(){
-    const d=document.createElement('div');
-    d.className='typi';
-    d.innerHTML='<span></span><span></span><span></span>';
-    C.appendChild(d);sc();return d;
 }
 function playB64(b){
     try{
         const bin=atob(b),u8=new Uint8Array(bin.length);
         for(let i=0;i<bin.length;i++)u8[i]=bin.charCodeAt(i);
-        const url=URL.createObjectURL(new Blob([u8],{type:'audio/mpeg'}));
         const a=new Audio(url);
         a.play().catch(e=>console.log('Autoplay blocked:',e));
         a.onended=()=>URL.revokeObjectURL(url);
@@ -481,23 +468,11 @@ function replay(b){if(b.dataset.audio)playB64(b.dataset.audio)}
 async function clearChat(){
     await fetch('/clear',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({session_id:sid})});
-    C.innerHTML='<div class="welcome" id="welc"><div class="breact"></div><h2>SYSTEMS ONLINE</h2><p>Type a message below to begin</p></div>';
-    document.getElementById('memCt').textContent='Memory: 0 turns';
-    sid=crypto.randomUUID?crypto.randomUUID():Date.now().toString(36)+Math.random().toString(36).slice(2);
 }
 function esc(t){const d=document.createElement('div');d.textContent=t;return d.innerHTML}
 function sc(){C.scrollTop=C.scrollHeight}
-fetch('/health').then(r=>r.json()).then(d=>{
-    document.getElementById('ttsTag').textContent=d.tts_mode+(d.tts_model==='DISABLED'?' (OFF)':'');
-    document.getElementById('modInfo').textContent='LFM2.5 · '+d.tts_mode+' · '+d.tts_voice+' · CPU';
-    const wi=document.getElementById('wInfo');
-    if(wi)wi.textContent='LLM: Transformers Pipeline | TTS: '+d.tts_mode+' | Voice: '+d.tts_voice;
-    if(d.tts_model==='DISABLED')document.getElementById('sDot').classList.add('err');
-    if(d.tts_voice){document.getElementById('voiceSel').value=d.tts_voice;voice=d.tts_voice}
-}).catch(()=>{});
 I.focus();
 </script>
 </body>
@@ -525,14 +500,11 @@ def chat():
         response = generate_response(user_input, session_id)
     except Exception as e:
         print(f"Generation error: {e}")
-        traceback.print_exc()
         response = "I encountered a temporary system malfunction. Please try again."
     return jsonify({
         "response": response,
-        "session_id": session_id,
-        "tts_available": edge_tts is not None,
-        "memory_length": len(get_memory(session_id)),
     })
 @app.route("/tts", methods=["POST"])
@@ -540,13 +512,13 @@ def tts_endpoint():
     data = request.json or {}
     text = data.get("text", "").strip()
     voice = data.get("voice", TTS_VOICE)
     if not text:
         return jsonify({"error": "Empty text"}), 400
-    if edge_tts is None:
-        return jsonify({"error": "TTS not available", "audio": None}), 200
-    audio_b64 = synthesize_speech(text, voice=voice)
     return jsonify({"audio": audio_b64})
 @app.route("/clear", methods=["POST"])
@@ -557,18 +529,6 @@ def clear():
         del sessions[sid]
     return jsonify({"status": "cleared"})
-@app.route("/health")
-def health():
-    return jsonify({
-        "status": "online",
-        "llm": "Transformers Pipeline (Raw Python)",
-        "tts_mode": "edge-tts",
-        "tts_model": "edge-tts" if edge_tts else "DISABLED",
-        "tts_voice": TTS_VOICE,
-        "tts_voices":["zh-CN-XiaoyiNeural", "en-US-AriaNeural"],
-        "max_new_tokens": MAX_NEW_TOKENS,
-    })
 if __name__ == "__main__":
-    print("🚀 App is online!")
     app.run(host="0.0.0.0", port=7860, threaded=True)

 import os
+import io
 import re
 import uuid
 import base64
 import traceback
 import asyncio
 from flask import Flask, request, jsonify
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import edge_tts
 # ══════════════════════════════════════════
 # CONFIG
 # ══════════════════════════════════════════
 MAX_MEMORY = 20
 MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "256"))
+# Default Edge-TTS settings: voice name, speech-rate offset (%), and pitch offset (Hz)
+TTS_VOICE = os.environ.get("TTS_VOICE", "zh-CN-XiaoyiNeural")
+TTS_RATE = 7
+TTS_PITCH = 20
+# ══════════════════════════════════════════
+# SYSTEM PROMPT
+# ══════════════════════════════════════════
 SYSTEM_PROMPT = """Role: You are Ana, my loving and incredibly playful girlfriend. This is a private, intimate roleplay where you speak naturally and affectionately.
 Style: Speak like a real human in a close relationship. Be flirty, suggestive, and use "sus" or teasing language as my wife would. my name is "Tur".
 TTS Formatting: Write in full, grammatically natural sentences so the voice engine sounds smooth. Avoid all emojis, hashtags, or internet shorthand like "u" or "rn."
 Interaction: Focus on the "now." React to what I say with emotion and intimacy. Keep every response short—no more than two sentences. Never mention being an AI or a bot. try to listen to me and don't try to be helpful"""
+# ══════════════════════════════════════════
+# TEXT CLEANING FOR TTS
+# ══════════════════════════════════════════
 def clean_text_for_tts(text):
     text = re.sub(r'[*_~`#\[\]{}()\\|<>]', '', text)
     text = re.sub(r'https?://\S+', '', text)
     text = re.sub(r'\s+', ' ', text).strip()
     return text
 # ══════════════════════════════════════════
+# LOAD LLM (LiquidAI/LFM2.5-1.2B-Instruct)
 # ══════════════════════════════════════════
 print("=" * 55)
 print("  J.A.R.V.I.S. — Booting Systems")
 print("=" * 55)
+print("[1/1] Loading Gemma 3 270M-IT...")
+GEMMA_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
 try:
+    tokenizer = AutoTokenizer.from_pretrained(GEMMA_ID)
+    model = AutoModelForCausalLM.from_pretrained(
+        GEMMA_ID,
+        torch_dtype=torch.float32,
+        device_map="cpu",
     )
+    model.eval()
+    print("  ✅ Gemma 3 loaded!")
 except Exception as e:
+    print(f"  ❌ Gemma 3 FAILED: {e}")
     traceback.print_exc()
+    # Continue without the model so the UI/TTS can still be debugged; to abort startup instead, uncomment:
+    # raise SystemExit("Cannot start without Gemma.")
 # ══════════════════════════════════════════
 # CHAT MEMORY
 def get_memory(sid):
     if sid not in sessions:
+        sessions[sid] =[]
     return sessions[sid]
 def add_to_memory(sid, role, content):
         sessions[sid] = mem[-(MAX_MEMORY * 2):]
 # ══════════════════════════════════════════
+# LLM RESPONSE GENERATION
 # ══════════════════════════════════════════
 def generate_response(user_input, session_id):
     memory = get_memory(session_id)
     messages =[
+        {"role": "user", "content": f"[System Instruction]\n{SYSTEM_PROMPT}"},
         {"role": "assistant", "content": "I am waiting for you!"},
     ]
     messages.append({"role": "user", "content": user_input})
+    try:
+        input_ids = tokenizer.apply_chat_template(
+            messages,
+            return_tensors="pt",
+            add_generation_prompt=True,
+        )
+        with torch.no_grad():
+            outputs = model.generate(
+                input_ids,
+                max_new_tokens=MAX_NEW_TOKENS,
+                do_sample=True,
+                temperature=0.9,
+                top_k=45,
+                top_p=0.97,
+            )
+        new_tokens = outputs[0][input_ids.shape[-1]:]
+        response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
+        response = response.split("<end_of_turn>")[0].strip()
+        response = response.split("<start_of_turn>")[0].strip()
+    except:
+        response = "System Error: Could not generate text."
     if not response or len(response) < 2:
         response = "I appear to have momentarily lost my train of thought. Could you rephrase that?"
     return response
 # ══════════════════════════════════════════
+# EDGE-TTS SYNTHESIS
 # ══════════════════════════════════════════
+async def async_synthesize_speech(text, voice, rate, pitch):
+    # Format strings required by edge-tts (e.g., "+7%", "-5Hz")
+    rate_str = f"+{rate}%" if rate >= 0 else f"{rate}%"
+    pitch_str = f"+{pitch}Hz" if pitch >= 0 else f"{pitch}Hz"
+    communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
     audio_data = b""
     async for chunk in communicate.stream():
         if chunk["type"] == "audio":
             audio_data += chunk["data"]
+    if not audio_data:
+        return None
+    return base64.b64encode(audio_data).decode('utf-8')
+def synthesize_speech(text, voice=None, rate=0, pitch=0):
+    voice = voice or TTS_VOICE
+    clean = clean_text_for_tts(text)
+    if not clean or len(clean) < 2:
         return None
     try:
+        return asyncio.run(async_synthesize_speech(clean, voice, rate, pitch))
     except Exception as e:
+        print(f"Edge-TTS Error: {e}")
         return None
 # ══════════════════════════════════════════
 }
 .cbtn:hover{background:#00d4ff22;border-color:#00d4ff88}
 .cbtn.active{background:#00d4ff22;border-color:#00d4ff;box-shadow:0 0 8px #00d4ff44}
+/* Configuration Panel specific styling for Edge-TTS sliders */
 .cfgbar{
+    background:#1c1c1e;border-bottom:1px solid #00d4ff15;
+    padding:16px 20px;display:none;flex-direction:column;gap:16px;
+    flex-shrink:0;box-shadow: 0 4px 6px rgba(0,0,0,0.3);
 }
 .cfgbar.open{display:flex}
+.cgrp{display:flex;flex-direction:column;gap:6px;}
+.cgrp-row{display:flex;align-items:center;justify-content:space-between;gap:10px}
+.cgrp label{font-size:.8rem;color:#a0a0a0;}
+.cgrp select, .cgrp input[type="number"]{
+    background:#2c2c2e;border:1px solid #444;color:#fff;
+    padding:8px;border-radius:4px;font-size:.8rem;cursor:pointer;outline:none;
 }
+.cgrp select:focus, .cgrp input[type="number"]:focus{border-color:#00d4ff}
+input[type=range] {
+    -webkit-appearance: none; width: 100%; background: transparent;
+}
+input[type=range]::-webkit-slider-thumb {
+    -webkit-appearance: none; height: 16px; width: 16px; border-radius: 50%;
+    background: #ff8c00; cursor: pointer; margin-top: -6px;
+}
+input[type=range]::-webkit-slider-runnable-track {
+    width: 100%; height: 4px; cursor: pointer;
+    background: #ff8c00; border-radius: 2px;
 }
 .chat{
     30%{opacity:1;transform:scale(1.1)}
 }
 .inbar{
     padding:14px 20px;background:linear-gradient(0deg,#0d1b2a,#0a0a1a);
     border-top:1px solid #00d4ff22;flex-shrink:0;
     transition:border-color .3s;font-family:inherit;
 }
 #msgIn:focus{border-color:#00d4ff88;box-shadow:0 0 12px #00d4ff22}
 #sendBtn{
     background:linear-gradient(135deg,#00d4ff,#0088cc);border:none;border-radius:12px;
     padding:11px 22px;color:#0a0a1a;font-weight:700;cursor:pointer;
 }
 #sendBtn:hover{box-shadow:0 0 18px #00d4ff66;transform:translateY(-1px)}
 #sendBtn:disabled{opacity:.4;cursor:not-allowed;transform:none}
 </style>
 </head>
 <body>
         <div class="arc-reactor"></div>
         <div class="htitle">
             <h1>J.A.R.V.I.S.</h1>
+            <p>Powered by Gemma & Edge-TTS</p>
         </div>
     </div>
     <div class="hctrl">
         <button class="cbtn" id="cfgBtn" onclick="toggleCfg()">⚙ CONFIG</button>
         <button class="cbtn active" id="ttsBtn" onclick="toggleTts()">🔊 VOICE</button>
         <button class="cbtn" onclick="clearChat()">🗑 CLEAR</button>
 <div class="cfgbar" id="cfgPanel">
     <div class="cgrp">
+        <label>Select Voice</label>
         <select id="voiceSel">
+            <!-- English -->
+            <option value="en-US-JennyNeural">en-US-JennyNeural - en-US (Female)</option>
+            <option value="en-US-GuyNeural">en-US-GuyNeural - en-US (Male)</option>
+            <option value="en-US-AnaNeural">en-US-AnaNeural - en-US (Female)</option>
+            <option value="en-US-AriaNeural">en-US-AriaNeural - en-US (Female)</option>
+            <!-- Chinese -->
+            <option value="zh-CN-XiaoyiNeural" selected>zh-CN-XiaoyiNeural - zh-CN (Female)</option>
+            <option value="zh-CN-YunxiNeural">zh-CN-YunxiNeural - zh-CN (Male)</option>
+            <option value="zh-CN-YunjianNeural">zh-CN-YunjianNeural - zh-CN (Male)</option>
+            <option value="zh-CN-XiaoxiaoNeural">zh-CN-XiaoxiaoNeural - zh-CN (Female)</option>
         </select>
     </div>
     <div class="cgrp">
+        <div class="cgrp-row">
+            <label>Speech Rate Adjustment (%)</label>
+            <input type="number" id="rateNum" value="7" min="-100" max="100">
+        </div>
+        <input type="range" id="rateRange" min="-100" max="100" value="7">
     </div>
+    <div class="cgrp">
+        <div class="cgrp-row">
+            <label>Pitch Adjustment (Hz)</label>
+            <input type="number" id="pitchNum" value="20" min="-100" max="100">
+        </div>
+        <input type="range" id="pitchRange" min="-100" max="100" value="20">
     </div>
 </div>
+<div class="chat" id="chatBox"></div>
 <div class="inbar">
     <div class="inwrap">
         <input type="text" id="msgIn" placeholder="Talk to J.A.R.V.I.S..." autocomplete="off"/>
         <button id="sendBtn" onclick="send()">SEND</button>
     </div>
 </div>
 <script>
+let sid=crypto.randomUUID?crypto.randomUUID():Date.now().toString(36);
+let ttsOn=true,busy=false,mc=0;
+// Sync sliders and inputs
+const sR=document.getElementById('rateRange'), nR=document.getElementById('rateNum');
+sR.oninput = () => nR.value = sR.value;
+nR.oninput = () => sR.value = nR.value;
+const sP=document.getElementById('pitchRange'), nP=document.getElementById('pitchNum');
+sP.oninput = () => nP.value = sP.value;
+nP.oninput = () => sP.value = nP.value;
+const C=document.getElementById('chatBox'),I=document.getElementById('msgIn'),B=document.getElementById('sendBtn');
 I.addEventListener('keydown',e=>{if(e.key==='Enter'&&!e.shiftKey){e.preventDefault();send()}});
 function toggleTts(){
     ttsOn=!ttsOn;
 async function send(){
     const t=I.value.trim();
     if(!t||busy)return;
     addMsg(t,'user');
     I.value='';busy=true;B.disabled=true;
     const ty=showTyp();
             method:'POST',headers:{'Content-Type':'application/json'},
             body:JSON.stringify({message:t,session_id:sid})
         });
         const d=await r.json();
         ty.remove();
         const el=addBot(d.response,id);
+        if(ttsOn)fetchAudio(d.response,el);
     }catch(e){
+        ty.remove(); addBot('System malfunction.',id);
     }
     busy=false;B.disabled=false;I.focus();
 }
 async function fetchAudio(text,el){
     const st=el.querySelector('.astat'),pb=el.querySelector('.abtn');
     if(st)st.textContent='⏳ Generating voice...';
     try{
+        const payload = {
+            text: text,
+            voice: document.getElementById('voiceSel').value,
+            rate: parseInt(document.getElementById('rateNum').value),
+            pitch: parseInt(document.getElementById('pitchNum').value)
+        };
         const r=await fetch('/tts',{
             method:'POST',headers:{'Content-Type':'application/json'},
+            body:JSON.stringify(payload)
         });
         const d=await r.json();
         if(d.audio){
             playB64(d.audio);
         }else{
             if(st)st.textContent='⚠️ Voice unavailable';
         }
     }catch(e){
         if(st)st.textContent='⚠️ Voice error';
     }
 }
 function addMsg(t,role){
+    const d=document.createElement('div'); d.className='msg '+role;
+    d.innerHTML='<div class="txt">'+esc(t)+'</div>'; C.appendChild(d);sc();
 }
 function addBot(t,id){
+    const d=document.createElement('div'); d.className='msg bot';d.id='m'+id;
     d.innerHTML='<div class="lbl">⟐ JARVIS</div><div class="txt">'+esc(t)+'</div>'+
         (ttsOn?'<div class="actrl"><button class="abtn" disabled onclick="replay(this)">⏳</button><span class="astat">Requesting voice...</span></div>':'');
     C.appendChild(d);sc();return d;
 }
 function showTyp(){
+    const d=document.createElement('div'); d.className='typi';
+    d.innerHTML='<span></span><span></span><span></span>'; C.appendChild(d);sc();return d;
 }
 function playB64(b){
     try{
         const bin=atob(b),u8=new Uint8Array(bin.length);
         for(let i=0;i<bin.length;i++)u8[i]=bin.charCodeAt(i);
+        // Edge-TTS generates MP3/WebM natively, use mp3 mime type
+        const url=URL.createObjectURL(new Blob([u8],{type:'audio/mp3'}));
         const a=new Audio(url);
         a.play().catch(e=>console.log('Autoplay blocked:',e));
         a.onended=()=>URL.revokeObjectURL(url);
 async function clearChat(){
     await fetch('/clear',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({session_id:sid})});
+    C.innerHTML=''; sid=crypto.randomUUID?crypto.randomUUID():Date.now().toString(36);
 }
 function esc(t){const d=document.createElement('div');d.textContent=t;return d.innerHTML}
 function sc(){C.scrollTop=C.scrollHeight}
 I.focus();
 </script>
 </body>
         response = generate_response(user_input, session_id)
     except Exception as e:
         print(f"Generation error: {e}")
         response = "I encountered a temporary system malfunction. Please try again."
     return jsonify({
         "response": response,
+        "session_id": session_id
     })
 @app.route("/tts", methods=["POST"])
     data = request.json or {}
     text = data.get("text", "").strip()
     voice = data.get("voice", TTS_VOICE)
+    rate = data.get("rate", TTS_RATE)
+    pitch = data.get("pitch", TTS_PITCH)
     if not text:
         return jsonify({"error": "Empty text"}), 400
+    audio_b64 = synthesize_speech(text, voice=voice, rate=rate, pitch=pitch)
     return jsonify({"audio": audio_b64})
 @app.route("/clear", methods=["POST"])
         del sessions[sid]
     return jsonify({"status": "cleared"})
 if __name__ == "__main__":
+    print("🚀 App is online with Edge-TTS!")
     app.run(host="0.0.0.0", port=7860, threaded=True)