OrbitMC committed on
Commit
748b20d
Β·
verified Β·
1 Parent(s): ecbb6cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -115
app.py CHANGED
@@ -1,38 +1,20 @@
1
  import os
2
- import io
3
  import re
4
  import uuid
5
  import base64
6
  import datetime
7
  import traceback
8
- import numpy as np
9
- import soundfile as sf
10
  from flask import Flask, request, jsonify
11
  from num2words import num2words
12
- import torch
13
- from transformers import AutoTokenizer, AutoModelForCausalLM
14
 
15
  # ══════════════════════════════════════════
16
  # CONFIG
17
  # ══════════════════════════════════════════
18
 
19
- # TTS Options (switch via env var TTS_MODE):
20
- # nano-fp32 β†’ 15M params, 56MB (fastest, default)
21
- # nano-int8 β†’ 15M params, 25MB (smallest)
22
- # micro β†’ 40M params, 41MB (balanced)
23
- # mini β†’ 80M params, 80MB (best quality)
24
- TTS_MODE = os.environ.get("TTS_MODE", "nano-fp32")
25
-
26
- TTS_MODEL_MAP = {
27
- "nano-fp32": "KittenML/kitten-tts-nano-0.8-fp32",
28
- "nano-int8": "KittenML/kitten-tts-nano-0.8-int8",
29
- "micro": "KittenML/kitten-tts-micro-0.8",
30
- "mini": "KittenML/kitten-tts-mini-0.8",
31
- }
32
-
33
- # Voice: Bella, Jasper, Luna, Bruno, Rosie, Hugo, Kiki, Leo
34
- TTS_VOICE = os.environ.get("TTS_VOICE", "Kiki")
35
- TTS_SPEED = float(os.environ.get("TTS_SPEED", "1.15"))
36
  MAX_MEMORY = 20
37
  MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "256"))
38
 
@@ -60,51 +42,40 @@ def clean_text_for_tts(text):
60
  return text
61
 
62
  # ══════════════════════════════════════════
63
- # LOAD GEMMA 3 270M-IT
64
  # ══════════════════════════════════════════
65
  print("=" * 55)
66
  print(" J.A.R.V.I.S. β€” Booting Systems")
67
  print("=" * 55)
68
 
69
- print("[1/2] Loading Gemma 3 270M-IT...")
70
- GEMMA_ID = "LiquidAI/LFM2.5-1.2B-Instruct"
71
  try:
72
- tokenizer = AutoTokenizer.from_pretrained(GEMMA_ID)
73
- model = AutoModelForCausalLM.from_pretrained(
74
- GEMMA_ID,
75
- torch_dtype=torch.float32,
76
- device_map="cpu",
77
- )
78
- model.eval()
79
- print(" βœ… Gemma 3 270M-IT loaded!")
80
  except Exception as e:
81
- print(f" ❌ Gemma 3 FAILED: {e}")
82
- traceback.print_exc()
83
- raise SystemExit("Cannot start without Gemma. Check HF_TOKEN and license agreement.")
 
 
 
 
 
84
 
85
- # ══════════════════════════════════════════
86
- # LOAD KITTENTTS
87
- # ══════════════════════════════════════════
88
- tts = None
89
- tts_model_name = TTS_MODEL_MAP.get(TTS_MODE, TTS_MODEL_MAP["nano-fp32"])
90
- print(f"[2/2] Loading KittenTTS: {TTS_MODE} β†’ {tts_model_name}...")
91
  try:
92
- from kittentts import KittenTTS
93
- tts = KittenTTS(tts_model_name)
94
- test_audio = tts.generate("online", voice=TTS_VOICE, speed=TTS_SPEED)
95
- if test_audio is not None and len(test_audio) > 0:
96
- print(f" βœ… KittenTTS ready. Model: {TTS_MODE} | Voice: {TTS_VOICE}")
97
- else:
98
- print(" ⚠️ KittenTTS test returned empty audio!")
99
- tts = None
100
- except Exception as e:
101
- print(f" ⚠️ KittenTTS FAILED: {e}")
102
- tts = None
103
 
104
  print("=" * 55)
105
- print(f" LLM : Gemma 3 270M-IT")
106
- print(f" TTS : {TTS_MODE} ({'READY' if tts else 'DISABLED'})")
107
- print(f" Voice: {TTS_VOICE} | Speed: {TTS_SPEED}")
108
  print(f" Max tokens: {MAX_NEW_TOKENS}")
109
  print("=" * 55)
110
 
@@ -129,14 +100,14 @@ def add_to_memory(sid, role, content):
129
  sessions[sid] = mem[-(MAX_MEMORY * 2):]
130
 
131
  # ══════════════════════════════════════════
132
- # GEMMA RESPONSE GENERATION
133
  # ══════════════════════════════════════════
134
  def generate_response(user_input, session_id):
135
  memory = get_memory(session_id)
136
 
137
- # Build chat messages: system instruction β†’ memory β†’ new message
138
- messages = [
139
- {"role": "user", "content": f"[System Instruction]\n{SYSTEM_PROMPT}"},
140
  {"role": "assistant", "content": "I am waiting for you!"},
141
  ]
142
 
@@ -149,31 +120,18 @@ def generate_response(user_input, session_id):
149
  # Current user message
150
  messages.append({"role": "user", "content": user_input})
151
 
152
- # Tokenize with Gemma chat template
153
- input_ids = tokenizer.apply_chat_template(
154
  messages,
155
- return_tensors="pt",
156
- add_generation_prompt=True,
 
 
 
157
  )
158
 
159
- # Generate
160
- with torch.no_grad():
161
- outputs = model.generate(
162
- input_ids,
163
- max_new_tokens=MAX_NEW_TOKENS,
164
- do_sample=True,
165
- temperature=0.9,
166
- top_k=45,
167
- top_p=0.97,
168
- )
169
-
170
- # Decode only new tokens
171
- new_tokens = outputs[0][input_ids.shape[-1]:]
172
- response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
173
-
174
- # Clean artifacts
175
- response = response.split("<end_of_turn>")[0].strip()
176
- response = response.split("<start_of_turn>")[0].strip()
177
 
178
  if not response or len(response) < 2:
179
  response = "I appear to have momentarily lost my train of thought. Could you rephrase that?"
@@ -183,10 +141,19 @@ def generate_response(user_input, session_id):
183
  return response
184
 
185
  # ══════════════════════════════════════════
186
- # TTS SYNTHESIS
187
  # ══════════════════════════════════════════
 
 
 
 
 
 
 
 
 
188
  def synthesize_speech(text, voice=None):
189
- if tts is None:
190
  return None
191
  try:
192
  voice = voice or TTS_VOICE
@@ -195,13 +162,13 @@ def synthesize_speech(text, voice=None):
195
  return None
196
  if len(clean) > 400:
197
  clean = clean[:400]
198
- audio = tts.generate(clean, voice=voice, speed=TTS_SPEED)
199
- if audio is None or len(audio) == 0:
 
 
200
  return None
201
- buf = io.BytesIO()
202
- sf.write(buf, audio, 24000, format='WAV', subtype='PCM_16')
203
- buf.seek(0)
204
- return base64.b64encode(buf.read()).decode('utf-8')
205
  except Exception as e:
206
  print(f"TTS Error: {e}")
207
  return None
@@ -389,31 +356,23 @@ body{
389
  <div class="cfgbar" id="cfgPanel">
390
  <div class="cgrp">
391
  <label>LLM:</label>
392
- <span class="ctag">Gemma 3 270M-IT</span>
393
  </div>
394
  <div class="cgrp">
395
  <label>TTS:</label>
396
- <span class="ctag" id="ttsTag">loading...</span>
397
  </div>
398
  <div class="cgrp">
399
  <label>Voice:</label>
400
  <select id="voiceSel">
401
- <option value="Kiki">Kiki</option>
402
- <option value="Bella">Bella</option>
403
- <option value="Jasper">Jasper</option>
404
- <option value="Luna">Luna</option>
405
- <option value="Bruno">Bruno</option>
406
- <option value="Rosie">Rosie</option>
407
- <option value="Hugo">Hugo</option>
408
- <option value="Leo">Leo</option>
409
  </select>
410
  </div>
411
  <div class="cgrp">
412
- <label>TTS env options:</label>
413
- <span class="ctag">nano-fp32</span>
414
- <span class="ctag">nano-int8</span>
415
- <span class="ctag">micro</span>
416
- <span class="ctag">mini</span>
417
  </div>
418
  </div>
419
 
@@ -439,7 +398,7 @@ body{
439
 
440
  <script>
441
  let sid=crypto.randomUUID?crypto.randomUUID():Date.now().toString(36)+Math.random().toString(36).slice(2);
442
- let ttsOn=true,busy=false,mc=0,voice='Kiki';
443
  const C=document.getElementById('chatBox'),I=document.getElementById('msgIn'),B=document.getElementById('sendBtn');
444
 
445
  I.addEventListener('keydown',e=>{if(e.key==='Enter'&&!e.shiftKey){e.preventDefault();send()}});
@@ -532,7 +491,8 @@ function playB64(b){
532
  try{
533
  const bin=atob(b),u8=new Uint8Array(bin.length);
534
  for(let i=0;i<bin.length;i++)u8[i]=bin.charCodeAt(i);
535
- const url=URL.createObjectURL(new Blob([u8],{type:'audio/wav'}));
 
536
  const a=new Audio(url);
537
  a.play().catch(e=>console.log('Autoplay blocked:',e));
538
  a.onended=()=>URL.revokeObjectURL(url);
@@ -552,9 +512,9 @@ function sc(){C.scrollTop=C.scrollHeight}
552
 
553
  fetch('/health').then(r=>r.json()).then(d=>{
554
  document.getElementById('ttsTag').textContent=d.tts_mode+(d.tts_model==='DISABLED'?' (OFF)':'');
555
- document.getElementById('modInfo').textContent='Gemma 3 Β· '+d.tts_mode+' Β· '+d.tts_voice+' Β· CPU';
556
  const wi=document.getElementById('wInfo');
557
- if(wi)wi.textContent='LLM: Gemma 3 270M-IT | TTS: '+d.tts_mode+' | Voice: '+d.tts_voice;
558
  if(d.tts_model==='DISABLED')document.getElementById('sDot').classList.add('err');
559
  if(d.tts_voice){document.getElementById('voiceSel').value=d.tts_voice;voice=d.tts_voice}
560
  }).catch(()=>{});
@@ -592,7 +552,7 @@ def chat():
592
  return jsonify({
593
  "response": response,
594
  "session_id": session_id,
595
- "tts_available": tts is not None,
596
  "memory_length": len(get_memory(session_id)),
597
  })
598
 
@@ -604,7 +564,7 @@ def tts_endpoint():
604
 
605
  if not text:
606
  return jsonify({"error": "Empty text"}), 400
607
- if tts is None:
608
  return jsonify({"error": "TTS not available", "audio": None}), 200
609
 
610
  audio_b64 = synthesize_speech(text, voice=voice)
@@ -622,11 +582,11 @@ def clear():
622
  def health():
623
  return jsonify({
624
  "status": "online",
625
- "llm": "Gemma 3 270M-IT",
626
- "tts_mode": TTS_MODE,
627
- "tts_model": tts_model_name if tts else "DISABLED",
628
  "tts_voice": TTS_VOICE,
629
- "tts_voices": ["Bella","Jasper","Luna","Bruno","Rosie","Hugo","Kiki","Leo"],
630
  "max_new_tokens": MAX_NEW_TOKENS,
631
  })
632
 
 
1
  import os
 
2
  import re
3
  import uuid
4
  import base64
5
  import datetime
6
  import traceback
7
+ import asyncio
 
8
  from flask import Flask, request, jsonify
9
  from num2words import num2words
10
+ from transformers import pipeline
 
11
 
12
  # ══════════════════════════════════════════
13
  # CONFIG
14
  # ══════════════════════════════════════════
15
 
16
+ # edge-tts Options
17
+ TTS_VOICE = os.environ.get("TTS_VOICE", "zh-CN-XiaoyiNeural")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  MAX_MEMORY = 20
19
  MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "256"))
20
 
 
42
  return text
43
 
44
  # ══════════════════════════════════════════
45
+ # LOAD UNSLOTH GGUF & EDGE-TTS
46
  # ══════════════════════════════════════════
47
  print("=" * 55)
48
  print(" J.A.R.V.I.S. β€” Booting Systems")
49
  print("=" * 55)
50
 
51
print("[1/2] Loading LFM2.5 1.2B Instruct GGUF via pipeline...")
LLM_ID = "unsloth/LFM2.5-1.2B-Instruct-GGUF"
try:
    # Prefer the Q4_K_M quant to keep CPU RAM low. NOTE: transformers'
    # `gguf_file` must be an EXACT filename inside the repo — a glob like
    # "*Q4_K_M.gguf" is never matched and always raises, which silently
    # forced the heavier auto-load fallback on every boot.
    # TODO(review): confirm this filename against the repo's file listing.
    GGUF_FILE = "LFM2.5-1.2B-Instruct-Q4_K_M.gguf"
    pipe = pipeline(
        "text-generation",
        model=LLM_ID,
        device_map="cpu",
        model_kwargs={"gguf_file": GGUF_FILE},
    )
    print(f" ✅ {LLM_ID} loaded with {GGUF_FILE}!")
except Exception as e:
    # Fall back to the repo's default weights if the quant file is absent.
    print(f" ⚠️ Pipeline load failed with specific gguf_file, trying default auto-load... ({e})")
    try:
        pipe = pipeline("text-generation", model=LLM_ID, device_map="cpu")
        print(f" ✅ {LLM_ID} loaded with default!")
    except Exception as e2:
        # No LLM means the app cannot serve /chat at all — abort startup.
        print(f" ❌ Model FAILED completely: {e2}")
        traceback.print_exc()
        raise SystemExit("Cannot start without LLM. Check HF_TOKEN and GGUF compatibility.")
66
 
67
+ print("[2/2] Loading edge-tts...")
 
 
 
 
 
68
  try:
69
+ import edge_tts
70
+ print(f" βœ… edge-tts ready. Default Voice: {TTS_VOICE}")
71
+ except ImportError as e:
72
+ print(f" ❌ edge-tts FAILED: {e}")
73
+ edge_tts = None
 
 
 
 
 
 
74
 
75
  print("=" * 55)
76
+ print(f" LLM : {LLM_ID}")
77
+ print(f" TTS : edge-tts ({'READY' if edge_tts else 'DISABLED'})")
78
+ print(f" Voice: {TTS_VOICE} | Rate: +7% | Pitch: +20Hz")
79
  print(f" Max tokens: {MAX_NEW_TOKENS}")
80
  print("=" * 55)
81
 
 
100
  sessions[sid] = mem[-(MAX_MEMORY * 2):]
101
 
102
  # ══════════════════════════════════════════
103
+ # RESPONSE GENERATION
104
  # ══════════════════════════════════════════
105
  def generate_response(user_input, session_id):
106
  memory = get_memory(session_id)
107
 
108
+ # Build chat messages
109
+ messages =[
110
+ {"role": "system", "content": SYSTEM_PROMPT},
111
  {"role": "assistant", "content": "I am waiting for you!"},
112
  ]
113
 
 
120
  # Current user message
121
  messages.append({"role": "user", "content": user_input})
122
 
123
+ # Generate via pipeline
124
+ outputs = pipe(
125
  messages,
126
+ max_new_tokens=MAX_NEW_TOKENS,
127
+ do_sample=True,
128
+ temperature=0.9,
129
+ top_k=45,
130
+ top_p=0.97,
131
  )
132
 
133
+ # Extract the assistant's newly generated text
134
+ response = outputs[0]["generated_text"][-1]["content"].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
  if not response or len(response) < 2:
137
  response = "I appear to have momentarily lost my train of thought. Could you rephrase that?"
 
141
  return response
142
 
143
  # ══════════════════════════════════════════
144
+ # TTS SYNTHESIS (EDGE-TTS)
145
  # ══════════════════════════════════════════
146
async def _synthesize_edge(text, voice):
    """Stream speech for *text* from edge-tts and return the raw audio bytes.

    Uses fixed prosody settings (rate +7%, pitch +20Hz). Returns the
    concatenated audio chunks (MP3 stream data) as a single bytes object.
    """
    stream = edge_tts.Communicate(text, voice, rate="+7%", pitch="+20Hz")
    pieces = []
    async for chunk in stream.stream():
        # The stream interleaves audio and metadata events; keep audio only.
        if chunk["type"] == "audio":
            pieces.append(chunk["data"])
    return b"".join(pieces)
154
+
155
  def synthesize_speech(text, voice=None):
156
+ if edge_tts is None:
157
  return None
158
  try:
159
  voice = voice or TTS_VOICE
 
162
  return None
163
  if len(clean) > 400:
164
  clean = clean[:400]
165
+
166
+ audio_bytes = asyncio.run(_synthesize_edge(clean, voice))
167
+
168
+ if not audio_bytes or len(audio_bytes) == 0:
169
  return None
170
+
171
+ return base64.b64encode(audio_bytes).decode('utf-8')
 
 
172
  except Exception as e:
173
  print(f"TTS Error: {e}")
174
  return None
 
356
  <div class="cfgbar" id="cfgPanel">
357
  <div class="cgrp">
358
  <label>LLM:</label>
359
+ <span class="ctag">LFM2.5-1.2B-Instruct-GGUF</span>
360
  </div>
361
  <div class="cgrp">
362
  <label>TTS:</label>
363
+ <span class="ctag" id="ttsTag">edge-tts</span>
364
  </div>
365
  <div class="cgrp">
366
  <label>Voice:</label>
367
  <select id="voiceSel">
368
+ <option value="zh-CN-XiaoyiNeural">Xiaoyi (zh-CN) Female</option>
369
+ <option value="en-US-AriaNeural">Aria (en-US) Female</option>
 
 
 
 
 
 
370
  </select>
371
  </div>
372
  <div class="cgrp">
373
+ <label>Settings:</label>
374
+ <span class="ctag">Rate: +7%</span>
375
+ <span class="ctag">Pitch: +20Hz</span>
 
 
376
  </div>
377
  </div>
378
 
 
398
 
399
  <script>
400
  let sid=crypto.randomUUID?crypto.randomUUID():Date.now().toString(36)+Math.random().toString(36).slice(2);
401
+ let ttsOn=true,busy=false,mc=0,voice='zh-CN-XiaoyiNeural';
402
  const C=document.getElementById('chatBox'),I=document.getElementById('msgIn'),B=document.getElementById('sendBtn');
403
 
404
  I.addEventListener('keydown',e=>{if(e.key==='Enter'&&!e.shiftKey){e.preventDefault();send()}});
 
491
  try{
492
  const bin=atob(b),u8=new Uint8Array(bin.length);
493
  for(let i=0;i<bin.length;i++)u8[i]=bin.charCodeAt(i);
494
+ // Using audio/mpeg as edge-tts outputs MP3 chunks
495
+ const url=URL.createObjectURL(new Blob([u8],{type:'audio/mpeg'}));
496
  const a=new Audio(url);
497
  a.play().catch(e=>console.log('Autoplay blocked:',e));
498
  a.onended=()=>URL.revokeObjectURL(url);
 
512
 
513
  fetch('/health').then(r=>r.json()).then(d=>{
514
  document.getElementById('ttsTag').textContent=d.tts_mode+(d.tts_model==='DISABLED'?' (OFF)':'');
515
+ document.getElementById('modInfo').textContent=d.llm+' Β· '+d.tts_mode+' Β· '+d.tts_voice+' Β· CPU';
516
  const wi=document.getElementById('wInfo');
517
+ if(wi)wi.textContent='LLM: '+d.llm+' | TTS: '+d.tts_mode+' | Voice: '+d.tts_voice;
518
  if(d.tts_model==='DISABLED')document.getElementById('sDot').classList.add('err');
519
  if(d.tts_voice){document.getElementById('voiceSel').value=d.tts_voice;voice=d.tts_voice}
520
  }).catch(()=>{});
 
552
  return jsonify({
553
  "response": response,
554
  "session_id": session_id,
555
+ "tts_available": edge_tts is not None,
556
  "memory_length": len(get_memory(session_id)),
557
  })
558
 
 
564
 
565
  if not text:
566
  return jsonify({"error": "Empty text"}), 400
567
+ if edge_tts is None:
568
  return jsonify({"error": "TTS not available", "audio": None}), 200
569
 
570
  audio_b64 = synthesize_speech(text, voice=voice)
 
582
  def health():
583
  return jsonify({
584
  "status": "online",
585
+ "llm": "unsloth/LFM2.5-1.2B-Instruct-GGUF",
586
+ "tts_mode": "edge-tts",
587
+ "tts_model": "edge-tts" if edge_tts else "DISABLED",
588
  "tts_voice": TTS_VOICE,
589
+ "tts_voices":["zh-CN-XiaoyiNeural", "en-US-AriaNeural"],
590
  "max_new_tokens": MAX_NEW_TOKENS,
591
  })
592