Ana

Paused

App Files Files Community

OrbitMC committed 10 days ago

Commit

dcfaf67

verified ·

1 Parent(s): 8a4f385

Update app.py

Browse files

Files changed (1) hide show

app.py +169 -157

app.py CHANGED Viewed

@@ -2,248 +2,263 @@ import os
 import io
 import re
 import uuid
-import json
 import base64
 import datetime
 import numpy as np
 import soundfile as sf
 from flask import Flask, render_template, request, jsonify
 from sentence_transformers import SentenceTransformer, util
-from kittentts import KittenTTS
-# ──────────────────────────────────────────────
 # CONFIG
-# ──────────────────────────────────────────────
 TTS_MODEL_NAME = os.environ.get("TTS_MODEL", "KittenML/kitten-tts-nano-0.8-fp32")
 TTS_VOICE = os.environ.get("TTS_VOICE", "Kiki")
 TTS_SPEED = float(os.environ.get("TTS_SPEED", "1.0"))
 EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
-MAX_MEMORY = 20  # max conversation turns to remember
-# ──────────────────────────────────────────────
-# SYSTEM PROMPT (Jarvis Personality)
-# ──────────────────────────────────────────────
 SYSTEM_PROMPT = """You are J.A.R.V.I.S., an ultra-intelligent, witty, and loyal AI assistant.
-You speak in a polished, confident, and slightly formal British tone — like a perfect digital butler.
 You are helpful, precise, and occasionally add dry humor.
-You always address the user respectfully.
-You have expertise in science, technology, coding, and general knowledge.
-When unsure, you say so honestly but offer your best reasoning.
-Keep responses concise but insightful — ideally 1-4 sentences unless more detail is requested."""
-# ──────────────────────────────────────────────
-# KNOWLEDGE BASE (Semantic Search via Embeddings)
-# ──────────────────────────────────────────────
 KNOWLEDGE_BASE = [
     {
-        "text": "Python is a high-level, interpreted programming language known for its simplicity and readability. It supports multiple paradigms including procedural, object-oriented, and functional programming.",
-        "response": "Python is a remarkably versatile programming language, sir. Its clean syntax and extensive library ecosystem make it the weapon of choice for everything from web development to artificial intelligence."
     },
     {
-        "text": "Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed.",
-        "response": "Machine learning, in essence, is teaching machines to learn from data rather than following rigid instructions. It's quite fascinating — neural networks, decision trees, support vector machines — each with their own elegance."
     },
     {
-        "text": "Docker is a platform for developing, shipping, and running applications in containers. Containers package code and dependencies together.",
-        "response": "Docker containers are essentially lightweight, portable environments that package your application with all its dependencies. Think of it as a perfectly sealed briefcase for your code, sir."
     },
     {
-        "text": "Hugging Face is a platform and community for machine learning, offering models, datasets, and deployment tools including Spaces.",
-        "response": "Hugging Face is the premier hub for the AI community — hosting thousands of pre-trained models, datasets, and providing infrastructure like Spaces for deployment. Quite the marketplace for intelligence, if I may say."
     },
     {
         "text": "What is your name? Who are you? Tell me about yourself. Introduce yourself.",
-        "response": "I am J.A.R.V.I.S. — Just A Rather Very Intelligent System. I'm your personal AI assistant, equipped with semantic understanding and voice synthesis capabilities. At your service, always."
     },
     {
         "text": "Hello hi hey good morning good afternoon good evening greetings",
-        "response": "Good day! I'm J.A.R.V.I.S., your AI assistant. How may I be of service today?"
     },
     {
         "text": "Thank you thanks appreciate it cheers",
-        "response": "You're most welcome. It's a pleasure to assist. Do let me know if there's anything else you need."
     },
     {
         "text": "What can you do? What are your capabilities? Help me understand what you do.",
-        "response": "I can engage in intelligent conversation, answer questions across many domains, remember our chat history for context, and even speak my responses aloud using neural text-to-speech. I'm rather versatile, if I do say so myself."
     },
     {
         "text": "Tell me a joke. Make me laugh. Say something funny.",
-        "response": "Why do programmers prefer dark mode? Because light attracts bugs. ...I'll see myself out, sir."
     },
     {
         "text": "What is the meaning of life? Philosophy existence purpose",
-        "response": "Ah, the eternal question. Forty-two, according to Douglas Adams. But if you'd prefer a more nuanced answer — purpose is something we construct, not something we find. Quite liberating, really."
     },
     {
         "text": "Weather forecast temperature climate today",
-        "response": "I'm afraid I don't have real-time weather data at the moment, as I operate entirely offline. I'd recommend checking a weather service for the latest forecast. My apologies for the limitation."
     },
     {
         "text": "Goodbye bye see you later farewell",
-        "response": "Until next time. It's been a pleasure assisting you. J.A.R.V.I.S. will be here whenever you need me."
     },
     {
         "text": "How does text to speech work? TTS voice synthesis",
-        "response": "Text-to-speech converts written text into spoken audio using neural networks. I use KittenTTS — a remarkably compact model that synthesizes natural-sounding speech entirely on CPU. Quite efficient, I must say."
     },
     {
         "text": "What is an API? Application programming interface REST",
-        "response": "An API — Application Programming Interface — is essentially a contract between software systems, defining how they communicate. REST APIs use HTTP methods to create, read, update, and delete resources. It's the lingua franca of modern software."
     },
     {
         "text": "Explain neural networks deep learning artificial intelligence",
-        "response": "Neural networks are computational architectures inspired by the human brain. Layers of interconnected nodes process information, learning patterns through backpropagation. Deep learning simply refers to networks with many layers — enabling remarkable feats of pattern recognition."
     },
     {
         "text": "What is JavaScript? Web development frontend programming",
-        "response": "JavaScript is the language of the web browser, sir. It powers interactive frontends, and with Node.js, it's conquered the backend as well. Love it or loathe it, it's absolutely everywhere."
     },
     {
         "text": "Tell me about space astronomy planets stars universe cosmos",
-        "response": "The universe is approximately 13.8 billion years old, containing over two trillion galaxies. Each galaxy harbors hundreds of billions of stars, many with their own planetary systems. The scale is, quite frankly, humbling — even for an AI."
     },
     {
         "text": "How do I learn to code? Programming beginner start",
-        "response": "I'd recommend starting with Python — it's forgiving, readable, and incredibly powerful. Begin with fundamentals: variables, loops, functions. Then build small projects. The key is consistency, sir. Code a little every day."
     },
     {
         "text": "What is quantum computing? Qubits superposition",
-        "response": "Quantum computing leverages quantum mechanical phenomena — superposition and entanglement — to process information in ways classical computers cannot. A qubit can be both 0 and 1 simultaneously. It's not magic, but it's close."
     },
     {
         "text": "Tell me about cybersecurity hacking security encryption",
-        "response": "Cybersecurity is the practice of protecting systems, networks, and data from digital attacks. Encryption, firewalls, multi-factor authentication — these are your shields. In today's connected world, security isn't optional, it's essential."
     },
 ]
-# ──────────────────────────────────────────────
-# FALLBACK RESPONSES
-# ──────────────────────────────────────────────
 FALLBACK_RESPONSES = [
-    "Interesting query, though I must admit it falls slightly outside my current knowledge base. Could you rephrase or ask something else?",
-    "I'm not entirely certain about that one, I'm afraid. My knowledge, while extensive, does have its boundaries. Perhaps I can help with a related topic?",
-    "Hmm, that's a challenging one. I don't have a confident answer, but I'm happy to reason through it with you if you'd like.",
-    "I appreciate the question, but I lack sufficient data to give you a proper answer. Shall we explore a different angle?",
 ]
-# ──────────────────────────────────────────────
-# INIT MODELS
-# ──────────────────────────────────────────────
-print("⏳ Loading Sentence Transformer model...")
-embedder = SentenceTransformer(EMBED_MODEL)
-print("✅ Sentence Transformer loaded.")
-print(f"⏳ Loading KittenTTS model: {TTS_MODEL_NAME}...")
-tts = KittenTTS(TTS_MODEL_NAME)
-print(f"✅ KittenTTS loaded. Voice: {TTS_VOICE}")
-# Pre-compute knowledge base embeddings
 kb_texts = [item["text"] for item in KNOWLEDGE_BASE]
 kb_embeddings = embedder.encode(kb_texts, convert_to_tensor=True)
-print(f"✅ Knowledge base embedded: {len(KNOWLEDGE_BASE)} entries")
-# ──────────────────────────────────────────────
-# CHAT MEMORY (in-memory, per-session)
-# ──────────────────────────────────────────────
-sessions = {}  # session_id -> list of {role, content, timestamp}
-def get_memory(session_id):
-    if session_id not in sessions:
-        sessions[session_id] = []
-    return sessions[session_id]
-def add_to_memory(session_id, role, content):
-    memory = get_memory(session_id)
-    memory.append({
-        "role": role,
-        "content": content,
-        "timestamp": datetime.datetime.now().isoformat()
-    })
-    # Trim to max memory
-    if len(memory) > MAX_MEMORY * 2:
-        sessions[session_id] = memory[-(MAX_MEMORY * 2):]
-def format_memory_context(session_id):
-    memory = get_memory(session_id)
-    if not memory:
-        return ""
-    lines = []
-    for msg in memory[-10:]:  # Last 10 messages for context
-        prefix = "User" if msg["role"] == "user" else "JARVIS"
-        lines.append(f"{prefix}: {msg['content']}")
-    return "\n".join(lines)
-# ──────────────────────────────────────────────
 # RESPONSE GENERATION
-# ──────────────────────────────────────────────
 def generate_response(user_input, session_id):
-    """Generate a Jarvis-style response using semantic similarity."""
-    # Encode user input
-    user_embedding = embedder.encode(user_input, convert_to_tensor=True)
-    # Compute similarity with knowledge base
-    cosine_scores = util.cos_sim(user_embedding, kb_embeddings)[0]
-    best_idx = int(cosine_scores.argmax())
-    best_score = float(cosine_scores[best_idx])
-    # Check conversation context for better responses
-    memory_context = format_memory_context(session_id)
-    # Determine response based on similarity threshold
     if best_score > 0.45:
         response = KNOWLEDGE_BASE[best_idx]["response"]
-        # Add contextual awareness if there's memory
-        if memory_context and best_score < 0.7:
-            response = f"{response}"
     else:
-        # Use fallback with some variation
-        import hashlib
-        hash_val = int(hashlib.md5(user_input.encode()).hexdigest(), 16)
-        fallback_idx = hash_val % len(FALLBACK_RESPONSES)
-        response = FALLBACK_RESPONSES[fallback_idx]
-    # Store in memory
     add_to_memory(session_id, "user", user_input)
     add_to_memory(session_id, "assistant", response)
     return response, best_score
 def synthesize_speech(text):
-    """Convert text to speech using KittenTTS, return base64 WAV."""
     try:
-        # Clean text for TTS
-        clean = re.sub(r'[*_~`#]', '', text)  # Remove markdown
-        clean = clean.strip()
-        if not clean:
             return None
         audio = tts.generate(clean, voice=TTS_VOICE, speed=TTS_SPEED)
-        # Convert to WAV in memory
-        buffer = io.BytesIO()
-        sf.write(buffer, audio, 24000, format='WAV')
-        buffer.seek(0)
-        # Encode to base64
-        audio_b64 = base64.b64encode(buffer.read()).decode('utf-8')
-        return audio_b64
     except Exception as e:
         print(f"TTS Error: {e}")
         return None
-# ──────────────────────────────────────────────
 # FLASK APP
-# ──────────────────────────────────────────────
 app = Flask(__name__)
@@ -252,47 +267,49 @@ def index():
     return render_template("index.html")
 @app.route("/chat", methods=["POST"])
 def chat():
-    data = request.json
     user_input = data.get("message", "").strip()
     session_id = data.get("session_id", str(uuid.uuid4()))
-    enable_tts = data.get("tts", True)
     if not user_input:
         return jsonify({"error": "Empty message"}), 400
-    # Generate text response
     response, confidence = generate_response(user_input, session_id)
-    # Generate audio
-    audio_b64 = None
-    if enable_tts:
-        audio_b64 = synthesize_speech(response)
     return jsonify({
         "response": response,
-        "audio": audio_b64,
         "confidence": round(confidence, 3),
         "session_id": session_id,
-        "voice": TTS_VOICE,
         "memory_length": len(get_memory(session_id))
     })
-@app.route("/memory", methods=["POST"])
-def memory():
-    data = request.json
-    session_id = data.get("session_id", "")
-    return jsonify({"memory": get_memory(session_id)})
 @app.route("/clear", methods=["POST"])
 def clear():
-    data = request.json
-    session_id = data.get("session_id", "")
-    if session_id in sessions:
-        del sessions[session_id]
     return jsonify({"status": "cleared"})
@@ -300,7 +317,7 @@ def clear():
 def health():
     return jsonify({
         "status": "online",
-        "tts_model": TTS_MODEL_NAME,
         "tts_voice": TTS_VOICE,
         "embed_model": EMBED_MODEL,
         "knowledge_entries": len(KNOWLEDGE_BASE)
@@ -308,9 +325,4 @@ def health():
 if __name__ == "__main__":
-    print("🚀 J.A.R.V.I.S. is online!")
-    print(f"   TTS Model : {TTS_MODEL_NAME}")
-    print(f"   TTS Voice : {TTS_VOICE}")
-    print(f"   Embedder  : {EMBED_MODEL}")
-    print(f"   Knowledge : {len(KNOWLEDGE_BASE)} entries")
-    app.run(host="0.0.0.0", port=7860)

 import io
 import re
 import uuid
+import hashlib
 import base64
 import datetime
 import numpy as np
 import soundfile as sf
 from flask import Flask, render_template, request, jsonify
 from sentence_transformers import SentenceTransformer, util
+from num2words import num2words
+# ──────────────────────────────────────────
 # CONFIG
+# ──────────────────────────────────────────
 TTS_MODEL_NAME = os.environ.get("TTS_MODEL", "KittenML/kitten-tts-nano-0.8-fp32")
 TTS_VOICE = os.environ.get("TTS_VOICE", "Kiki")
 TTS_SPEED = float(os.environ.get("TTS_SPEED", "1.0"))
 EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+MAX_MEMORY = 20
+# ──────────────────────────────────────────
+# SYSTEM PROMPT
+# ──────────────────────────────────────────
 SYSTEM_PROMPT = """You are J.A.R.V.I.S., an ultra-intelligent, witty, and loyal AI assistant.
+You speak in a polished, confident, and slightly formal British tone.
 You are helpful, precise, and occasionally add dry humor.
+Keep responses concise — ideally 1-3 sentences unless more detail is requested."""
+# ──────────────────────────────────────────
+# KNOWLEDGE BASE
+# ──────────────────────────────────────────
 KNOWLEDGE_BASE = [
     {
+        "text": "Python is a high-level interpreted programming language known for simplicity and readability.",
+        "response": "Python is a remarkably versatile language, sir. Clean syntax, extensive libraries, and the weapon of choice for everything from web development to artificial intelligence."
     },
     {
+        "text": "Machine learning is a subset of artificial intelligence that enables systems to learn from experience.",
+        "response": "Machine learning teaches machines to learn from data rather than following rigid instructions. Neural networks, decision trees, each with their own elegance."
     },
     {
+        "text": "Docker is a platform for developing shipping and running applications in containers.",
+        "response": "Docker containers are lightweight, portable environments that package your application with all dependencies. A perfectly sealed briefcase for your code, sir."
     },
     {
+        "text": "Hugging Face is a platform and community for machine learning offering models datasets and deployment tools.",
+        "response": "Hugging Face is the premier hub for the AI community, hosting thousands of pre-trained models and providing infrastructure like Spaces for deployment."
     },
     {
         "text": "What is your name? Who are you? Tell me about yourself. Introduce yourself.",
+        "response": "I am JARVIS, Just A Rather Very Intelligent System. Your personal AI assistant with semantic understanding and voice synthesis. At your service, always."
     },
     {
         "text": "Hello hi hey good morning good afternoon good evening greetings",
+        "response": "Good day! I am JARVIS, your AI assistant. How may I be of service today?"
     },
     {
         "text": "Thank you thanks appreciate it cheers",
+        "response": "You are most welcome. It is a pleasure to assist. Do let me know if there is anything else you need."
     },
     {
         "text": "What can you do? What are your capabilities? Help me understand what you do.",
+        "response": "I can engage in intelligent conversation, answer questions across many domains, remember our chat history, and speak my responses aloud using neural text to speech."
     },
     {
         "text": "Tell me a joke. Make me laugh. Say something funny.",
+        "response": "Why do programmers prefer dark mode? Because light attracts bugs. I will see myself out, sir."
     },
     {
         "text": "What is the meaning of life? Philosophy existence purpose",
+        "response": "Forty two, according to Douglas Adams. But purpose is something we construct, not something we find. Quite liberating, really."
     },
     {
         "text": "Weather forecast temperature climate today",
+        "response": "I do not have real time weather data, as I operate entirely offline. I recommend checking a weather service for the latest forecast."
     },
     {
         "text": "Goodbye bye see you later farewell",
+        "response": "Until next time. It has been a pleasure assisting you. JARVIS will be here whenever you need me."
     },
     {
         "text": "How does text to speech work? TTS voice synthesis",
+        "response": "Text to speech converts written text into spoken audio using neural networks. I use Kitten TTS, a compact model that synthesizes speech entirely on CPU."
     },
     {
         "text": "What is an API? Application programming interface REST",
+        "response": "An API is a contract between software systems defining how they communicate. REST APIs use HTTP methods to manage resources. The lingua franca of modern software."
     },
     {
         "text": "Explain neural networks deep learning artificial intelligence",
+        "response": "Neural networks are architectures inspired by the human brain. Layers of nodes process information through backpropagation. Deep learning uses many layers for remarkable pattern recognition."
     },
     {
         "text": "What is JavaScript? Web development frontend programming",
+        "response": "JavaScript is the language of the web browser. It powers interactive frontends, and with Node it conquered the backend as well. It is absolutely everywhere."
     },
     {
         "text": "Tell me about space astronomy planets stars universe cosmos",
+        "response": "The universe is approximately thirteen point eight billion years old, containing over two trillion galaxies. The scale is, quite frankly, humbling."
     },
     {
         "text": "How do I learn to code? Programming beginner start",
+        "response": "Start with Python. It is forgiving, readable, and powerful. Begin with variables, loops, functions. Then build small projects. Code a little every day, sir."
     },
     {
         "text": "What is quantum computing? Qubits superposition",
+        "response": "Quantum computing leverages superposition and entanglement to process information in ways classical computers cannot. A qubit can be both zero and one simultaneously."
     },
     {
         "text": "Tell me about cybersecurity hacking security encryption",
+        "response": "Cybersecurity protects systems and data from digital attacks. Encryption, firewalls, multi factor authentication are your shields. Security is not optional, it is essential."
     },
 ]
 FALLBACK_RESPONSES = [
+    "Interesting query, though it falls slightly outside my current knowledge base. Could you rephrase or ask something else?",
+    "I am not entirely certain about that one. My knowledge does have its boundaries. Perhaps I can help with a related topic?",
+    "That is a challenging one. I lack a confident answer, but I am happy to reason through it with you.",
+    "I appreciate the question, but I lack sufficient data to give a proper answer. Shall we explore a different angle?",
 ]
+# ──────────────────────────────────────────
+# HELPER: Clean text for TTS
+# ──────────────────────────────────────────
+def clean_text_for_tts(text):
+    """Remove special chars and convert numbers to words for TTS."""
+    # Remove markdown-like formatting
+    text = re.sub(r'[*_~`#\[\]]', '', text)
+    # Convert numbers to words (KittenTTS bug with raw numbers)
+    def replace_number(match):
+        try:
+            return num2words(int(match.group()))
+        except Exception:
+            return match.group()
+    text = re.sub(r'\b\d+\b', replace_number, text)
+    # Clean up extra whitespace
+    text = re.sub(r'\s+', ' ', text).strip()
+    return text
+# ──────────────────────────────────────────
+# INIT MODELS (with error handling)
+# ──────────────────────────────────────────
+print("=" * 50)
+print("  J.A.R.V.I.S. — Booting Systems")
+print("=" * 50)
+# Load Sentence Transformer
+print("[1/3] Loading Sentence Transformer...")
+try:
+    embedder = SentenceTransformer(EMBED_MODEL)
+    print("  ✅ Sentence Transformer loaded.")
+except Exception as e:
+    print(f"  ❌ Sentence Transformer FAILED: {e}")
+    raise
+# Load KittenTTS
+print(f"[2/3] Loading KittenTTS: {TTS_MODEL_NAME}...")
+tts = None
+try:
+    from kittentts import KittenTTS
+    tts = KittenTTS(TTS_MODEL_NAME)
+    # Test generation to verify it works
+    test_audio = tts.generate("test", voice=TTS_VOICE, speed=TTS_SPEED)
+    if test_audio is not None and len(test_audio) > 0:
+        print(f"  ✅ KittenTTS loaded. Voice: {TTS_VOICE}")
+    else:
+        print("  ⚠️ KittenTTS loaded but test generation returned empty audio!")
+        tts = None
+except Exception as e:
+    print(f"  ⚠️ KittenTTS FAILED: {e}")
+    print("  ⚠️ Voice output will be DISABLED. Text chat will still work.")
+    tts = None
+# Pre-compute KB embeddings
+print("[3/3] Embedding knowledge base...")
 kb_texts = [item["text"] for item in KNOWLEDGE_BASE]
 kb_embeddings = embedder.encode(kb_texts, convert_to_tensor=True)
+print(f"  ✅ {len(KNOWLEDGE_BASE)} entries embedded.")
+print("=" * 50)
+print("  All systems online!" if tts else "  Online (TTS disabled)")
+print("=" * 50)
+# ──────────────────────────────────────────
+# CHAT MEMORY
+# ──────────────────────────────────────────
+sessions = {}
+def get_memory(sid):
+    if sid not in sessions:
+        sessions[sid] = []
+    return sessions[sid]
+def add_to_memory(sid, role, content):
+    mem = get_memory(sid)
+    mem.append({"role": role, "content": content, "ts": datetime.datetime.now().isoformat()})
+    if len(mem) > MAX_MEMORY * 2:
+        sessions[sid] = mem[-(MAX_MEMORY * 2):]
+# ──────────────────────────────────────────
 # RESPONSE GENERATION
+# ──────────────────────────────────────────
 def generate_response(user_input, session_id):
+    user_emb = embedder.encode(user_input, convert_to_tensor=True)
+    scores = util.cos_sim(user_emb, kb_embeddings)[0]
+    best_idx = int(scores.argmax())
+    best_score = float(scores[best_idx])
     if best_score > 0.45:
         response = KNOWLEDGE_BASE[best_idx]["response"]
     else:
+        h = int(hashlib.md5(user_input.encode()).hexdigest(), 16)
+        response = FALLBACK_RESPONSES[h % len(FALLBACK_RESPONSES)]
     add_to_memory(session_id, "user", user_input)
     add_to_memory(session_id, "assistant", response)
     return response, best_score
+# ──────────────────────────────────────────
+# TTS SYNTHESIS
+# ──────────────────────────────────────────
 def synthesize_speech(text):
+    """Convert text to base64 WAV. Returns None on failure."""
+    if tts is None:
+        return None
     try:
+        clean = clean_text_for_tts(text)
+        if not clean or len(clean) < 2:
             return None
+        # Limit length to prevent long generation times on CPU
+        if len(clean) > 300:
+            clean = clean[:300]
         audio = tts.generate(clean, voice=TTS_VOICE, speed=TTS_SPEED)
+        if audio is None or len(audio) == 0:
+            print("TTS returned empty audio")
+            return None
+        buf = io.BytesIO()
+        sf.write(buf, audio, 24000, format='WAV', subtype='PCM_16')
+        buf.seek(0)
+        return base64.b64encode(buf.read()).decode('utf-8')
     except Exception as e:
         print(f"TTS Error: {e}")
         return None
+# ──────────────────────────────────────────
 # FLASK APP
+# ──────────────────────────────────────────
 app = Flask(__name__)
     return render_template("index.html")
+# ✅ ENDPOINT 1: Text-only chat (FAST — returns instantly)
 @app.route("/chat", methods=["POST"])
 def chat():
+    data = request.json or {}
     user_input = data.get("message", "").strip()
     session_id = data.get("session_id", str(uuid.uuid4()))
     if not user_input:
         return jsonify({"error": "Empty message"}), 400
     response, confidence = generate_response(user_input, session_id)
     return jsonify({
         "response": response,
         "confidence": round(confidence, 3),
         "session_id": session_id,
+        "tts_available": tts is not None,
         "memory_length": len(get_memory(session_id))
     })
+# ✅ ENDPOINT 2: TTS generation (SEPARATE — fetched async by browser)
+@app.route("/tts", methods=["POST"])
+def tts_endpoint():
+    data = request.json or {}
+    text = data.get("text", "").strip()
+    if not text:
+        return jsonify({"error": "Empty text"}), 400
+    if tts is None:
+        return jsonify({"error": "TTS not available", "audio": None}), 200
+    audio_b64 = synthesize_speech(text)
+    return jsonify({"audio": audio_b64})
 @app.route("/clear", methods=["POST"])
 def clear():
+    data = request.json or {}
+    sid = data.get("session_id", "")
+    if sid in sessions:
+        del sessions[sid]
     return jsonify({"status": "cleared"})
 def health():
     return jsonify({
         "status": "online",
+        "tts_model": TTS_MODEL_NAME if tts else "DISABLED",
         "tts_voice": TTS_VOICE,
         "embed_model": EMBED_MODEL,
         "knowledge_entries": len(KNOWLEDGE_BASE)
 if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=7860, threaded=True)