OrbitMC committed on
Commit
e599a24
Β·
verified Β·
1 Parent(s): a144f7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +306 -62
app.py CHANGED
@@ -1,72 +1,316 @@
1
  import os
2
  import io
 
 
 
3
  import base64
4
- from flask import Flask, request, jsonify
5
- from huggingface_hub import hf_hub_download
6
- from ctransformers import AutoModelForCausalLM
7
- from kittentts import KittenTTS
8
  import soundfile as sf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  app = Flask(__name__)
11
 
12
- MODEL_REPO = "unsloth/gemma-3-270m-it-GGUF"
13
- MODEL_FILE = "gemma-3-270m-it-F16.gguf"
14
- model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir="models")
15
- llm = AutoModelForCausalLM.from_pretrained(model_path, model_type="gemma", context_length=2048)
16
- tts = KittenTTS("KittenML/kitten-tts-nano-0.8-int8")
17
-
18
- HTML = """<!DOCTYPE html>
19
- <html lang="en">
20
- <head>
21
- <meta charset="UTF-8">
22
- <title>Local Gemma + Kitten TTS</title>
23
- <style>body{font-family:Arial;margin:0;padding:20px;background:#111;color:#0f0} #chat{max-width:600px;margin:auto} .msg{margin:10px 0;padding:10px;border-radius:8px} .user{background:#222} .assistant{background:#333} input{width:80%;padding:10px} button{padding:10px}</style>
24
- </head>
25
- <body>
26
- <div id="chat"></div>
27
- <input id="input" placeholder="Type message..." onkeypress="if(event.key==='Enter')send()">
28
- <button onclick="send()">Send</button>
29
- <script>
30
- function addMsg(role,text,audioB64){
31
- const div=document.createElement('div');div.className='msg '+role;
32
- div.innerHTML=`<b>${role}:</b> ${text}<br>`;
33
- if(audioB64){
34
- const a=document.createElement('audio');a.controls=true;a.src='data:audio/wav;base64,'+audioB64;div.append(a);
35
- }
36
- document.getElementById('chat').append(div);div.scrollIntoView();
37
- }
38
- async function send(){
39
- const input=document.getElementById('input');
40
- const msg=input.value.trim();if(!msg)return;
41
- addMsg('user',msg);
42
- input.value='';
43
- const res=await fetch('/api/chat',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({message:msg})});
44
- const data=await res.json();
45
- addMsg('assistant',data.text,data.audio);
46
- }
47
- </script>
48
- </body>
49
- </html>"""
50
-
51
- @app.route('/')
52
  def index():
53
- return HTML
 
54
 
55
- @app.route('/api/chat', methods=['POST'])
56
  def chat():
57
- user_msg = request.json['message']
58
- prompt = f"""<bos><start_of_turn>user
59
- {user_msg}<end_of_turn>
60
- <start_of_turn>model
61
- """
62
- response = llm(prompt, max_new_tokens=512, temperature=0.7, stop=["<end_of_turn>"])
63
- audio = tts.generate(text=response, voice="Kiki")
64
- buf = io.BytesIO()
65
- sf.write(buf, audio, 24000, format='WAV')
66
- buf.seek(0)
67
- audio_b64 = base64.b64encode(buf.read()).decode()
68
- return jsonify({"text": response, "audio": audio_b64})
69
-
70
- if __name__ == '__main__':
71
- port = int(os.environ.get('PORT', 7860))
72
- app.run(host='0.0.0.0', port=port)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import io
3
+ import re
4
+ import uuid
5
+ import json
6
  import base64
7
+ import datetime
8
+ import numpy as np
 
 
9
  import soundfile as sf
10
+ from flask import Flask, render_template, request, jsonify
11
+ from sentence_transformers import SentenceTransformer, util
12
+ from kittentts import KittenTTS
13
+
14
+ # ──────────────────────────────────────────────
15
+ # CONFIG
16
+ # ──────────────────────────────────────────────
17
# All TTS settings are overridable via environment variables so the Space
# can be reconfigured without a code change.
TTS_MODEL_NAME = os.environ.get("TTS_MODEL", "KittenML/kitten-tts-nano-0.8-fp32")
TTS_VOICE = os.environ.get("TTS_VOICE", "Kiki")
TTS_SPEED = float(os.environ.get("TTS_SPEED", "1.0"))  # speech-rate multiplier
# Sentence-embedding model used for semantic knowledge-base lookup.
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
MAX_MEMORY = 20 # max conversation turns to remember
22
+
23
+ # ──────────────────────────────────────────────
24
+ # SYSTEM PROMPT (Jarvis Personality)
25
+ # ──────────────────────────────────────────────
26
+ SYSTEM_PROMPT = """You are J.A.R.V.I.S., an ultra-intelligent, witty, and loyal AI assistant.
27
+ You speak in a polished, confident, and slightly formal British tone β€” like a perfect digital butler.
28
+ You are helpful, precise, and occasionally add dry humor.
29
+ You always address the user respectfully.
30
+ You have expertise in science, technology, coding, and general knowledge.
31
+ When unsure, you say so honestly but offer your best reasoning.
32
+ Keep responses concise but insightful β€” ideally 1-4 sentences unless more detail is requested."""
33
+
34
+ # ──────────────────────────────────────────────
35
+ # KNOWLEDGE BASE (Semantic Search via Embeddings)
36
+ # ──────────────────────────────────────────────
37
+ KNOWLEDGE_BASE = [
38
+ {
39
+ "text": "Python is a high-level, interpreted programming language known for its simplicity and readability. It supports multiple paradigms including procedural, object-oriented, and functional programming.",
40
+ "response": "Python is a remarkably versatile programming language, sir. Its clean syntax and extensive library ecosystem make it the weapon of choice for everything from web development to artificial intelligence."
41
+ },
42
+ {
43
+ "text": "Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed.",
44
+ "response": "Machine learning, in essence, is teaching machines to learn from data rather than following rigid instructions. It's quite fascinating β€” neural networks, decision trees, support vector machines β€” each with their own elegance."
45
+ },
46
+ {
47
+ "text": "Docker is a platform for developing, shipping, and running applications in containers. Containers package code and dependencies together.",
48
+ "response": "Docker containers are essentially lightweight, portable environments that package your application with all its dependencies. Think of it as a perfectly sealed briefcase for your code, sir."
49
+ },
50
+ {
51
+ "text": "Hugging Face is a platform and community for machine learning, offering models, datasets, and deployment tools including Spaces.",
52
+ "response": "Hugging Face is the premier hub for the AI community β€” hosting thousands of pre-trained models, datasets, and providing infrastructure like Spaces for deployment. Quite the marketplace for intelligence, if I may say."
53
+ },
54
+ {
55
+ "text": "What is your name? Who are you? Tell me about yourself. Introduce yourself.",
56
+ "response": "I am J.A.R.V.I.S. β€” Just A Rather Very Intelligent System. I'm your personal AI assistant, equipped with semantic understanding and voice synthesis capabilities. At your service, always."
57
+ },
58
+ {
59
+ "text": "Hello hi hey good morning good afternoon good evening greetings",
60
+ "response": "Good day! I'm J.A.R.V.I.S., your AI assistant. How may I be of service today?"
61
+ },
62
+ {
63
+ "text": "Thank you thanks appreciate it cheers",
64
+ "response": "You're most welcome. It's a pleasure to assist. Do let me know if there's anything else you need."
65
+ },
66
+ {
67
+ "text": "What can you do? What are your capabilities? Help me understand what you do.",
68
+ "response": "I can engage in intelligent conversation, answer questions across many domains, remember our chat history for context, and even speak my responses aloud using neural text-to-speech. I'm rather versatile, if I do say so myself."
69
+ },
70
+ {
71
+ "text": "Tell me a joke. Make me laugh. Say something funny.",
72
+ "response": "Why do programmers prefer dark mode? Because light attracts bugs. ...I'll see myself out, sir."
73
+ },
74
+ {
75
+ "text": "What is the meaning of life? Philosophy existence purpose",
76
+ "response": "Ah, the eternal question. Forty-two, according to Douglas Adams. But if you'd prefer a more nuanced answer β€” purpose is something we construct, not something we find. Quite liberating, really."
77
+ },
78
+ {
79
+ "text": "Weather forecast temperature climate today",
80
+ "response": "I'm afraid I don't have real-time weather data at the moment, as I operate entirely offline. I'd recommend checking a weather service for the latest forecast. My apologies for the limitation."
81
+ },
82
+ {
83
+ "text": "Goodbye bye see you later farewell",
84
+ "response": "Until next time. It's been a pleasure assisting you. J.A.R.V.I.S. will be here whenever you need me."
85
+ },
86
+ {
87
+ "text": "How does text to speech work? TTS voice synthesis",
88
+ "response": "Text-to-speech converts written text into spoken audio using neural networks. I use KittenTTS β€” a remarkably compact model that synthesizes natural-sounding speech entirely on CPU. Quite efficient, I must say."
89
+ },
90
+ {
91
+ "text": "What is an API? Application programming interface REST",
92
+ "response": "An API β€” Application Programming Interface β€” is essentially a contract between software systems, defining how they communicate. REST APIs use HTTP methods to create, read, update, and delete resources. It's the lingua franca of modern software."
93
+ },
94
+ {
95
+ "text": "Explain neural networks deep learning artificial intelligence",
96
+ "response": "Neural networks are computational architectures inspired by the human brain. Layers of interconnected nodes process information, learning patterns through backpropagation. Deep learning simply refers to networks with many layers β€” enabling remarkable feats of pattern recognition."
97
+ },
98
+ {
99
+ "text": "What is JavaScript? Web development frontend programming",
100
+ "response": "JavaScript is the language of the web browser, sir. It powers interactive frontends, and with Node.js, it's conquered the backend as well. Love it or loathe it, it's absolutely everywhere."
101
+ },
102
+ {
103
+ "text": "Tell me about space astronomy planets stars universe cosmos",
104
+ "response": "The universe is approximately 13.8 billion years old, containing over two trillion galaxies. Each galaxy harbors hundreds of billions of stars, many with their own planetary systems. The scale is, quite frankly, humbling β€” even for an AI."
105
+ },
106
+ {
107
+ "text": "How do I learn to code? Programming beginner start",
108
+ "response": "I'd recommend starting with Python β€” it's forgiving, readable, and incredibly powerful. Begin with fundamentals: variables, loops, functions. Then build small projects. The key is consistency, sir. Code a little every day."
109
+ },
110
+ {
111
+ "text": "What is quantum computing? Qubits superposition",
112
+ "response": "Quantum computing leverages quantum mechanical phenomena β€” superposition and entanglement β€” to process information in ways classical computers cannot. A qubit can be both 0 and 1 simultaneously. It's not magic, but it's close."
113
+ },
114
+ {
115
+ "text": "Tell me about cybersecurity hacking security encryption",
116
+ "response": "Cybersecurity is the practice of protecting systems, networks, and data from digital attacks. Encryption, firewalls, multi-factor authentication β€” these are your shields. In today's connected world, security isn't optional, it's essential."
117
+ },
118
+ ]
119
+
120
+ # ──────────────────────────────────────────────
121
+ # FALLBACK RESPONSES
122
+ # ──────────────────────────────────────────────
123
+ FALLBACK_RESPONSES = [
124
+ "Interesting query, though I must admit it falls slightly outside my current knowledge base. Could you rephrase or ask something else?",
125
+ "I'm not entirely certain about that one, I'm afraid. My knowledge, while extensive, does have its boundaries. Perhaps I can help with a related topic?",
126
+ "Hmm, that's a challenging one. I don't have a confident answer, but I'm happy to reason through it with you if you'd like.",
127
+ "I appreciate the question, but I lack sufficient data to give you a proper answer. Shall we explore a different angle?",
128
+ ]
129
+
130
+ # ──────────────────────────────────────────────
131
+ # INIT MODELS
132
+ # ──────────────────────────────────────────────
133
+ print("⏳ Loading Sentence Transformer model...")
134
+ embedder = SentenceTransformer(EMBED_MODEL)
135
+ print("βœ… Sentence Transformer loaded.")
136
+
137
+ print(f"⏳ Loading KittenTTS model: {TTS_MODEL_NAME}...")
138
+ tts = KittenTTS(TTS_MODEL_NAME)
139
+ print(f"βœ… KittenTTS loaded. Voice: {TTS_VOICE}")
140
+
141
+ # Pre-compute knowledge base embeddings
142
+ kb_texts = [item["text"] for item in KNOWLEDGE_BASE]
143
+ kb_embeddings = embedder.encode(kb_texts, convert_to_tensor=True)
144
+ print(f"βœ… Knowledge base embedded: {len(KNOWLEDGE_BASE)} entries")
145
+
146
+ # ──────────────────────────────────────────────
147
+ # CHAT MEMORY (in-memory, per-session)
148
+ # ──────────────────────────────────────────────
149
# In-process chat memory: session_id -> list of {role, content, timestamp}.
# Lives only as long as the server process (no persistence).
sessions = {}


def get_memory(session_id):
    """Return the message list for *session_id*, creating it on first use."""
    return sessions.setdefault(session_id, [])
156
+
157
+
158
def add_to_memory(session_id, role, content):
    """Append one message to a session's history, capping its length."""
    entry = {
        "role": role,
        "content": content,
        "timestamp": datetime.datetime.now().isoformat(),
    }
    history = get_memory(session_id)
    history.append(entry)
    # Cap at MAX_MEMORY turns; each turn is two messages (user + assistant).
    limit = MAX_MEMORY * 2
    if len(history) > limit:
        sessions[session_id] = history[-limit:]
168
+
169
+
170
def format_memory_context(session_id):
    """Render the most recent conversation turns as a plain-text transcript.

    Returns an empty string when the session has no history.
    """
    recent = get_memory(session_id)[-10:]  # last 10 messages only
    return "\n".join(
        f"{'User' if msg['role'] == 'user' else 'JARVIS'}: {msg['content']}"
        for msg in recent
    )
179
+
180
+
181
+ # ──────────────────────────────────────────────
182
+ # RESPONSE GENERATION
183
+ # ──────────────────────────────────────────────
184
def generate_response(user_input, session_id):
    """Pick a Jarvis-style reply for *user_input* via semantic similarity.

    Embeds the input, finds the closest knowledge-base entry by cosine
    similarity, and falls back to a deterministic canned response when no
    entry is similar enough. Both sides of the exchange are appended to
    the session memory.

    Returns:
        (response_text, best_similarity_score) tuple.
    """
    # Embed the query and score it against every knowledge-base entry.
    user_embedding = embedder.encode(user_input, convert_to_tensor=True)
    cosine_scores = util.cos_sim(user_embedding, kb_embeddings)[0]
    best_idx = int(cosine_scores.argmax())
    best_score = float(cosine_scores[best_idx])

    # NOTE(review): the previous revision also computed
    # format_memory_context(session_id) here and then applied the no-op
    # `response = f"{response}"` — both were dead code and are removed.
    if best_score > 0.45:  # empirical similarity threshold
        response = KNOWLEDGE_BASE[best_idx]["response"]
    else:
        # No confident match: pick a fallback deterministically from the
        # input text so the same question always gets the same reply.
        import hashlib
        hash_val = int(hashlib.md5(user_input.encode()).hexdigest(), 16)
        response = FALLBACK_RESPONSES[hash_val % len(FALLBACK_RESPONSES)]

    # Record the exchange so later turns have conversational context.
    add_to_memory(session_id, "user", user_input)
    add_to_memory(session_id, "assistant", response)

    return response, best_score
217
+
218
+
219
def synthesize_speech(text):
    """Render *text* as speech and return it as a base64-encoded WAV string.

    Returns None when the cleaned text is empty or synthesis fails —
    callers treat audio as optional.
    """
    try:
        # Strip markdown decoration characters before synthesis.
        clean = re.sub(r'[*_~`#]', '', text).strip()
        if not clean:
            return None

        audio = tts.generate(clean, voice=TTS_VOICE, speed=TTS_SPEED)

        # Serialize to an in-memory WAV, then base64-encode it.
        # assumes KittenTTS emits 24 kHz samples — TODO confirm
        wav_buf = io.BytesIO()
        sf.write(wav_buf, audio, 24000, format='WAV')
        return base64.b64encode(wav_buf.getvalue()).decode('utf-8')
    except Exception as e:
        # Best-effort: log and degrade gracefully rather than fail the request.
        print(f"TTS Error: {e}")
        return None
242
+
243
+
244
+ # ──────────────────────────────────────────────
245
+ # FLASK APP
246
+ # ──────────────────────────────────────────────
247
  app = Flask(__name__)
248
 
249
+
250
@app.route("/")
def index():
    """Serve the single-page chat UI from templates/index.html."""
    return render_template("index.html")
253
+
254
 
255
@app.route("/chat", methods=["POST"])
def chat():
    """Handle one chat turn: generate a text reply and optional TTS audio.

    Expects JSON: {"message": str, "session_id": str?, "tts": bool?}.
    Returns 400 on a missing/empty message or a non-JSON body.
    """
    # silent=True yields None (instead of raising) on a malformed body,
    # so bad requests produce a clean 400 rather than a 500.
    data = request.get_json(silent=True) or {}
    user_input = data.get("message", "").strip()
    session_id = data.get("session_id", str(uuid.uuid4()))
    enable_tts = data.get("tts", True)

    if not user_input:
        return jsonify({"error": "Empty message"}), 400

    # Generate text response (also updates session memory).
    response, confidence = generate_response(user_input, session_id)

    # Audio is optional; synthesize_speech returns None on failure.
    audio_b64 = synthesize_speech(response) if enable_tts else None

    return jsonify({
        "response": response,
        "audio": audio_b64,
        "confidence": round(confidence, 3),
        "session_id": session_id,
        "voice": TTS_VOICE,
        "memory_length": len(get_memory(session_id))
    })
281
+
282
+
283
@app.route("/memory", methods=["POST"])
def memory():
    """Return the stored conversation history for a session (read-only)."""
    # Tolerate non-JSON bodies instead of raising a 500.
    data = request.get_json(silent=True) or {}
    session_id = data.get("session_id", "")
    # Non-mutating lookup: the previous get_memory() call created an empty
    # session entry for every unknown id probed against this endpoint.
    return jsonify({"memory": sessions.get(session_id, [])})
288
+
289
+
290
@app.route("/clear", methods=["POST"])
def clear():
    """Delete all stored conversation history for a session."""
    # Tolerate non-JSON bodies instead of raising a 500.
    data = request.get_json(silent=True) or {}
    session_id = data.get("session_id", "")
    # pop() replaces the check-then-delete (`in` + `del`) pair; idempotent
    # and immune to a concurrent delete between check and removal.
    sessions.pop(session_id, None)
    return jsonify({"status": "cleared"})
297
+
298
+
299
@app.route("/health")
def health():
    """Liveness probe exposing the loaded model configuration."""
    info = {
        "status": "online",
        "tts_model": TTS_MODEL_NAME,
        "tts_voice": TTS_VOICE,
        "embed_model": EMBED_MODEL,
        "knowledge_entries": len(KNOWLEDGE_BASE),
    }
    return jsonify(info)
308
+
309
+
310
if __name__ == "__main__":
    # Restore the PORT env-var support the previous revision had (the port
    # was hard-coded to 7860 here); 7860 remains the default for HF Spaces.
    port = int(os.environ.get("PORT", "7860"))
    print("🚀 J.A.R.V.I.S. is online!")
    print(f"   TTS Model : {TTS_MODEL_NAME}")
    print(f"   TTS Voice : {TTS_VOICE}")
    print(f"   Embedder  : {EMBED_MODEL}")
    print(f"   Knowledge : {len(KNOWLEDGE_BASE)} entries")
    app.run(host="0.0.0.0", port=port)