OrbitMC committed on
Commit
e599a24
Β·
verified Β·
1 Parent(s): a144f7f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +306 -62
app.py CHANGED
@@ -1,72 +1,316 @@
1
  import os
2
  import io
 
 
 
3
  import base64
4
- from flask import Flask, request, jsonify
5
- from huggingface_hub import hf_hub_download
6
- from ctransformers import AutoModelForCausalLM
7
- from kittentts import KittenTTS
8
  import soundfile as sf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  app = Flask(__name__)
11
 
12
- MODEL_REPO = "unsloth/gemma-3-270m-it-GGUF"
13
- MODEL_FILE = "gemma-3-270m-it-F16.gguf"
14
- model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE, local_dir="models")
15
- llm = AutoModelForCausalLM.from_pretrained(model_path, model_type="gemma", context_length=2048)
16
- tts = KittenTTS("KittenML/kitten-tts-nano-0.8-int8")
17
-
18
- HTML = """<!DOCTYPE html>
19
- <html lang="en">
20
- <head>
21
- <meta charset="UTF-8">
22
- <title>Local Gemma + Kitten TTS</title>
23
- <style>body{font-family:Arial;margin:0;padding:20px;background:#111;color:#0f0} #chat{max-width:600px;margin:auto} .msg{margin:10px 0;padding:10px;border-radius:8px} .user{background:#222} .assistant{background:#333} input{width:80%;padding:10px} button{padding:10px}</style>
24
- </head>
25
- <body>
26
- <div id="chat"></div>
27
- <input id="input" placeholder="Type message..." onkeypress="if(event.key==='Enter')send()">
28
- <button onclick="send()">Send</button>
29
- <script>
30
- function addMsg(role,text,audioB64){
31
- const div=document.createElement('div');div.className='msg '+role;
32
- div.innerHTML=`<b>${role}:</b> ${text}<br>`;
33
- if(audioB64){
34
- const a=document.createElement('audio');a.controls=true;a.src='data:audio/wav;base64,'+audioB64;div.append(a);
35
- }
36
- document.getElementById('chat').append(div);div.scrollIntoView();
37
- }
38
- async function send(){
39
- const input=document.getElementById('input');
40
- const msg=input.value.trim();if(!msg)return;
41
- addMsg('user',msg);
42
- input.value='';
43
- const res=await fetch('/api/chat',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({message:msg})});
44
- const data=await res.json();
45
- addMsg('assistant',data.text,data.audio);
46
- }
47
- </script>
48
- </body>
49
- </html>"""
50
-
51
- @app.route('/')
52
  def index():
53
- return HTML
 
54
 
55
- @app.route('/api/chat', methods=['POST'])
56
  def chat():
57
- user_msg = request.json['message']
58
- prompt = f"""<bos><start_of_turn>user
59
- {user_msg}<end_of_turn>
60
- <start_of_turn>model
61
- """
62
- response = llm(prompt, max_new_tokens=512, temperature=0.7, stop=["<end_of_turn>"])
63
- audio = tts.generate(text=response, voice="Kiki")
64
- buf = io.BytesIO()
65
- sf.write(buf, audio, 24000, format='WAV')
66
- buf.seek(0)
67
- audio_b64 = base64.b64encode(buf.read()).decode()
68
- return jsonify({"text": response, "audio": audio_b64})
69
-
70
- if __name__ == '__main__':
71
- port = int(os.environ.get('PORT', 7860))
72
- app.run(host='0.0.0.0', port=port)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import io
3
+ import re
4
+ import uuid
5
+ import json
6
  import base64
7
+ import datetime
8
+ import numpy as np
 
 
9
  import soundfile as sf
10
+ from flask import Flask, render_template, request, jsonify
11
+ from sentence_transformers import SentenceTransformer, util
12
+ from kittentts import KittenTTS
13
+
14
+ # ──────────────────────────────────────────────
15
+ # CONFIG
16
+ # ──────────────────────────────────────────────
17
# All TTS settings are overridable via environment variables so the Space
# can be reconfigured without a code change.
TTS_MODEL_NAME = os.environ.get("TTS_MODEL", "KittenML/kitten-tts-nano-0.8-fp32")
TTS_VOICE = os.environ.get("TTS_VOICE", "Kiki")
TTS_SPEED = float(os.environ.get("TTS_SPEED", "1.0"))  # speech-rate multiplier
# Sentence-embedding model used for semantic knowledge-base lookup.
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
MAX_MEMORY = 20 # max conversation turns to remember
22
+
23
+ # ──────────────────────────────────────────────
24
+ # SYSTEM PROMPT (Jarvis Personality)
25
+ # ──────────────────────────────────────────────
26
+ SYSTEM_PROMPT = """You are J.A.R.V.I.S., an ultra-intelligent, witty, and loyal AI assistant.
27
+ You speak in a polished, confident, and slightly formal British tone β€” like a perfect digital butler.
28
+ You are helpful, precise, and occasionally add dry humor.
29
+ You always address the user respectfully.
30
+ You have expertise in science, technology, coding, and general knowledge.
31
+ When unsure, you say so honestly but offer your best reasoning.
32
+ Keep responses concise but insightful β€” ideally 1-4 sentences unless more detail is requested."""
33
+
34
+ # ──────────────────────────────────────────────
35
+ # KNOWLEDGE BASE (Semantic Search via Embeddings)
36
+ # ──────────────────────────────────────────────
37
+ KNOWLEDGE_BASE = [
38
+ {
39
+ "text": "Python is a high-level, interpreted programming language known for its simplicity and readability. It supports multiple paradigms including procedural, object-oriented, and functional programming.",
40
+ "response": "Python is a remarkably versatile programming language, sir. Its clean syntax and extensive library ecosystem make it the weapon of choice for everything from web development to artificial intelligence."
41
+ },
42
+ {
43
+ "text": "Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed.",
44
+ "response": "Machine learning, in essence, is teaching machines to learn from data rather than following rigid instructions. It's quite fascinating β€” neural networks, decision trees, support vector machines β€” each with their own elegance."
45
+ },
46
+ {
47
+ "text": "Docker is a platform for developing, shipping, and running applications in containers. Containers package code and dependencies together.",
48
+ "response": "Docker containers are essentially lightweight, portable environments that package your application with all its dependencies. Think of it as a perfectly sealed briefcase for your code, sir."
49
+ },
50
+ {
51
+ "text": "Hugging Face is a platform and community for machine learning, offering models, datasets, and deployment tools including Spaces.",
52
+ "response": "Hugging Face is the premier hub for the AI community β€” hosting thousands of pre-trained models, datasets, and providing infrastructure like Spaces for deployment. Quite the marketplace for intelligence, if I may say."
53
+ },
54
+ {
55
+ "text": "What is your name? Who are you? Tell me about yourself. Introduce yourself.",
56
+ "response": "I am J.A.R.V.I.S. β€” Just A Rather Very Intelligent System. I'm your personal AI assistant, equipped with semantic understanding and voice synthesis capabilities. At your service, always."
57
+ },
58
+ {
59
+ "text": "Hello hi hey good morning good afternoon good evening greetings",
60
+ "response": "Good day! I'm J.A.R.V.I.S., your AI assistant. How may I be of service today?"
61
+ },
62
+ {
63
+ "text": "Thank you thanks appreciate it cheers",
64
+ "response": "You're most welcome. It's a pleasure to assist. Do let me know if there's anything else you need."
65
+ },
66
+ {
67
+ "text": "What can you do? What are your capabilities? Help me understand what you do.",
68
+ "response": "I can engage in intelligent conversation, answer questions across many domains, remember our chat history for context, and even speak my responses aloud using neural text-to-speech. I'm rather versatile, if I do say so myself."
69
+ },
70
+ {
71
+ "text": "Tell me a joke. Make me laugh. Say something funny.",
72
+ "response": "Why do programmers prefer dark mode? Because light attracts bugs. ...I'll see myself out, sir."
73
+ },
74
+ {
75
+ "text": "What is the meaning of life? Philosophy existence purpose",
76
+ "response": "Ah, the eternal question. Forty-two, according to Douglas Adams. But if you'd prefer a more nuanced answer β€” purpose is something we construct, not something we find. Quite liberating, really."
77
+ },
78
+ {
79
+ "text": "Weather forecast temperature climate today",
80
+ "response": "I'm afraid I don't have real-time weather data at the moment, as I operate entirely offline. I'd recommend checking a weather service for the latest forecast. My apologies for the limitation."
81
+ },
82
+ {
83
+ "text": "Goodbye bye see you later farewell",
84
+ "response": "Until next time. It's been a pleasure assisting you. J.A.R.V.I.S. will be here whenever you need me."
85
+ },
86
+ {
87
+ "text": "How does text to speech work? TTS voice synthesis",
88
+ "response": "Text-to-speech converts written text into spoken audio using neural networks. I use KittenTTS β€” a remarkably compact model that synthesizes natural-sounding speech entirely on CPU. Quite efficient, I must say."
89
+ },
90
+ {
91
+ "text": "What is an API? Application programming interface REST",
92
+ "response": "An API β€” Application Programming Interface β€” is essentially a contract between software systems, defining how they communicate. REST APIs use HTTP methods to create, read, update, and delete resources. It's the lingua franca of modern software."
93
+ },
94
+ {
95
+ "text": "Explain neural networks deep learning artificial intelligence",
96
+ "response": "Neural networks are computational architectures inspired by the human brain. Layers of interconnected nodes process information, learning patterns through backpropagation. Deep learning simply refers to networks with many layers β€” enabling remarkable feats of pattern recognition."
97
+ },
98
+ {
99
+ "text": "What is JavaScript? Web development frontend programming",
100
+ "response": "JavaScript is the language of the web browser, sir. It powers interactive frontends, and with Node.js, it's conquered the backend as well. Love it or loathe it, it's absolutely everywhere."
101
+ },
102
+ {
103
+ "text": "Tell me about space astronomy planets stars universe cosmos",
104
+ "response": "The universe is approximately 13.8 billion years old, containing over two trillion galaxies. Each galaxy harbors hundreds of billions of stars, many with their own planetary systems. The scale is, quite frankly, humbling β€” even for an AI."
105
+ },
106
+ {
107
+ "text": "How do I learn to code? Programming beginner start",
108
+ "response": "I'd recommend starting with Python β€” it's forgiving, readable, and incredibly powerful. Begin with fundamentals: variables, loops, functions. Then build small projects. The key is consistency, sir. Code a little every day."
109
+ },
110
+ {
111
+ "text": "What is quantum computing? Qubits superposition",
112
+ "response": "Quantum computing leverages quantum mechanical phenomena β€” superposition and entanglement β€” to process information in ways classical computers cannot. A qubit can be both 0 and 1 simultaneously. It's not magic, but it's close."
113
+ },
114
+ {
115
+ "text": "Tell me about cybersecurity hacking security encryption",
116
+ "response": "Cybersecurity is the practice of protecting systems, networks, and data from digital attacks. Encryption, firewalls, multi-factor authentication β€” these are your shields. In today's connected world, security isn't optional, it's essential."
117
+ },
118
+ ]
119
+
120
+ # ──────────────────────────────────────────────
121
+ # FALLBACK RESPONSES
122
+ # ──────────────────────────────────────────────
123
+ FALLBACK_RESPONSES = [
124
+ "Interesting query, though I must admit it falls slightly outside my current knowledge base. Could you rephrase or ask something else?",
125
+ "I'm not entirely certain about that one, I'm afraid. My knowledge, while extensive, does have its boundaries. Perhaps I can help with a related topic?",
126
+ "Hmm, that's a challenging one. I don't have a confident answer, but I'm happy to reason through it with you if you'd like.",
127
+ "I appreciate the question, but I lack sufficient data to give you a proper answer. Shall we explore a different angle?",
128
+ ]
129
+
130
+ # ──────────────────────────────────────────────
131
+ # INIT MODELS
132
+ # ──────────────────────────────────────────────
133
+ print("⏳ Loading Sentence Transformer model...")
134
+ embedder = SentenceTransformer(EMBED_MODEL)
135
+ print("βœ… Sentence Transformer loaded.")
136
+
137
+ print(f"⏳ Loading KittenTTS model: {TTS_MODEL_NAME}...")
138
+ tts = KittenTTS(TTS_MODEL_NAME)
139
+ print(f"βœ… KittenTTS loaded. Voice: {TTS_VOICE}")
140
+
141
+ # Pre-compute knowledge base embeddings
142
+ kb_texts = [item["text"] for item in KNOWLEDGE_BASE]
143
+ kb_embeddings = embedder.encode(kb_texts, convert_to_tensor=True)
144
+ print(f"βœ… Knowledge base embedded: {len(KNOWLEDGE_BASE)} entries")
145
+
146
+ # ──────────────────────────────────────────────
147
+ # CHAT MEMORY (in-memory, per-session)
148
+ # ──────────────────────────────────────────────
149
# In-process chat memory: session_id -> list of {role, content, timestamp}.
# Lives only as long as the server process (no persistence).
sessions = {}


def get_memory(session_id):
    """Return the message list for *session_id*, creating it on first use."""
    return sessions.setdefault(session_id, [])
156
+
157
+
158
def add_to_memory(session_id, role, content):
    """Append one message to a session's history, capping its length."""
    entry = {
        "role": role,
        "content": content,
        "timestamp": datetime.datetime.now().isoformat(),
    }
    history = get_memory(session_id)
    history.append(entry)
    # Cap at MAX_MEMORY turns; each turn is two messages (user + assistant).
    limit = MAX_MEMORY * 2
    if len(history) > limit:
        sessions[session_id] = history[-limit:]
168
+
169
+
170
def format_memory_context(session_id):
    """Render the most recent conversation turns as a plain-text transcript.

    Returns an empty string when the session has no history.
    """
    recent = get_memory(session_id)[-10:]  # last 10 messages only
    return "\n".join(
        f"{'User' if msg['role'] == 'user' else 'JARVIS'}: {msg['content']}"
        for msg in recent
    )
179
+
180
+
181
+ # ──────────────────────────────────────────────
182
+ # RESPONSE GENERATION
183
+ # ──────────────────────────────────────────────
184
def generate_response(user_input, session_id):
    """Pick a Jarvis-style reply for *user_input* via semantic similarity.

    Embeds the input, finds the closest knowledge-base entry by cosine
    similarity, and falls back to a deterministic canned response when no
    entry is similar enough. Both sides of the exchange are appended to
    the session memory.

    Returns:
        (response_text, best_similarity_score) tuple.
    """
    # Embed the query and score it against every knowledge-base entry.
    user_embedding = embedder.encode(user_input, convert_to_tensor=True)
    cosine_scores = util.cos_sim(user_embedding, kb_embeddings)[0]
    best_idx = int(cosine_scores.argmax())
    best_score = float(cosine_scores[best_idx])

    # NOTE(review): the previous revision also computed
    # format_memory_context(session_id) here and then applied the no-op
    # `response = f"{response}"` — both were dead code and are removed.
    if best_score > 0.45:  # empirical similarity threshold
        response = KNOWLEDGE_BASE[best_idx]["response"]
    else:
        # No confident match: pick a fallback deterministically from the
        # input text so the same question always gets the same reply.
        import hashlib
        hash_val = int(hashlib.md5(user_input.encode()).hexdigest(), 16)
        response = FALLBACK_RESPONSES[hash_val % len(FALLBACK_RESPONSES)]

    # Record the exchange so later turns have conversational context.
    add_to_memory(session_id, "user", user_input)
    add_to_memory(session_id, "assistant", response)

    return response, best_score
217
+
218
+
219
def synthesize_speech(text):
    """Render *text* as speech and return it as a base64-encoded WAV string.

    Returns None when the cleaned text is empty or synthesis fails —
    callers treat audio as optional.
    """
    try:
        # Strip markdown decoration characters before synthesis.
        clean = re.sub(r'[*_~`#]', '', text).strip()
        if not clean:
            return None

        audio = tts.generate(clean, voice=TTS_VOICE, speed=TTS_SPEED)

        # Serialize to an in-memory WAV, then base64-encode it.
        # assumes KittenTTS emits 24 kHz samples — TODO confirm
        wav_buf = io.BytesIO()
        sf.write(wav_buf, audio, 24000, format='WAV')
        return base64.b64encode(wav_buf.getvalue()).decode('utf-8')
    except Exception as e:
        # Best-effort: log and degrade gracefully rather than fail the request.
        print(f"TTS Error: {e}")
        return None
242
+
243
+
244
+ # ──────────────────────────────────────────────
245
+ # FLASK APP
246
+ # ──────────────────────────────────────────────
247
  app = Flask(__name__)
248
 
249
+
250
@app.route("/")
def index():
    """Serve the single-page chat UI from templates/index.html."""
    return render_template("index.html")
253
+
254
 
255
@app.route("/chat", methods=["POST"])
def chat():
    """Handle one chat turn: generate a text reply and optional TTS audio.

    Expects JSON: {"message": str, "session_id": str?, "tts": bool?}.
    Returns 400 on a missing/empty message or a non-JSON body.
    """
    # silent=True yields None (instead of raising) on a malformed body,
    # so bad requests produce a clean 400 rather than a 500.
    data = request.get_json(silent=True) or {}
    user_input = data.get("message", "").strip()
    session_id = data.get("session_id", str(uuid.uuid4()))
    enable_tts = data.get("tts", True)

    if not user_input:
        return jsonify({"error": "Empty message"}), 400

    # Generate text response (also updates session memory).
    response, confidence = generate_response(user_input, session_id)

    # Audio is optional; synthesize_speech returns None on failure.
    audio_b64 = synthesize_speech(response) if enable_tts else None

    return jsonify({
        "response": response,
        "audio": audio_b64,
        "confidence": round(confidence, 3),
        "session_id": session_id,
        "voice": TTS_VOICE,
        "memory_length": len(get_memory(session_id))
    })
281
+
282
+
283
@app.route("/memory", methods=["POST"])
def memory():
    """Return the stored conversation history for a session (read-only)."""
    # Tolerate non-JSON bodies instead of raising a 500.
    data = request.get_json(silent=True) or {}
    session_id = data.get("session_id", "")
    # Non-mutating lookup: the previous get_memory() call created an empty
    # session entry for every unknown id probed against this endpoint.
    return jsonify({"memory": sessions.get(session_id, [])})
288
+
289
+
290
@app.route("/clear", methods=["POST"])
def clear():
    """Delete all stored conversation history for a session."""
    # Tolerate non-JSON bodies instead of raising a 500.
    data = request.get_json(silent=True) or {}
    session_id = data.get("session_id", "")
    # pop() replaces the check-then-delete (`in` + `del`) pair; idempotent
    # and immune to a concurrent delete between check and removal.
    sessions.pop(session_id, None)
    return jsonify({"status": "cleared"})
297
+
298
+
299
@app.route("/health")
def health():
    """Liveness probe exposing the loaded model configuration."""
    info = {
        "status": "online",
        "tts_model": TTS_MODEL_NAME,
        "tts_voice": TTS_VOICE,
        "embed_model": EMBED_MODEL,
        "knowledge_entries": len(KNOWLEDGE_BASE),
    }
    return jsonify(info)
308
+
309
+
310
if __name__ == "__main__":
    # Restore the PORT env-var support the previous revision had (the port
    # was hard-coded to 7860 here); 7860 remains the default for HF Spaces.
    port = int(os.environ.get("PORT", "7860"))
    print("🚀 J.A.R.V.I.S. is online!")
    print(f"   TTS Model : {TTS_MODEL_NAME}")
    print(f"   TTS Voice : {TTS_VOICE}")
    print(f"   Embedder  : {EMBED_MODEL}")
    print(f"   Knowledge : {len(KNOWLEDGE_BASE)} entries")
    app.run(host="0.0.0.0", port=port)