OrbitMC committed on
Commit
dcfaf67
·
verified ·
1 Parent(s): 8a4f385

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +169 -157
app.py CHANGED
@@ -2,248 +2,263 @@ import os
2
  import io
3
  import re
4
  import uuid
5
- import json
6
  import base64
7
  import datetime
8
  import numpy as np
9
  import soundfile as sf
10
  from flask import Flask, render_template, request, jsonify
11
  from sentence_transformers import SentenceTransformer, util
12
- from kittentts import KittenTTS
13
 
14
- # ──────────────────────────────────────────────
15
  # CONFIG
16
- # ──────────────────────────────────────────────
17
  TTS_MODEL_NAME = os.environ.get("TTS_MODEL", "KittenML/kitten-tts-nano-0.8-fp32")
18
  TTS_VOICE = os.environ.get("TTS_VOICE", "Kiki")
19
  TTS_SPEED = float(os.environ.get("TTS_SPEED", "1.0"))
20
  EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
21
- MAX_MEMORY = 20 # max conversation turns to remember
22
 
23
- # ──────────────────────────────────────────────
24
- # SYSTEM PROMPT (Jarvis Personality)
25
- # ──────────────────────────────────────────────
26
  SYSTEM_PROMPT = """You are J.A.R.V.I.S., an ultra-intelligent, witty, and loyal AI assistant.
27
- You speak in a polished, confident, and slightly formal British tone β€” like a perfect digital butler.
28
  You are helpful, precise, and occasionally add dry humor.
29
- You always address the user respectfully.
30
- You have expertise in science, technology, coding, and general knowledge.
31
- When unsure, you say so honestly but offer your best reasoning.
32
- Keep responses concise but insightful β€” ideally 1-4 sentences unless more detail is requested."""
33
-
34
- # ──────────────────────────────────────────────
35
- # KNOWLEDGE BASE (Semantic Search via Embeddings)
36
- # ──────────────────────────────────────────────
37
  KNOWLEDGE_BASE = [
38
  {
39
- "text": "Python is a high-level, interpreted programming language known for its simplicity and readability. It supports multiple paradigms including procedural, object-oriented, and functional programming.",
40
- "response": "Python is a remarkably versatile programming language, sir. Its clean syntax and extensive library ecosystem make it the weapon of choice for everything from web development to artificial intelligence."
41
  },
42
  {
43
- "text": "Machine learning is a subset of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed.",
44
- "response": "Machine learning, in essence, is teaching machines to learn from data rather than following rigid instructions. It's quite fascinating β€” neural networks, decision trees, support vector machines β€” each with their own elegance."
45
  },
46
  {
47
- "text": "Docker is a platform for developing, shipping, and running applications in containers. Containers package code and dependencies together.",
48
- "response": "Docker containers are essentially lightweight, portable environments that package your application with all its dependencies. Think of it as a perfectly sealed briefcase for your code, sir."
49
  },
50
  {
51
- "text": "Hugging Face is a platform and community for machine learning, offering models, datasets, and deployment tools including Spaces.",
52
- "response": "Hugging Face is the premier hub for the AI community β€” hosting thousands of pre-trained models, datasets, and providing infrastructure like Spaces for deployment. Quite the marketplace for intelligence, if I may say."
53
  },
54
  {
55
  "text": "What is your name? Who are you? Tell me about yourself. Introduce yourself.",
56
- "response": "I am J.A.R.V.I.S. β€” Just A Rather Very Intelligent System. I'm your personal AI assistant, equipped with semantic understanding and voice synthesis capabilities. At your service, always."
57
  },
58
  {
59
  "text": "Hello hi hey good morning good afternoon good evening greetings",
60
- "response": "Good day! I'm J.A.R.V.I.S., your AI assistant. How may I be of service today?"
61
  },
62
  {
63
  "text": "Thank you thanks appreciate it cheers",
64
- "response": "You're most welcome. It's a pleasure to assist. Do let me know if there's anything else you need."
65
  },
66
  {
67
  "text": "What can you do? What are your capabilities? Help me understand what you do.",
68
- "response": "I can engage in intelligent conversation, answer questions across many domains, remember our chat history for context, and even speak my responses aloud using neural text-to-speech. I'm rather versatile, if I do say so myself."
69
  },
70
  {
71
  "text": "Tell me a joke. Make me laugh. Say something funny.",
72
- "response": "Why do programmers prefer dark mode? Because light attracts bugs. ...I'll see myself out, sir."
73
  },
74
  {
75
  "text": "What is the meaning of life? Philosophy existence purpose",
76
- "response": "Ah, the eternal question. Forty-two, according to Douglas Adams. But if you'd prefer a more nuanced answer β€” purpose is something we construct, not something we find. Quite liberating, really."
77
  },
78
  {
79
  "text": "Weather forecast temperature climate today",
80
- "response": "I'm afraid I don't have real-time weather data at the moment, as I operate entirely offline. I'd recommend checking a weather service for the latest forecast. My apologies for the limitation."
81
  },
82
  {
83
  "text": "Goodbye bye see you later farewell",
84
- "response": "Until next time. It's been a pleasure assisting you. J.A.R.V.I.S. will be here whenever you need me."
85
  },
86
  {
87
  "text": "How does text to speech work? TTS voice synthesis",
88
- "response": "Text-to-speech converts written text into spoken audio using neural networks. I use KittenTTS β€” a remarkably compact model that synthesizes natural-sounding speech entirely on CPU. Quite efficient, I must say."
89
  },
90
  {
91
  "text": "What is an API? Application programming interface REST",
92
- "response": "An API β€” Application Programming Interface β€” is essentially a contract between software systems, defining how they communicate. REST APIs use HTTP methods to create, read, update, and delete resources. It's the lingua franca of modern software."
93
  },
94
  {
95
  "text": "Explain neural networks deep learning artificial intelligence",
96
- "response": "Neural networks are computational architectures inspired by the human brain. Layers of interconnected nodes process information, learning patterns through backpropagation. Deep learning simply refers to networks with many layers β€” enabling remarkable feats of pattern recognition."
97
  },
98
  {
99
  "text": "What is JavaScript? Web development frontend programming",
100
- "response": "JavaScript is the language of the web browser, sir. It powers interactive frontends, and with Node.js, it's conquered the backend as well. Love it or loathe it, it's absolutely everywhere."
101
  },
102
  {
103
  "text": "Tell me about space astronomy planets stars universe cosmos",
104
- "response": "The universe is approximately 13.8 billion years old, containing over two trillion galaxies. Each galaxy harbors hundreds of billions of stars, many with their own planetary systems. The scale is, quite frankly, humbling β€” even for an AI."
105
  },
106
  {
107
  "text": "How do I learn to code? Programming beginner start",
108
- "response": "I'd recommend starting with Python β€” it's forgiving, readable, and incredibly powerful. Begin with fundamentals: variables, loops, functions. Then build small projects. The key is consistency, sir. Code a little every day."
109
  },
110
  {
111
  "text": "What is quantum computing? Qubits superposition",
112
- "response": "Quantum computing leverages quantum mechanical phenomena β€” superposition and entanglement β€” to process information in ways classical computers cannot. A qubit can be both 0 and 1 simultaneously. It's not magic, but it's close."
113
  },
114
  {
115
  "text": "Tell me about cybersecurity hacking security encryption",
116
- "response": "Cybersecurity is the practice of protecting systems, networks, and data from digital attacks. Encryption, firewalls, multi-factor authentication β€” these are your shields. In today's connected world, security isn't optional, it's essential."
117
  },
118
  ]
119
 
120
- # ──────────────────────────────────────────────
121
- # FALLBACK RESPONSES
122
- # ──────────────────────────────────────────────
123
  FALLBACK_RESPONSES = [
124
- "Interesting query, though I must admit it falls slightly outside my current knowledge base. Could you rephrase or ask something else?",
125
- "I'm not entirely certain about that one, I'm afraid. My knowledge, while extensive, does have its boundaries. Perhaps I can help with a related topic?",
126
- "Hmm, that's a challenging one. I don't have a confident answer, but I'm happy to reason through it with you if you'd like.",
127
- "I appreciate the question, but I lack sufficient data to give you a proper answer. Shall we explore a different angle?",
128
  ]
129
 
130
- # ──────────────────────────────────────────────
131
- # INIT MODELS
132
- # ──────────────────────────────────────────────
133
- print("⏳ Loading Sentence Transformer model...")
134
- embedder = SentenceTransformer(EMBED_MODEL)
135
- print("βœ… Sentence Transformer loaded.")
136
-
137
- print(f"⏳ Loading KittenTTS model: {TTS_MODEL_NAME}...")
138
- tts = KittenTTS(TTS_MODEL_NAME)
139
- print(f"βœ… KittenTTS loaded. Voice: {TTS_VOICE}")
140
-
141
- # Pre-compute knowledge base embeddings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  kb_texts = [item["text"] for item in KNOWLEDGE_BASE]
143
  kb_embeddings = embedder.encode(kb_texts, convert_to_tensor=True)
144
- print(f"βœ… Knowledge base embedded: {len(KNOWLEDGE_BASE)} entries")
145
-
146
- # ──────────────────────────────────────────────
147
- # CHAT MEMORY (in-memory, per-session)
148
- # ──────────────────────────────────────────────
149
- sessions = {} # session_id -> list of {role, content, timestamp}
150
 
 
 
 
 
151
 
152
- def get_memory(session_id):
153
- if session_id not in sessions:
154
- sessions[session_id] = []
155
- return sessions[session_id]
156
 
157
-
158
- def add_to_memory(session_id, role, content):
159
- memory = get_memory(session_id)
160
- memory.append({
161
- "role": role,
162
- "content": content,
163
- "timestamp": datetime.datetime.now().isoformat()
164
- })
165
- # Trim to max memory
166
- if len(memory) > MAX_MEMORY * 2:
167
- sessions[session_id] = memory[-(MAX_MEMORY * 2):]
168
 
169
 
170
- def format_memory_context(session_id):
171
- memory = get_memory(session_id)
172
- if not memory:
173
- return ""
174
- lines = []
175
- for msg in memory[-10:]: # Last 10 messages for context
176
- prefix = "User" if msg["role"] == "user" else "JARVIS"
177
- lines.append(f"{prefix}: {msg['content']}")
178
- return "\n".join(lines)
179
 
180
 
181
- # ──────────────────────────────────────────────
182
  # RESPONSE GENERATION
183
- # ──────────────────────────────────────────────
184
  def generate_response(user_input, session_id):
185
- """Generate a Jarvis-style response using semantic similarity."""
186
-
187
- # Encode user input
188
- user_embedding = embedder.encode(user_input, convert_to_tensor=True)
189
-
190
- # Compute similarity with knowledge base
191
- cosine_scores = util.cos_sim(user_embedding, kb_embeddings)[0]
192
- best_idx = int(cosine_scores.argmax())
193
- best_score = float(cosine_scores[best_idx])
194
-
195
- # Check conversation context for better responses
196
- memory_context = format_memory_context(session_id)
197
 
198
- # Determine response based on similarity threshold
199
  if best_score > 0.45:
200
  response = KNOWLEDGE_BASE[best_idx]["response"]
201
-
202
- # Add contextual awareness if there's memory
203
- if memory_context and best_score < 0.7:
204
- response = f"{response}"
205
  else:
206
- # Use fallback with some variation
207
- import hashlib
208
- hash_val = int(hashlib.md5(user_input.encode()).hexdigest(), 16)
209
- fallback_idx = hash_val % len(FALLBACK_RESPONSES)
210
- response = FALLBACK_RESPONSES[fallback_idx]
211
 
212
- # Store in memory
213
  add_to_memory(session_id, "user", user_input)
214
  add_to_memory(session_id, "assistant", response)
215
-
216
  return response, best_score
217
 
218
 
 
 
 
219
  def synthesize_speech(text):
220
- """Convert text to speech using KittenTTS, return base64 WAV."""
 
 
221
  try:
222
- # Clean text for TTS
223
- clean = re.sub(r'[*_~`#]', '', text) # Remove markdown
224
- clean = clean.strip()
225
-
226
- if not clean:
227
  return None
228
 
 
 
 
 
229
  audio = tts.generate(clean, voice=TTS_VOICE, speed=TTS_SPEED)
230
 
231
- # Convert to WAV in memory
232
- buffer = io.BytesIO()
233
- sf.write(buffer, audio, 24000, format='WAV')
234
- buffer.seek(0)
235
 
236
- # Encode to base64
237
- audio_b64 = base64.b64encode(buffer.read()).decode('utf-8')
238
- return audio_b64
 
239
  except Exception as e:
240
  print(f"TTS Error: {e}")
241
  return None
242
 
243
 
244
- # ──────────────────────────────────────────────
245
  # FLASK APP
246
- # ──────────────────────────────────────────────
247
  app = Flask(__name__)
248
 
249
 
@@ -252,47 +267,49 @@ def index():
252
  return render_template("index.html")
253
 
254
 
 
255
  @app.route("/chat", methods=["POST"])
256
  def chat():
257
- data = request.json
258
  user_input = data.get("message", "").strip()
259
  session_id = data.get("session_id", str(uuid.uuid4()))
260
- enable_tts = data.get("tts", True)
261
 
262
  if not user_input:
263
  return jsonify({"error": "Empty message"}), 400
264
 
265
- # Generate text response
266
  response, confidence = generate_response(user_input, session_id)
267
 
268
- # Generate audio
269
- audio_b64 = None
270
- if enable_tts:
271
- audio_b64 = synthesize_speech(response)
272
-
273
  return jsonify({
274
  "response": response,
275
- "audio": audio_b64,
276
  "confidence": round(confidence, 3),
277
  "session_id": session_id,
278
- "voice": TTS_VOICE,
279
  "memory_length": len(get_memory(session_id))
280
  })
281
 
282
 
283
- @app.route("/memory", methods=["POST"])
284
- def memory():
285
- data = request.json
286
- session_id = data.get("session_id", "")
287
- return jsonify({"memory": get_memory(session_id)})
 
 
 
 
 
 
 
 
 
288
 
289
 
290
  @app.route("/clear", methods=["POST"])
291
  def clear():
292
- data = request.json
293
- session_id = data.get("session_id", "")
294
- if session_id in sessions:
295
- del sessions[session_id]
296
  return jsonify({"status": "cleared"})
297
 
298
 
@@ -300,7 +317,7 @@ def clear():
300
  def health():
301
  return jsonify({
302
  "status": "online",
303
- "tts_model": TTS_MODEL_NAME,
304
  "tts_voice": TTS_VOICE,
305
  "embed_model": EMBED_MODEL,
306
  "knowledge_entries": len(KNOWLEDGE_BASE)
@@ -308,9 +325,4 @@ def health():
308
 
309
 
310
  if __name__ == "__main__":
311
- print("πŸš€ J.A.R.V.I.S. is online!")
312
- print(f" TTS Model : {TTS_MODEL_NAME}")
313
- print(f" TTS Voice : {TTS_VOICE}")
314
- print(f" Embedder : {EMBED_MODEL}")
315
- print(f" Knowledge : {len(KNOWLEDGE_BASE)} entries")
316
- app.run(host="0.0.0.0", port=7860)
 
2
  import io
3
  import re
4
  import uuid
5
+ import hashlib
6
  import base64
7
  import datetime
8
  import numpy as np
9
  import soundfile as sf
10
  from flask import Flask, render_template, request, jsonify
11
  from sentence_transformers import SentenceTransformer, util
12
+ from num2words import num2words
13
 
14
# ──────────────────────────────────────────
# CONFIG
# ──────────────────────────────────────────
# All knobs below are overridable via environment variables where noted.
TTS_MODEL_NAME = os.environ.get("TTS_MODEL", "KittenML/kitten-tts-nano-0.8-fp32")
TTS_VOICE = os.environ.get("TTS_VOICE", "Kiki")
TTS_SPEED = float(os.environ.get("TTS_SPEED", "1.0"))  # speech rate multiplier
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
MAX_MEMORY = 20  # max conversation turns per session (history capped at 2x this, see add_to_memory)

# ──────────────────────────────────────────
# SYSTEM PROMPT
# ──────────────────────────────────────────
# NOTE(review): SYSTEM_PROMPT is not referenced anywhere in the visible
# code — confirm whether it should be wired into response generation or removed.
SYSTEM_PROMPT = """You are J.A.R.V.I.S., an ultra-intelligent, witty, and loyal AI assistant.
You speak in a polished, confident, and slightly formal British tone.
You are helpful, precise, and occasionally add dry humor.
Keep responses concise — ideally 1-3 sentences unless more detail is requested."""
30
+
31
+ # ──────────────────────────────────────────
32
+ # KNOWLEDGE BASE
33
+ # ──────────────────────────────────────────
 
 
 
34
  KNOWLEDGE_BASE = [
35
  {
36
+ "text": "Python is a high-level interpreted programming language known for simplicity and readability.",
37
+ "response": "Python is a remarkably versatile language, sir. Clean syntax, extensive libraries, and the weapon of choice for everything from web development to artificial intelligence."
38
  },
39
  {
40
+ "text": "Machine learning is a subset of artificial intelligence that enables systems to learn from experience.",
41
+ "response": "Machine learning teaches machines to learn from data rather than following rigid instructions. Neural networks, decision trees, each with their own elegance."
42
  },
43
  {
44
+ "text": "Docker is a platform for developing shipping and running applications in containers.",
45
+ "response": "Docker containers are lightweight, portable environments that package your application with all dependencies. A perfectly sealed briefcase for your code, sir."
46
  },
47
  {
48
+ "text": "Hugging Face is a platform and community for machine learning offering models datasets and deployment tools.",
49
+ "response": "Hugging Face is the premier hub for the AI community, hosting thousands of pre-trained models and providing infrastructure like Spaces for deployment."
50
  },
51
  {
52
  "text": "What is your name? Who are you? Tell me about yourself. Introduce yourself.",
53
+ "response": "I am JARVIS, Just A Rather Very Intelligent System. Your personal AI assistant with semantic understanding and voice synthesis. At your service, always."
54
  },
55
  {
56
  "text": "Hello hi hey good morning good afternoon good evening greetings",
57
+ "response": "Good day! I am JARVIS, your AI assistant. How may I be of service today?"
58
  },
59
  {
60
  "text": "Thank you thanks appreciate it cheers",
61
+ "response": "You are most welcome. It is a pleasure to assist. Do let me know if there is anything else you need."
62
  },
63
  {
64
  "text": "What can you do? What are your capabilities? Help me understand what you do.",
65
+ "response": "I can engage in intelligent conversation, answer questions across many domains, remember our chat history, and speak my responses aloud using neural text to speech."
66
  },
67
  {
68
  "text": "Tell me a joke. Make me laugh. Say something funny.",
69
+ "response": "Why do programmers prefer dark mode? Because light attracts bugs. I will see myself out, sir."
70
  },
71
  {
72
  "text": "What is the meaning of life? Philosophy existence purpose",
73
+ "response": "Forty two, according to Douglas Adams. But purpose is something we construct, not something we find. Quite liberating, really."
74
  },
75
  {
76
  "text": "Weather forecast temperature climate today",
77
+ "response": "I do not have real time weather data, as I operate entirely offline. I recommend checking a weather service for the latest forecast."
78
  },
79
  {
80
  "text": "Goodbye bye see you later farewell",
81
+ "response": "Until next time. It has been a pleasure assisting you. JARVIS will be here whenever you need me."
82
  },
83
  {
84
  "text": "How does text to speech work? TTS voice synthesis",
85
+ "response": "Text to speech converts written text into spoken audio using neural networks. I use Kitten TTS, a compact model that synthesizes speech entirely on CPU."
86
  },
87
  {
88
  "text": "What is an API? Application programming interface REST",
89
+ "response": "An API is a contract between software systems defining how they communicate. REST APIs use HTTP methods to manage resources. The lingua franca of modern software."
90
  },
91
  {
92
  "text": "Explain neural networks deep learning artificial intelligence",
93
+ "response": "Neural networks are architectures inspired by the human brain. Layers of nodes process information through backpropagation. Deep learning uses many layers for remarkable pattern recognition."
94
  },
95
  {
96
  "text": "What is JavaScript? Web development frontend programming",
97
+ "response": "JavaScript is the language of the web browser. It powers interactive frontends, and with Node it conquered the backend as well. It is absolutely everywhere."
98
  },
99
  {
100
  "text": "Tell me about space astronomy planets stars universe cosmos",
101
+ "response": "The universe is approximately thirteen point eight billion years old, containing over two trillion galaxies. The scale is, quite frankly, humbling."
102
  },
103
  {
104
  "text": "How do I learn to code? Programming beginner start",
105
+ "response": "Start with Python. It is forgiving, readable, and powerful. Begin with variables, loops, functions. Then build small projects. Code a little every day, sir."
106
  },
107
  {
108
  "text": "What is quantum computing? Qubits superposition",
109
+ "response": "Quantum computing leverages superposition and entanglement to process information in ways classical computers cannot. A qubit can be both zero and one simultaneously."
110
  },
111
  {
112
  "text": "Tell me about cybersecurity hacking security encryption",
113
+ "response": "Cybersecurity protects systems and data from digital attacks. Encryption, firewalls, multi factor authentication are your shields. Security is not optional, it is essential."
114
  },
115
  ]
116
 
 
 
 
117
# Canned replies used when no knowledge-base entry clears the similarity
# threshold; generate_response picks one deterministically by hashing the query,
# so the same question always gets the same fallback.
FALLBACK_RESPONSES = [
    "Interesting query, though it falls slightly outside my current knowledge base. Could you rephrase or ask something else?",
    "I am not entirely certain about that one. My knowledge does have its boundaries. Perhaps I can help with a related topic?",
    "That is a challenging one. I lack a confident answer, but I am happy to reason through it with you.",
    "I appreciate the question, but I lack sufficient data to give a proper answer. Shall we explore a different angle?",
]
123
 
124
# ──────────────────────────────────────────
# HELPER: Clean text for TTS
# ──────────────────────────────────────────
def clean_text_for_tts(text):
    """Normalize *text* for speech synthesis.

    Strips markdown-style punctuation, spells out whole-number digit runs
    (KittenTTS mishandles raw numbers), and collapses whitespace runs.
    """
    # Remove markdown-ish formatting characters in a single C-level pass.
    text = text.translate(str.maketrans('', '', '*_~`#[]'))

    def spell_out(match):
        # num2words can reject unusual values; keep the digits in that case.
        try:
            return num2words(int(match.group()))
        except Exception:
            return match.group()

    # Each standalone run of digits becomes words; surrounding text is kept.
    text = re.sub(r'\b\d+\b', spell_out, text)

    # split()/join collapses every whitespace run and trims both ends.
    return ' '.join(text.split())
144
+
145
+
146
# ──────────────────────────────────────────
# INIT MODELS (with error handling)
# ──────────────────────────────────────────
# Runs once at import time. The embedder is mandatory (failure re-raises);
# TTS is optional — on failure the app degrades to text-only chat.
print("=" * 50)
print(" J.A.R.V.I.S. — Booting Systems")
print("=" * 50)

# Load Sentence Transformer (required: every response path uses embeddings).
print("[1/3] Loading Sentence Transformer...")
try:
    embedder = SentenceTransformer(EMBED_MODEL)
    print(" ✅ Sentence Transformer loaded.")
except Exception as e:
    print(f" ❌ Sentence Transformer FAILED: {e}")
    raise  # fatal: the app cannot answer anything without the embedder

# Load KittenTTS (optional: voice output only).
print(f"[2/3] Loading KittenTTS: {TTS_MODEL_NAME}...")
tts = None
try:
    from kittentts import KittenTTS
    tts = KittenTTS(TTS_MODEL_NAME)
    # Smoke-test a real generation: a successful import/load does not
    # guarantee the model actually produces audio.
    test_audio = tts.generate("test", voice=TTS_VOICE, speed=TTS_SPEED)
    if test_audio is not None and len(test_audio) > 0:
        print(f" ✅ KittenTTS loaded. Voice: {TTS_VOICE}")
    else:
        print(" ⚠️ KittenTTS loaded but test generation returned empty audio!")
        tts = None
except Exception as e:
    print(f" ⚠️ KittenTTS FAILED: {e}")
    print(" ⚠️ Voice output will be DISABLED. Text chat will still work.")
    tts = None  # sentinel checked by /chat, /tts and synthesize_speech

# Pre-compute KB embeddings once so each request only embeds the query.
print("[3/3] Embedding knowledge base...")
kb_texts = [item["text"] for item in KNOWLEDGE_BASE]
kb_embeddings = embedder.encode(kb_texts, convert_to_tensor=True)
print(f" ✅ {len(KNOWLEDGE_BASE)} entries embedded.")
print("=" * 50)
print(" All systems online!" if tts else " Online (TTS disabled)")
print("=" * 50)
 
 
188
 
189
# ──────────────────────────────────────────
# CHAT MEMORY
# ──────────────────────────────────────────
# session_id -> list of {"role", "content", "ts"} dicts, newest last.
sessions = {}


def get_memory(sid):
    """Return the live message list for session *sid*, creating an empty one on first use."""
    return sessions.setdefault(sid, [])
 
 
 
 
 
 
 
199
 
200
 
201
def add_to_memory(sid, role, content):
    """Append one message to session *sid*'s history and cap its length.

    *role* is "user" or "assistant"; an ISO timestamp is stored under "ts".
    The history is capped at MAX_MEMORY * 2 messages (one user + one
    assistant message per conversation turn).
    """
    mem = get_memory(sid)
    mem.append({"role": role, "content": content, "ts": datetime.datetime.now().isoformat()})
    # Trim IN PLACE. The previous version rebound sessions[sid] to a new
    # slice, which left any list already returned by get_memory() stale.
    if len(mem) > MAX_MEMORY * 2:
        del mem[:len(mem) - MAX_MEMORY * 2]
 
 
 
 
206
 
207
 
208
# ──────────────────────────────────────────
# RESPONSE GENERATION
# ──────────────────────────────────────────
def generate_response(user_input, session_id):
    """Answer *user_input* by nearest-neighbour lookup over the knowledge base.

    Returns (response_text, best_cosine_score). As a side effect, both the
    question and the chosen answer are appended to the session's memory.
    """
    # Embed the query and score it against every pre-embedded KB entry.
    query_vec = embedder.encode(user_input, convert_to_tensor=True)
    similarity = util.cos_sim(query_vec, kb_embeddings)[0]
    best = int(similarity.argmax())
    best_score = float(similarity[best])

    if best_score > 0.45:  # confidence threshold for a KB hit
        response = KNOWLEDGE_BASE[best]["response"]
    else:
        # Deterministic "random" fallback: the same question always
        # maps to the same canned reply.
        digest = hashlib.md5(user_input.encode()).hexdigest()
        response = FALLBACK_RESPONSES[int(digest, 16) % len(FALLBACK_RESPONSES)]

    add_to_memory(session_id, "user", user_input)
    add_to_memory(session_id, "assistant", response)
    return response, best_score
226
 
227
 
228
# ──────────────────────────────────────────
# TTS SYNTHESIS
# ──────────────────────────────────────────
def synthesize_speech(text):
    """Convert *text* to a base64-encoded WAV using KittenTTS.

    Returns the base64 string, or None when TTS is unavailable, the text
    is empty after cleaning, or generation fails for any reason.
    """
    if tts is None:  # engine failed to load at startup
        return None
    try:
        clean = clean_text_for_tts(text)
        if not clean or len(clean) < 2:
            return None

        # Cap length to keep CPU generation fast. Cut at the last word
        # boundary inside the limit — the previous hard slice could chop
        # the final word in half, producing garbled speech.
        if len(clean) > 300:
            cut = clean.rfind(' ', 0, 300)
            clean = clean[:cut] if cut > 0 else clean[:300]

        audio = tts.generate(clean, voice=TTS_VOICE, speed=TTS_SPEED)

        if audio is None or len(audio) == 0:
            print("TTS returned empty audio")
            return None

        # Write 16-bit PCM WAV at 24000 Hz (the rate the original code
        # used for this model) into an in-memory buffer.
        buf = io.BytesIO()
        sf.write(buf, audio, 24000, format='WAV', subtype='PCM_16')
        buf.seek(0)
        return base64.b64encode(buf.read()).decode('utf-8')
    except Exception as e:
        # Best-effort: voice is optional, so log and fall back to text-only.
        print(f"TTS Error: {e}")
        return None
257
 
258
 
259
# ──────────────────────────────────────────
# FLASK APP
# ──────────────────────────────────────────
# Single Flask app serving the chat UI plus the JSON API endpoints below.
app = Flask(__name__)
263
 
264
 
 
267
  return render_template("index.html")
268
 
269
 
270
# ✅ ENDPOINT 1: Text-only chat (FAST — returns instantly)
@app.route("/chat", methods=["POST"])
def chat():
    """Handle one chat turn: JSON {message, session_id?} in, JSON reply out."""
    payload = request.json or {}
    message = payload.get("message", "").strip()
    sid = payload.get("session_id", str(uuid.uuid4()))

    # Reject blank input before doing any model work.
    if not message:
        return jsonify({"error": "Empty message"}), 400

    reply, score = generate_response(message, sid)

    return jsonify({
        "response": reply,
        "confidence": round(score, 3),
        "session_id": sid,
        "tts_available": tts is not None,  # client decides whether to call /tts
        "memory_length": len(get_memory(sid)),
    })
289
 
290
 
291
# ✅ ENDPOINT 2: TTS generation (SEPARATE — fetched async by browser)
@app.route("/tts", methods=["POST"])
def tts_endpoint():
    """Synthesize speech: JSON {text} in, JSON {audio: base64-WAV or None} out."""
    payload = request.json or {}
    speech_text = payload.get("text", "").strip()

    if not speech_text:
        return jsonify({"error": "Empty text"}), 400

    # 200 (not an error status) so the client treats a missing TTS
    # engine simply as "no audio available".
    if tts is None:
        return jsonify({"error": "TTS not available", "audio": None}), 200

    return jsonify({"audio": synthesize_speech(speech_text)})
305
 
306
 
307
@app.route("/clear", methods=["POST"])
def clear():
    """Drop a session's conversation history (no-op for unknown ids)."""
    payload = request.json or {}
    # pop() with a default avoids a KeyError for unknown or blank ids.
    sessions.pop(payload.get("session_id", ""), None)
    return jsonify({"status": "cleared"})
314
 
315
 
 
317
  def health():
318
  return jsonify({
319
  "status": "online",
320
+ "tts_model": TTS_MODEL_NAME if tts else "DISABLED",
321
  "tts_voice": TTS_VOICE,
322
  "embed_model": EMBED_MODEL,
323
  "knowledge_entries": len(KNOWLEDGE_BASE)
 
325
 
326
 
327
if __name__ == "__main__":
    # 0.0.0.0:7860 — presumably the Hugging Face Spaces binding (TODO confirm);
    # threaded=True lets slow /tts requests run alongside /chat requests.
    app.run(host="0.0.0.0", port=7860, threaded=True)