OrbitMC committed on
Commit
f7ae188
Β·
verified Β·
1 Parent(s): 8c4f45e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +221 -162
app.py CHANGED
@@ -2,213 +2,267 @@ import os
2
  import io
3
  import re
4
  import uuid
5
- import hashlib
6
  import base64
7
  import datetime
 
8
  import numpy as np
9
  import soundfile as sf
10
  from flask import Flask, render_template, request, jsonify
11
- from sentence_transformers import SentenceTransformer, util
12
  from num2words import num2words
 
13
 
14
  # ──────────────────────────────────────────
15
- # CONFIG
16
  # ──────────────────────────────────────────
17
- TTS_MODEL_NAME = os.environ.get("TTS_MODEL", "KittenML/kitten-tts-nano-0.8-fp32")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  TTS_VOICE = os.environ.get("TTS_VOICE", "Kiki")
19
  TTS_SPEED = float(os.environ.get("TTS_SPEED", "1.0"))
20
- EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
21
  MAX_MEMORY = 20
 
22
 
23
  # ──────────────────────────────────────────
24
  # SYSTEM PROMPT
25
  # ──────────────────────────────────────────
26
  SYSTEM_PROMPT = """You are J.A.R.V.I.S., an ultra-intelligent, witty, and loyal AI assistant.
27
- You speak in a polished, confident, and slightly formal British tone.
28
  You are helpful, precise, and occasionally add dry humor.
29
- Keep responses concise β€” ideally 1-3 sentences unless more detail is requested."""
 
 
 
30
 
31
  # ──────────────────────────────────────────
32
- # KNOWLEDGE BASE
33
  # ──────────────────────────────────────────
34
  KNOWLEDGE_BASE = [
35
- {
36
- "text": "Python is a high-level interpreted programming language known for simplicity and readability.",
37
- "response": "Python is a remarkably versatile language, sir. Clean syntax, extensive libraries, and the weapon of choice for everything from web development to artificial intelligence."
38
- },
39
- {
40
- "text": "Machine learning is a subset of artificial intelligence that enables systems to learn from experience.",
41
- "response": "Machine learning teaches machines to learn from data rather than following rigid instructions. Neural networks, decision trees, each with their own elegance."
42
- },
43
- {
44
- "text": "Docker is a platform for developing shipping and running applications in containers.",
45
- "response": "Docker containers are lightweight, portable environments that package your application with all dependencies. A perfectly sealed briefcase for your code, sir."
46
- },
47
- {
48
- "text": "Hugging Face is a platform and community for machine learning offering models datasets and deployment tools.",
49
- "response": "Hugging Face is the premier hub for the AI community, hosting thousands of pre-trained models and providing infrastructure like Spaces for deployment."
50
- },
51
- {
52
- "text": "What is your name? Who are you? Tell me about yourself. Introduce yourself.",
53
- "response": "I am JARVIS, Just A Rather Very Intelligent System. Your personal AI assistant with semantic understanding and voice synthesis. At your service, always."
54
- },
55
- {
56
- "text": "Hello hi hey good morning good afternoon good evening greetings",
57
- "response": "Good day! I am JARVIS, your AI assistant. How may I be of service today?"
58
- },
59
- {
60
- "text": "Thank you thanks appreciate it cheers",
61
- "response": "You are most welcome. It is a pleasure to assist. Do let me know if there is anything else you need."
62
- },
63
- {
64
- "text": "What can you do? What are your capabilities? Help me understand what you do.",
65
- "response": "I can engage in intelligent conversation, answer questions across many domains, remember our chat history, and speak my responses aloud using neural text to speech."
66
- },
67
- {
68
- "text": "Tell me a joke. Make me laugh. Say something funny.",
69
- "response": "Why do programmers prefer dark mode? Because light attracts bugs. I will see myself out, sir."
70
- },
71
- {
72
- "text": "What is the meaning of life? Philosophy existence purpose",
73
- "response": "Forty two, according to Douglas Adams. But purpose is something we construct, not something we find. Quite liberating, really."
74
- },
75
- {
76
- "text": "Weather forecast temperature climate today",
77
- "response": "I do not have real time weather data, as I operate entirely offline. I recommend checking a weather service for the latest forecast."
78
- },
79
- {
80
- "text": "Goodbye bye see you later farewell",
81
- "response": "Until next time. It has been a pleasure assisting you. JARVIS will be here whenever you need me."
82
- },
83
- {
84
- "text": "How does text to speech work? TTS voice synthesis",
85
- "response": "Text to speech converts written text into spoken audio using neural networks. I use Kitten TTS, a compact model that synthesizes speech entirely on CPU."
86
- },
87
- {
88
- "text": "What is an API? Application programming interface REST",
89
- "response": "An API is a contract between software systems defining how they communicate. REST APIs use HTTP methods to manage resources. The lingua franca of modern software."
90
- },
91
- {
92
- "text": "Explain neural networks deep learning artificial intelligence",
93
- "response": "Neural networks are architectures inspired by the human brain. Layers of nodes process information through backpropagation. Deep learning uses many layers for remarkable pattern recognition."
94
- },
95
- {
96
- "text": "What is JavaScript? Web development frontend programming",
97
- "response": "JavaScript is the language of the web browser. It powers interactive frontends, and with Node it conquered the backend as well. It is absolutely everywhere."
98
- },
99
- {
100
- "text": "Tell me about space astronomy planets stars universe cosmos",
101
- "response": "The universe is approximately thirteen point eight billion years old, containing over two trillion galaxies. The scale is, quite frankly, humbling."
102
- },
103
- {
104
- "text": "How do I learn to code? Programming beginner start",
105
- "response": "Start with Python. It is forgiving, readable, and powerful. Begin with variables, loops, functions. Then build small projects. Code a little every day, sir."
106
- },
107
- {
108
- "text": "What is quantum computing? Qubits superposition",
109
- "response": "Quantum computing leverages superposition and entanglement to process information in ways classical computers cannot. A qubit can be both zero and one simultaneously."
110
- },
111
- {
112
- "text": "Tell me about cybersecurity hacking security encryption",
113
- "response": "Cybersecurity protects systems and data from digital attacks. Encryption, firewalls, multi factor authentication are your shields. Security is not optional, it is essential."
114
- },
115
  ]
116
 
117
  FALLBACK_RESPONSES = [
118
- "Interesting query, though it falls slightly outside my current knowledge base. Could you rephrase or ask something else?",
119
- "I am not entirely certain about that one. My knowledge does have its boundaries. Perhaps I can help with a related topic?",
120
- "That is a challenging one. I lack a confident answer, but I am happy to reason through it with you.",
121
- "I appreciate the question, but I lack sufficient data to give a proper answer. Shall we explore a different angle?",
122
  ]
123
 
124
  # ──────────────────────────────────────────
125
- # HELPER: Clean text for TTS
126
  # ──────────────────────────────────────────
127
  def clean_text_for_tts(text):
128
- """Remove special chars and convert numbers to words for TTS."""
129
- # Remove markdown-like formatting
130
- text = re.sub(r'[*_~`#\[\]]', '', text)
131
-
132
- # Convert numbers to words (KittenTTS bug with raw numbers)
133
  def replace_number(match):
134
  try:
135
  return num2words(int(match.group()))
136
  except Exception:
137
  return match.group()
138
-
139
  text = re.sub(r'\b\d+\b', replace_number, text)
140
-
141
- # Clean up extra whitespace
142
  text = re.sub(r'\s+', ' ', text).strip()
143
  return text
144
 
145
-
146
  # ──────────────────────────────────────────
147
- # INIT MODELS (with error handling)
148
  # ──────────────────────────────────────────
149
- print("=" * 50)
150
  print(" J.A.R.V.I.S. β€” Booting Systems")
151
- print("=" * 50)
152
 
153
- # Load Sentence Transformer
154
- print("[1/3] Loading Sentence Transformer...")
155
- try:
156
- embedder = SentenceTransformer(EMBED_MODEL)
157
- print(" βœ… Sentence Transformer loaded.")
158
- except Exception as e:
159
- print(f" ❌ Sentence Transformer FAILED: {e}")
160
- raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
- # Load KittenTTS
163
- print(f"[2/3] Loading KittenTTS: {TTS_MODEL_NAME}...")
 
 
 
 
 
 
 
 
 
 
 
164
  tts = None
 
 
165
  try:
166
  from kittentts import KittenTTS
167
- tts = KittenTTS(TTS_MODEL_NAME)
168
- # Test generation to verify it works
169
  test_audio = tts.generate("test", voice=TTS_VOICE, speed=TTS_SPEED)
170
  if test_audio is not None and len(test_audio) > 0:
171
- print(f" βœ… KittenTTS loaded. Voice: {TTS_VOICE}")
172
  else:
173
- print(" ⚠️ KittenTTS loaded but test generation returned empty audio!")
174
  tts = None
175
  except Exception as e:
176
  print(f" ⚠️ KittenTTS FAILED: {e}")
177
- print(" ⚠️ Voice output will be DISABLED. Text chat will still work.")
178
  tts = None
179
 
180
- # Pre-compute KB embeddings
181
- print("[3/3] Embedding knowledge base...")
182
- kb_texts = [item["text"] for item in KNOWLEDGE_BASE]
183
- kb_embeddings = embedder.encode(kb_texts, convert_to_tensor=True)
184
- print(f" βœ… {len(KNOWLEDGE_BASE)} entries embedded.")
185
- print("=" * 50)
186
- print(" All systems online!" if tts else " Online (TTS disabled)")
187
- print("=" * 50)
188
 
189
  # ──────────────────────────────────────────
190
  # CHAT MEMORY
191
  # ──────────────────────────────────────────
192
  sessions = {}
193
 
194
-
195
  def get_memory(sid):
196
  if sid not in sessions:
197
  sessions[sid] = []
198
  return sessions[sid]
199
 
200
-
201
  def add_to_memory(sid, role, content):
202
  mem = get_memory(sid)
203
  mem.append({"role": role, "content": content, "ts": datetime.datetime.now().isoformat()})
204
  if len(mem) > MAX_MEMORY * 2:
205
  sessions[sid] = mem[-(MAX_MEMORY * 2):]
206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
  # ──────────────────────────────────────────
209
- # RESPONSE GENERATION
210
  # ──────────────────────────────────────────
211
- def generate_response(user_input, session_id):
 
 
 
 
212
  user_emb = embedder.encode(user_input, convert_to_tensor=True)
213
  scores = util.cos_sim(user_emb, kb_embeddings)[0]
214
  best_idx = int(scores.argmax())
@@ -222,31 +276,33 @@ def generate_response(user_input, session_id):
222
 
223
  add_to_memory(session_id, "user", user_input)
224
  add_to_memory(session_id, "assistant", response)
225
- return response, best_score
226
 
 
 
 
 
 
 
 
 
227
 
228
  # ──────────────────────────────────────────
229
- # TTS SYNTHESIS
230
  # ──────────────────────────────────────────
231
- def synthesize_speech(text):
232
- """Convert text to base64 WAV. Returns None on failure."""
233
  if tts is None:
234
  return None
235
  try:
 
236
  clean = clean_text_for_tts(text)
237
  if not clean or len(clean) < 2:
238
  return None
239
-
240
- # Limit length to prevent long generation times on CPU
241
- if len(clean) > 300:
242
- clean = clean[:300]
243
-
244
- audio = tts.generate(clean, voice=TTS_VOICE, speed=TTS_SPEED)
245
-
246
  if audio is None or len(audio) == 0:
247
- print("TTS returned empty audio")
248
  return None
249
-
250
  buf = io.BytesIO()
251
  sf.write(buf, audio, 24000, format='WAV', subtype='PCM_16')
252
  buf.seek(0)
@@ -255,19 +311,16 @@ def synthesize_speech(text):
255
  print(f"TTS Error: {e}")
256
  return None
257
 
258
-
259
  # ──────────────────────────────────────────
260
  # FLASK APP
261
  # ──────────────────────────────────────────
262
  app = Flask(__name__)
263
 
264
-
265
  @app.route("/")
266
  def index():
267
  return render_template("index.html")
268
 
269
-
270
- # βœ… ENDPOINT 1: Text-only chat (FAST β€” returns instantly)
271
  @app.route("/chat", methods=["POST"])
272
  def chat():
273
  data = request.json or {}
@@ -277,33 +330,38 @@ def chat():
277
  if not user_input:
278
  return jsonify({"error": "Empty message"}), 400
279
 
280
- response, confidence = generate_response(user_input, session_id)
 
 
 
 
 
281
 
282
  return jsonify({
283
  "response": response,
284
- "confidence": round(confidence, 3),
285
  "session_id": session_id,
 
286
  "tts_available": tts is not None,
287
- "memory_length": len(get_memory(session_id))
 
288
  })
289
 
290
-
291
- # βœ… ENDPOINT 2: TTS generation (SEPARATE β€” fetched async by browser)
292
  @app.route("/tts", methods=["POST"])
293
  def tts_endpoint():
294
  data = request.json or {}
295
  text = data.get("text", "").strip()
 
296
 
297
  if not text:
298
  return jsonify({"error": "Empty text"}), 400
299
-
300
  if tts is None:
301
  return jsonify({"error": "TTS not available", "audio": None}), 200
302
 
303
- audio_b64 = synthesize_speech(text)
304
  return jsonify({"audio": audio_b64})
305
 
306
-
307
  @app.route("/clear", methods=["POST"])
308
  def clear():
309
  data = request.json or {}
@@ -312,17 +370,18 @@ def clear():
312
  del sessions[sid]
313
  return jsonify({"status": "cleared"})
314
 
315
-
316
  @app.route("/health")
317
  def health():
318
  return jsonify({
319
  "status": "online",
320
- "tts_model": TTS_MODEL_NAME if tts else "DISABLED",
 
 
321
  "tts_voice": TTS_VOICE,
322
- "embed_model": EMBED_MODEL,
323
- "knowledge_entries": len(KNOWLEDGE_BASE)
324
  })
325
 
326
-
327
  if __name__ == "__main__":
328
  app.run(host="0.0.0.0", port=7860, threaded=True)
 
2
  import io
3
  import re
4
  import uuid
 
5
  import base64
6
  import datetime
7
+ import traceback
8
  import numpy as np
9
  import soundfile as sf
10
  from flask import Flask, render_template, request, jsonify
 
11
  from num2words import num2words
12
+ import torch
13
 
14
  # ──────────────────────────────────────────
15
+ # CONFIG β€” All switchable via env vars or UI
16
  # ──────────────────────────────────────────
17
+
18
+ # LLM Options:
19
+ # "gemma-3-270m-it" β†’ Real generative LLM (DEFAULT, best quality)
20
+ # "minilm-semantic" β†’ Sentence-Transformers semantic search fallback
21
+ LLM_MODE = os.environ.get("LLM_MODE", "gemma-3-270m-it")
22
+
23
+ # TTS Options:
24
+ # "nano-fp32" β†’ KittenTTS Nano 15M params, 56MB (DEFAULT, fastest)
25
+ # "nano-int8" β†’ KittenTTS Nano 15M params, 25MB (smallest, some issues reported)
26
+ # "micro" β†’ KittenTTS Micro 40M params, 41MB (better quality)
27
+ # "mini" β†’ KittenTTS Mini 80M params, 80MB (best quality, slowest)
28
+ TTS_MODE = os.environ.get("TTS_MODE", "nano-fp32")
29
+
30
+ TTS_MODEL_MAP = {
31
+ "nano-fp32": "KittenML/kitten-tts-nano-0.8-fp32",
32
+ "nano-int8": "KittenML/kitten-tts-nano-0.8-int8",
33
+ "micro": "KittenML/kitten-tts-micro-0.8",
34
+ "mini": "KittenML/kitten-tts-mini-0.8",
35
+ }
36
+
37
+ # Voice: Bella, Jasper, Luna, Bruno, Rosie, Hugo, Kiki, Leo
38
  TTS_VOICE = os.environ.get("TTS_VOICE", "Kiki")
39
  TTS_SPEED = float(os.environ.get("TTS_SPEED", "1.0"))
 
40
  MAX_MEMORY = 20
41
+ MAX_NEW_TOKENS = int(os.environ.get("MAX_NEW_TOKENS", "256"))
42
 
43
  # ──────────────────────────────────────────
44
  # SYSTEM PROMPT
45
  # ──────────────────────────────────────────
46
  SYSTEM_PROMPT = """You are J.A.R.V.I.S., an ultra-intelligent, witty, and loyal AI assistant.
47
+ You speak in a polished, confident, and slightly formal British tone β€” like a perfect digital butler.
48
  You are helpful, precise, and occasionally add dry humor.
49
+ You always address the user respectfully.
50
+ You have expertise in science, technology, coding, and general knowledge.
51
+ When unsure, you say so honestly but offer your best reasoning.
52
+ Keep responses concise but insightful β€” ideally 1-4 sentences unless more detail is requested."""
53
 
54
  # ──────────────────────────────────────────
55
+ # KNOWLEDGE BASE (for MiniLM fallback mode)
56
  # ──────────────────────────────────────────
57
  KNOWLEDGE_BASE = [
58
+ {"text": "Python programming language coding",
59
+ "response": "Python is a remarkably versatile language, sir. Clean syntax, extensive libraries, the weapon of choice for web development to artificial intelligence."},
60
+ {"text": "Machine learning AI artificial intelligence deep learning",
61
+ "response": "Machine learning teaches machines to learn from data rather than following rigid instructions. Neural networks, decision trees, each with their own elegance."},
62
+ {"text": "Docker containers deployment devops",
63
+ "response": "Docker containers are lightweight portable environments that package your application with all dependencies. A perfectly sealed briefcase for your code, sir."},
64
+ {"text": "What is your name who are you introduce yourself",
65
+ "response": "I am JARVIS, Just A Rather Very Intelligent System. Your personal AI assistant with semantic understanding and voice synthesis. At your service."},
66
+ {"text": "Hello hi hey good morning good afternoon greetings",
67
+ "response": "Good day! I am JARVIS, your AI assistant. How may I be of service today?"},
68
+ {"text": "Thank you thanks appreciate it",
69
+ "response": "You are most welcome. It is a pleasure to assist. Do let me know if there is anything else you need."},
70
+ {"text": "What can you do capabilities help features",
71
+ "response": "I can engage in intelligent conversation, answer questions across many domains, remember our chat history, and speak responses aloud using neural text to speech."},
72
+ {"text": "Tell me a joke funny humor",
73
+ "response": "Why do programmers prefer dark mode? Because light attracts bugs. I will see myself out, sir."},
74
+ {"text": "Goodbye bye farewell see you later",
75
+ "response": "Until next time. It has been a pleasure. JARVIS will be here whenever you need me."},
76
+ {"text": "Explain neural networks deep learning",
77
+ "response": "Neural networks are architectures inspired by the human brain. Layers of nodes process information through backpropagation enabling remarkable pattern recognition."},
78
+ {"text": "Space astronomy planets stars universe",
79
+ "response": "The universe is approximately thirteen point eight billion years old containing over two trillion galaxies. The scale is quite frankly humbling even for an AI."},
80
+ {"text": "How to learn programming coding beginner",
81
+ "response": "Start with Python. It is forgiving readable and powerful. Begin with variables loops functions. Then build small projects. Code a little every day sir."},
82
+ {"text": "Quantum computing qubits superposition",
83
+ "response": "Quantum computing leverages superposition and entanglement to process information in ways classical computers cannot. A qubit can be both zero and one simultaneously."},
84
+ {"text": "Cybersecurity hacking encryption security",
85
+ "response": "Cybersecurity protects systems and data from digital attacks. Encryption firewalls multi factor authentication are your shields. Security is essential not optional."},
86
+ {"text": "JavaScript web development frontend backend",
87
+ "response": "JavaScript is the language of the web browser sir. It powers interactive frontends and with Node it conquered the backend as well. It is absolutely everywhere."},
88
+ {"text": "Meaning of life philosophy purpose existence",
89
+ "response": "Forty two according to Douglas Adams. But purpose is something we construct not something we find. Quite liberating really."},
90
+ {"text": "Weather forecast temperature climate",
91
+ "response": "I do not have real time weather data as I operate entirely offline. I recommend checking a weather service for the latest forecast."},
92
+ {"text": "Text to speech TTS voice synthesis how",
93
+ "response": "Text to speech converts written text into spoken audio using neural networks. I use Kitten TTS a compact model that synthesizes speech entirely on CPU."},
94
+ {"text": "API application programming interface REST",
95
+ "response": "An API is a contract between software systems defining how they communicate. REST APIs use HTTP methods to manage resources. The lingua franca of modern software."},
96
+ {"text": "Hugging Face models datasets spaces",
97
+ "response": "Hugging Face is the premier hub for the AI community hosting thousands of pre-trained models and providing infrastructure like Spaces for deployment."},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  ]
99
 
100
  FALLBACK_RESPONSES = [
101
+ "Interesting query though it falls slightly outside my current knowledge base. Could you rephrase or ask something else?",
102
+ "I am not entirely certain about that one. Perhaps I can help with a related topic?",
103
+ "That is a challenging one. I lack a confident answer but I am happy to reason through it with you.",
104
+ "I appreciate the question but I lack sufficient data for a proper answer. Shall we explore a different angle?",
105
  ]
106
 
107
  # ──────────────────────────────────────────
108
+ # TEXT CLEANING FOR TTS
109
  # ──────────────────────────────────────────
110
def clean_text_for_tts(text):
    """Normalise text for speech synthesis.

    Strips markup punctuation and URLs, spells out standalone integers
    (raw digits trip up the TTS engine), and collapses whitespace.
    """
    stripped = re.sub(r'[*_~`#\[\]{}()\\|<>]', '', text)
    stripped = re.sub(r'https?://\S+', '', stripped)

    def spell_out(match):
        # Leave the token untouched when it cannot be voiced as a number.
        try:
            return num2words(int(match.group()))
        except Exception:
            return match.group()

    stripped = re.sub(r'\b\d+\b', spell_out, stripped)
    return re.sub(r'\s+', ' ', stripped).strip()
121
 
 
122
# ──────────────────────────────────────────
# MODEL LOADING
# ──────────────────────────────────────────
print("=" * 55)
print(" J.A.R.V.I.S. — Booting Systems")
print("=" * 55)

# ── LLM ──
# Exactly one backend ends up active; the others stay None.
gemma_model = None      # transformers CausalLM when Gemma mode loads
gemma_tokenizer = None
embedder = None         # SentenceTransformer when the MiniLM fallback is active
kb_embeddings = None    # pre-computed KB tensor for cosine-similarity lookup

if LLM_MODE == "gemma-3-270m-it":
    print(f"[1/3] Loading Gemma 3 270M-IT (generative LLM)...")
    try:
        from transformers import AutoTokenizer, AutoModelForCausalLM
        model_id = "google/gemma-3-270m-it"
        gemma_tokenizer = AutoTokenizer.from_pretrained(model_id)
        gemma_model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.float32,  # CPU-only deployment, so plain fp32
            device_map="cpu",
        )
        gemma_model.eval()
        print(f" ✅ Gemma 3 270M-IT loaded successfully!")
    except Exception as e:
        # Any failure (download, memory, library version) demotes us to the
        # semantic-search fallback instead of crashing the whole app.
        print(f" ❌ Gemma 3 failed: {e}")
        print(f" ⚠️ Falling back to MiniLM semantic search...")
        LLM_MODE = "minilm-semantic"

if LLM_MODE == "minilm-semantic":
    print(f"[1/3] Loading MiniLM-L6-v2 (semantic search fallback)...")
    try:
        from sentence_transformers import SentenceTransformer, util
        embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
        # Embed the knowledge base once at startup; queries are compared
        # against this tensor at request time.
        kb_texts = [item["text"] for item in KNOWLEDGE_BASE]
        kb_embeddings = embedder.encode(kb_texts, convert_to_tensor=True)
        print(f" ✅ MiniLM loaded. {len(KNOWLEDGE_BASE)} KB entries embedded.")
    except Exception as e:
        # With no LLM backend at all the app is useless — abort startup.
        print(f" ❌ MiniLM also failed: {e}")
        raise

# ── TTS ──
tts = None
# Unknown TTS_MODE values silently fall back to the nano-fp32 default.
tts_model_name = TTS_MODEL_MAP.get(TTS_MODE, TTS_MODEL_MAP["nano-fp32"])
print(f"[2/3] Loading KittenTTS: {TTS_MODE} → {tts_model_name}...")
try:
    from kittentts import KittenTTS
    tts = KittenTTS(tts_model_name)
    # Smoke-test generation: a model that loads but emits no audio is useless.
    test_audio = tts.generate("test", voice=TTS_VOICE, speed=TTS_SPEED)
    if test_audio is not None and len(test_audio) > 0:
        print(f" ✅ KittenTTS loaded. Model: {TTS_MODE}, Voice: {TTS_VOICE}")
    else:
        print(" ⚠️ KittenTTS loaded but test returned empty audio!")
        tts = None
except Exception as e:
    # TTS is optional: text chat keeps working with tts = None.
    print(f" ⚠️ KittenTTS FAILED: {e}")
    tts = None

print(f"[3/3] All systems initialized.")
print("=" * 55)
print(f" LLM : {LLM_MODE}")
print(f" TTS : {TTS_MODE} ({'OK' if tts else 'DISABLED'})")
print(f" Voice: {TTS_VOICE}")
print("=" * 55)
 
 
188
 
189
  # ──────────────────────────────────────────
190
  # CHAT MEMORY
191
  # ──────────────────────────────────────────
192
  sessions = {}
193
 
 
194
def get_memory(sid):
    """Return the chat-history list for session *sid*, creating it on first use."""
    return sessions.setdefault(sid, [])
198
 
 
199
def add_to_memory(sid, role, content):
    """Append a timestamped message to the session and trim old history."""
    cap = MAX_MEMORY * 2  # MAX_MEMORY exchanges = user + assistant messages
    history = get_memory(sid)
    history.append({
        "role": role,
        "content": content,
        "ts": datetime.datetime.now().isoformat(),
    })
    if len(history) > cap:
        # Keep only the most recent messages.
        sessions[sid] = history[-cap:]
204
 
205
# ──────────────────────────────────────────
# RESPONSE GENERATION — GEMMA 3
# ──────────────────────────────────────────
def generate_gemma_response(user_input, session_id):
    """Generate a reply with Gemma 3 270M-IT via its chat template.

    Builds a message list from the system prompt, recent session memory and
    the new user turn, samples a completion on CPU, records both turns in
    memory, and returns the reply text (never empty).
    """
    memory = get_memory(session_id)

    # Build conversation messages for Gemma's chat template.  The system
    # prompt is delivered as a first user turn plus a canned acknowledgement
    # (Gemma's template is user/assistant alternating).
    messages = [{"role": "user", "content": f"[System Instruction]\n{SYSTEM_PROMPT}"},
                {"role": "assistant", "content": "Understood. I am JARVIS, at your service."}]

    # Add recent memory (last 8 turns = 16 messages)
    recent = memory[-(8 * 2):]
    for msg in recent:
        # Memory roles are already "user"/"assistant"; this normalises
        # anything else to "assistant" defensively.
        role = "user" if msg["role"] == "user" else "assistant"
        messages.append({"role": role, "content": msg["content"]})

    # Add current user message
    messages.append({"role": "user", "content": user_input})

    # Tokenize using Gemma's chat template
    input_ids = gemma_tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        add_generation_prompt=True,
    )

    # Sampled (non-greedy) decoding, bounded by MAX_NEW_TOKENS.
    with torch.no_grad():
        outputs = gemma_model.generate(
            input_ids,
            max_new_tokens=MAX_NEW_TOKENS,
            do_sample=True,
            temperature=1.0,
            top_k=64,
            top_p=0.95,
        )

    # Decode only the new tokens (slice off the prompt portion).
    new_tokens = outputs[0][input_ids.shape[-1]:]
    response = gemma_tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    # Clean up common artifacts: drop anything after a leaked turn marker.
    response = response.split("<end_of_turn>")[0].strip()
    response = response.split("<start_of_turn>")[0].strip()

    if not response or len(response) < 2:
        # Never return an empty reply — the TTS/UI path expects text.
        response = "I appear to have momentarily lost my train of thought. Could you rephrase that, sir?"

    add_to_memory(session_id, "user", user_input)
    add_to_memory(session_id, "assistant", response)
    return response
257
 
258
  # ──────────────────────────────────────────
259
+ # RESPONSE GENERATION β€” MINILM FALLBACK
260
  # ──────────────────────────────────────────
261
+ def generate_minilm_response(user_input, session_id):
262
+ """Semantic search fallback using MiniLM."""
263
+ from sentence_transformers import util
264
+ import hashlib
265
+
266
  user_emb = embedder.encode(user_input, convert_to_tensor=True)
267
  scores = util.cos_sim(user_emb, kb_embeddings)[0]
268
  best_idx = int(scores.argmax())
 
276
 
277
  add_to_memory(session_id, "user", user_input)
278
  add_to_memory(session_id, "assistant", response)
279
+ return response
280
 
281
# ──────────────────────────────────────────
# UNIFIED RESPONSE ROUTER
# ──────────────────────────────────────────
def generate_response(user_input, session_id):
    """Route to the generative LLM when loaded, else the semantic fallback."""
    gemma_ready = LLM_MODE == "gemma-3-270m-it" and gemma_model is not None
    if gemma_ready:
        return generate_gemma_response(user_input, session_id)
    return generate_minilm_response(user_input, session_id)
289
 
290
  # ──────────────────────────────────────────
291
+ # TTS
292
  # ──────────────────────────────────────────
293
+ def synthesize_speech(text, voice=None):
 
294
  if tts is None:
295
  return None
296
  try:
297
+ voice = voice or TTS_VOICE
298
  clean = clean_text_for_tts(text)
299
  if not clean or len(clean) < 2:
300
  return None
301
+ if len(clean) > 400:
302
+ clean = clean[:400]
303
+ audio = tts.generate(clean, voice=voice, speed=TTS_SPEED)
 
 
 
 
304
  if audio is None or len(audio) == 0:
 
305
  return None
 
306
  buf = io.BytesIO()
307
  sf.write(buf, audio, 24000, format='WAV', subtype='PCM_16')
308
  buf.seek(0)
 
311
  print(f"TTS Error: {e}")
312
  return None
313
 
 
314
  # ──────────────────────────────────────────
315
  # FLASK APP
316
  # ──────────────────────────────────────────
317
  app = Flask(__name__)
318
 
 
319
  @app.route("/")
320
  def index():
321
  return render_template("index.html")
322
 
323
+ # ── ENDPOINT 1: Chat (text only β€” fast) ──
 
324
  @app.route("/chat", methods=["POST"])
325
  def chat():
326
  data = request.json or {}
 
330
  if not user_input:
331
  return jsonify({"error": "Empty message"}), 400
332
 
333
+ try:
334
+ response = generate_response(user_input, session_id)
335
+ except Exception as e:
336
+ print(f"Generation error: {e}")
337
+ traceback.print_exc()
338
+ response = "I encountered a temporary system malfunction. Please try again."
339
 
340
  return jsonify({
341
  "response": response,
 
342
  "session_id": session_id,
343
+ "llm_mode": LLM_MODE,
344
  "tts_available": tts is not None,
345
+ "tts_mode": TTS_MODE,
346
+ "memory_length": len(get_memory(session_id)),
347
  })
348
 
349
# ── ENDPOINT 2: TTS (async, separate) ──
@app.route("/tts", methods=["POST"])
def tts_endpoint():
    """Synthesize speech for the posted text.

    Body: {"text": str, "voice": optional str}.  Returns {"audio": <b64 WAV>},
    or {"audio": None} with HTTP 200 when the TTS engine is unavailable so
    the frontend can degrade gracefully.
    """
    # get_json(silent=True) tolerates a missing/incorrect Content-Type;
    # request.json would raise an Unsupported Media Type error before the
    # `or {}` guard could ever apply.
    data = request.get_json(silent=True) or {}
    # Null-safe: a JSON body of {"text": null} must 400, not crash on .strip().
    text = (data.get("text") or "").strip()
    voice = data.get("voice", TTS_VOICE)

    if not text:
        return jsonify({"error": "Empty text"}), 400
    if tts is None:
        return jsonify({"error": "TTS not available", "audio": None}), 200

    audio_b64 = synthesize_speech(text, voice=voice)
    return jsonify({"audio": audio_b64})
363
 
364
+ # ── ENDPOINT 3: Clear memory ──
365
  @app.route("/clear", methods=["POST"])
366
  def clear():
367
  data = request.json or {}
 
370
  del sessions[sid]
371
  return jsonify({"status": "cleared"})
372
 
373
# ── ENDPOINT 4: Health / Config ──
@app.route("/health")
def health():
    # Lightweight liveness/config probe: reports which LLM/TTS backends are
    # active plus the voices the frontend may offer in its picker.
    return jsonify({
        "status": "online",
        "llm_mode": LLM_MODE,    # "gemma-3-270m-it" or "minilm-semantic"
        "tts_mode": TTS_MODE,
        "tts_model": tts_model_name if tts else "DISABLED",
        "tts_voice": TTS_VOICE,  # default voice; per-request override via /tts
        "tts_voices": ["Bella", "Jasper", "Luna", "Bruno", "Rosie", "Hugo", "Kiki", "Leo"],
        "max_new_tokens": MAX_NEW_TOKENS,
    })
385
 
 
386
  if __name__ == "__main__":
387
  app.run(host="0.0.0.0", port=7860, threaded=True)