kcrobot102 committed on
Commit
de984f1
·
verified ·
1 Parent(s): 971e4c9

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +829 -0
app.py ADDED
@@ -0,0 +1,829 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, request, jsonify, Response, send_file
2
+ import torch
3
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
4
+ import os
5
+ import logging
6
+ import io
7
+ import numpy as np
8
+ import scipy.io.wavfile as wavfile
9
+ import soundfile as sf
10
+ from pydub import AudioSegment
11
+ import time
12
+ from functools import lru_cache
13
+ import gc
14
+ import psutil
15
+ import threading
16
+ import time
17
+ from queue import Queue
18
+ import uuid
19
+ import subprocess
20
+ import tempfile
21
+ import atexit
22
+ import requests
23
+ from datetime import datetime
24
+ import json
25
+ import re
26
+
27
# Module-wide logging: INFO level with timestamp and severity on each record.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# The presence of SPACE_ID in the environment is treated as "running on
# Hugging Face Spaces"; HF_TOKEN is passed to the model loaders as-is
# (it is None when the variable is unset).
IS_HF_SPACE = os.environ.get('SPACE_ID') is not None
HF_TOKEN = os.environ.get('HF_TOKEN')

# Device / thread selection: Spaces is pinned to CPU with 2 threads,
# anything else uses CUDA when available and 4 threads.
if IS_HF_SPACE:
    device = "cpu"
    torch.set_num_threads(2)
    os.environ['TOKENIZERS_PARALLELISM'] = 'false'
    logger.info("Running on Hugging Face Spaces - CPU optimized mode")
else:
    device = "cuda" if torch.cuda.is_available() else "cpu"
    torch.set_num_threads(4)

logger.info(f"Using device: {device}")

app = Flask(__name__)
app.config['TEMP_AUDIO_DIR'] = '/tmp/audio_responses'
# Request bodies above 16 MiB are rejected by Flask (HTTP 413).
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024

# Model handles; populated by initialize_models().
stt_pipeline = None
llm_model = None
llm_tokenizer = None
tts_pipeline = None
tts_type = None

# Registry of generated audio files (file_id -> metadata dict), guarded by
# file_cleanup_lock and pruned by the background cleanup thread.
active_files = {}
file_cleanup_lock = threading.Lock()
cleanup_thread = None

# Phrases whose presence in a transcript triggers a web search.
SEARCH_KEYWORDS = [
    'today',
    'yesterday',
    'current',
    'latest',
    'recent',
    'news',
    'now',
    'this year',
    '2025',
    '2024',
    'weather',
    'price',
    'who is',
    'what is',
    'when did',
    'where is',
    'how much',
]
63
+
64
def cleanup_old_files():
    """Run forever, purging generated audio files older than 300 seconds.

    Each pass takes ``file_cleanup_lock``, collects the ids in
    ``active_files`` whose ``created_time`` is more than 300 s in the past,
    deletes the file on disk (when it still exists) and drops the registry
    entry. Failures are logged and never stop the loop. The loop sleeps
    60 seconds between passes, so it is meant to run on a daemon thread.
    """
    while True:
        try:
            with file_cleanup_lock:
                now = time.time()
                # Snapshot the items so the dict can be mutated below.
                expired_ids = [
                    fid
                    for fid, info in list(active_files.items())
                    if now - info['created_time'] > 300
                ]

                for fid in expired_ids:
                    try:
                        path = active_files[fid]['filepath']
                        if os.path.exists(path):
                            os.remove(path)
                        del active_files[fid]
                        logger.info(f"Cleaned up file: {fid}")
                    except Exception as exc:
                        logger.warning(f"Cleanup error for {fid}: {exc}")
        except Exception as exc:
            logger.error(f"Cleanup thread error: {exc}")

        time.sleep(60)
87
+
88
def start_cleanup_thread():
    """Start the background cleanup thread unless one is already alive.

    The thread runs ``cleanup_old_files`` as a daemon and is stored in the
    module-level ``cleanup_thread``.
    """
    global cleanup_thread

    already_running = cleanup_thread is not None and cleanup_thread.is_alive()
    if already_running:
        return

    cleanup_thread = threading.Thread(target=cleanup_old_files, daemon=True)
    cleanup_thread.start()
    logger.info("Cleanup thread started")
94
+
95
def cleanup_all_files():
    """Best-effort removal of every generated audio file at interpreter exit.

    Deletes each file tracked in ``active_files``, clears the registry, then
    removes the whole ``TEMP_AUDIO_DIR`` tree. Individual delete failures are
    logged and skipped so one bad file cannot block the rest; any other
    failure is logged as a warning. Never raises.
    """
    import shutil  # local import: shutil is not among the file's top-level imports

    try:
        with file_cleanup_lock:
            for file_id, file_info in list(active_files.items()):
                try:
                    if os.path.exists(file_info['filepath']):
                        os.remove(file_info['filepath'])
                except (OSError, KeyError) as exc:
                    # Previously a bare ``except: pass``; keep going, but
                    # leave a trace instead of hiding the failure.
                    logger.warning(f"Could not remove file {file_id}: {exc}")
            active_files.clear()

        if os.path.exists(app.config['TEMP_AUDIO_DIR']):
            shutil.rmtree(app.config['TEMP_AUDIO_DIR'], ignore_errors=True)

        logger.info("All temporary files cleaned up")
    except Exception as e:
        logger.warning(f"Final cleanup error: {e}")


# Run the final cleanup when the interpreter shuts down.
atexit.register(cleanup_all_files)
115
+
116
def get_memory_usage():
    """Return memory statistics for this process and the host.

    Returns:
        dict with ``rss_mb`` and ``vms_mb`` (this process, in MiB),
        ``available_mb`` (system-wide available memory, in MiB) and
        ``percent`` (system-wide usage percentage). All four values are 0
        when psutil fails.
    """
    try:
        process_memory = psutil.Process(os.getpid()).memory_info()
        # One snapshot so ``available_mb`` and ``percent`` describe the same
        # instant (the original queried virtual_memory() twice).
        system_memory = psutil.virtual_memory()
        return {
            "rss_mb": process_memory.rss / 1024 / 1024,
            "vms_mb": process_memory.vms / 1024 / 1024,
            "available_mb": system_memory.available / 1024 / 1024,
            "percent": system_memory.percent,
        }
    except Exception as e:
        logger.warning(f"Memory info error: {e}")
        return {"rss_mb": 0, "vms_mb": 0, "available_mb": 0, "percent": 0}
129
+
130
def needs_web_search(text):
    """Decide whether a transcript should be answered with web search results.

    Returns True when *text* contains any entry of ``SEARCH_KEYWORDS`` as a
    whole word/phrase (case-insensitive), or a standalone year 2020-2030.
    Empty or ``None`` input returns False.

    Keywords are matched on word boundaries: plain substring matching made
    ``'now'`` fire on "know"/"snow" and ``'price'`` on "priceless", sending
    ordinary sentences to the search backend.
    """
    if not text:
        return False

    text_lower = text.lower()

    for keyword in SEARCH_KEYWORDS:
        if re.search(rf'\b{re.escape(keyword)}\b', text_lower):
            logger.info(f"Web search triggered by keyword: '{keyword}'")
            return True

    if re.search(r'\b(202[0-9]|2030)\b', text):
        logger.info("Web search triggered by year reference")
        return True

    return False
143
+
144
def search_web(query, max_results=3):
    """Query the DuckDuckGo JSON endpoint and return short result snippets.

    Args:
        query: Search text.
        max_results: Maximum number of ``RelatedTopics`` entries to take.
            The abstract, when present, is added on top of these.

    Returns:
        A list of ``{'title': str, 'snippet': str}`` dicts. When the JSON
        answer yields nothing, ``search_fallback`` is tried with a
        ``site:wikipedia.org`` query. Returns ``[]`` on a non-200 response,
        when nothing is found, or on any error (errors are logged, never
        raised).
    """
    try:
        logger.info(f"🔍 Searching web for: '{query}'")

        response = requests.get(
            "https://api.duckduckgo.com/",
            params={
                'q': query,
                'format': 'json',
                'no_html': 1,
                'skip_disambig': 1,
            },
            timeout=5,
        )

        if response.status_code != 200:
            # Previously dropped silently; log it so outages are diagnosable.
            logger.warning(f"Web search returned HTTP {response.status_code}")
            return []

        data = response.json()
        results = []

        if data.get('Abstract'):
            results.append({
                'title': data.get('Heading', 'General Info'),
                'snippet': data['Abstract'][:300],
            })

        for topic in (data.get('RelatedTopics') or [])[:max_results]:
            # Entries without a 'Text' field (or non-dict entries) are skipped.
            if isinstance(topic, dict) and topic.get('Text'):
                results.append({
                    # Title is derived from the last path segment of the URL.
                    'title': topic.get('FirstURL', '').split('/')[-1].replace('_', ' '),
                    'snippet': topic['Text'][:200],
                })

        if not results:
            results = search_fallback(f"{query} site:wikipedia.org")

        if not results:
            logger.warning("No web results found")
            return []

        logger.info(f"✅ Found {len(results)} web results")
        return results

    except Exception as e:
        logger.error(f"Web search error: {e}")
        return []
193
+
194
def search_fallback(query):
    """Scrape up to three result snippets from DuckDuckGo's HTML endpoint.

    Args:
        query: Search text; it is URL-quoted before being sent.

    Returns:
        A list of ``{'title': 'Search Result', 'snippet': str}`` dicts with
        plain-text snippets of at most 200 characters, or ``[]`` on a
        non-200 response or any error (errors are logged, never raised).
    """
    import html  # local import: html is not among the file's top-level imports

    try:
        url = f"https://html.duckduckgo.com/html/?q={requests.utils.quote(query)}"
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

        response = requests.get(url, headers=headers, timeout=5)
        if response.status_code != 200:
            return []

        # Capture the whole snippet element. The earlier pattern
        # ``([^<]+)<`` stopped at the first inline tag, so any snippet with
        # highlighted (<b>...</b>) terms was truncated or missed entirely.
        raw_snippets = re.findall(
            r'class="result__snippet"[^>]*>(.*?)</(?:a|div|span|td)>',
            response.text,
            flags=re.DOTALL,
        )

        snippets = []
        for raw in raw_snippets:
            # Strip inline markup, decode entities, collapse whitespace.
            plain = html.unescape(re.sub(r'<[^>]+>', '', raw))
            plain = ' '.join(plain.split())
            if not plain:
                continue
            snippets.append({
                'title': 'Search Result',
                'snippet': plain[:200],
            })
            if len(snippets) == 3:
                break

        return snippets

    except Exception as e:
        logger.error(f"Fallback search error: {e}")
        return []
223
+
224
def format_search_context(search_results):
    """Render search results as a numbered text block for the LLM prompt.

    Args:
        search_results: Iterable of dicts with ``'title'`` and ``'snippet'``
            keys. A missing title falls back to ``'Search Result'`` and a
            missing snippet to an empty string instead of raising KeyError.

    Returns:
        ``""`` when *search_results* is empty or None; otherwise a string
        that starts with ``"\\n\\nWeb Search Results:\\n"`` followed by one
        ``"<n>. <title>: <snippet>\\n"`` line per result, numbered from 1.
    """
    if not search_results:
        return ""

    lines = [
        f"{i}. {result.get('title', 'Search Result')}: {result.get('snippet', '')}\n"
        for i, result in enumerate(search_results, 1)
    ]
    return "\n\nWeb Search Results:\n" + "".join(lines)
233
+
234
def initialize_models():
    """Load the STT, LLM and TTS components into the module-level globals.

    Each component is loaded only when its global is still ``None``:

    * STT: ``openai/whisper-tiny`` automatic-speech-recognition pipeline.
    * LLM: ``google/flan-t5-base`` tokenizer and seq2seq model, moved to
      ``device``.
    * TTS: ``"gtts"`` when the ``gtts`` package can be imported, otherwise
      the ``"silent"`` fallback.

    After a successful load the background cleanup thread is started.

    Raises:
        Exception: whatever the STT or LLM loader raised; it is logged and
            re-raised unchanged.
    """
    global stt_pipeline, llm_model, llm_tokenizer, tts_pipeline, tts_type

    # Half precision only on CUDA; float32 otherwise.
    dtype = torch.float16 if device == "cuda" else torch.float32

    try:
        logger.info(f"Initial memory usage: {get_memory_usage()}")

        if stt_pipeline is None:
            logger.info("Loading Whisper-tiny STT model...")
            try:
                stt_pipeline = pipeline(
                    "automatic-speech-recognition",
                    model="openai/whisper-tiny",
                    device=device,
                    torch_dtype=dtype,
                    token=HF_TOKEN,
                    return_timestamps=False
                )
                logger.info("✅ STT model loaded successfully")
            except Exception as e:
                logger.error(f"STT loading failed: {e}")
                raise

            gc.collect()
            logger.info(f"STT loaded. Memory: {get_memory_usage()}")

        if llm_model is None:
            logger.info("Loading FLAN-T5 LLM...")
            try:
                model_name = "google/flan-t5-base"

                # trust_remote_code is intentionally NOT set: it allows code
                # shipped in the model repository to execute locally, and
                # this checkpoint loads through the Auto* classes without it.
                llm_tokenizer = AutoTokenizer.from_pretrained(
                    model_name,
                    token=HF_TOKEN
                )

                llm_model = AutoModelForSeq2SeqLM.from_pretrained(
                    model_name,
                    torch_dtype=dtype,
                    token=HF_TOKEN
                ).to(device)

                if llm_tokenizer.pad_token is None:
                    llm_tokenizer.pad_token = llm_tokenizer.eos_token

                logger.info("✅ LLM model loaded successfully")
            except Exception as e:
                logger.error(f"LLM loading failed: {e}")
                raise

            gc.collect()
            logger.info(f"LLM loaded. Memory: {get_memory_usage()}")

        if tts_pipeline is None:
            logger.info("Loading TTS model...")
            tts_loaded = False

            try:
                # Import is only an availability probe; synthesis happens
                # later, per request.
                from gtts import gTTS  # noqa: F401
                tts_pipeline = "gtts"
                tts_type = "gtts"
                tts_loaded = True
                logger.info("✅ Using gTTS (Google Text-to-Speech)")
            except ImportError:
                logger.warning("gTTS not available")

            if not tts_loaded:
                tts_pipeline = "silent"
                tts_type = "silent"
                logger.warning("Using silent fallback for TTS")

            gc.collect()
            logger.info(f"TTS loaded. Memory: {get_memory_usage()}")

        logger.info("🎉 All models loaded successfully!")
        start_cleanup_thread()

    except Exception as e:
        logger.error(f"❌ Model loading error: {e}")
        logger.error(f"Memory usage at error: {get_memory_usage()}")
        raise
316
+
317
def generate_llm_response(text, search_context=""):
    """Generate a short answer for *text* with the loaded seq2seq model.

    Args:
        text: User utterance; only the first 200 characters are used.
        search_context: Optional block of web-search text. When non-empty
            it is placed in the prompt ahead of the question.

    Returns:
        The decoded, stripped model output. A canned sentence is returned
        instead when *text* is blank, when the output is shorter than three
        characters, or when generation raises (the error is logged).
    """
    try:
        if len(text) > 200:
            text = text[:200]

        if not text.strip():
            return "I'm listening. How can I help you?"

        if search_context:
            prompt = f"Based on the following information, answer the question concisely.\n{search_context}\n\nQuestion: {text}\nAnswer:"
        else:
            prompt = f"Answer concisely: {text}"

        inputs = llm_tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512
        )
        input_ids = inputs["input_ids"].to(device)
        attention_mask = inputs.get("attention_mask")
        if attention_mask is not None:
            attention_mask = attention_mask.to(device)

        # ``pad_token_id or eos_token_id`` treated a legitimate pad id of 0
        # as "missing" (0 is falsy) and silently substituted the EOS id.
        pad_token_id = llm_tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = llm_tokenizer.eos_token_id

        with torch.no_grad():
            # ``early_stopping`` was dropped: it is a beam-search option and
            # has no effect with sampling (no ``num_beams`` is set here).
            gen_kwargs = dict(
                max_new_tokens=60,
                do_sample=True,
                temperature=0.7,
                top_k=50,
                top_p=0.9,
                no_repeat_ngram_size=2,
                pad_token_id=pad_token_id,
                use_cache=True
            )

            outputs_ids = llm_model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                **gen_kwargs
            )

        response = llm_tokenizer.decode(outputs_ids[0], skip_special_tokens=True)

        # Release tensors promptly to keep peak memory down between requests.
        del inputs, input_ids, attention_mask, outputs_ids
        gc.collect()
        if device == "cuda":
            torch.cuda.empty_cache()

        response = response.strip()

        if not response or len(response) < 3:
            if search_context:
                return "I found some information but couldn't process it properly."
            return "I understand. What else would you like to know?"

        return response

    except Exception as e:
        logger.error(f"LLM generation error: {e}", exc_info=True)
        return "I'm having trouble processing that. Could you try again?"
380
+
381
def _wav_payload_offset(audio_bytes):
    """Return the byte offset of the PCM payload inside a RIFF/WAVE buffer.

    Walks the RIFF chunk list looking for the ``data`` chunk. Falls back to
    44 (the size of a canonical header) when no ``data`` chunk is found.
    """
    pos = 12  # skip 'RIFF' + size + 'WAVE'
    while pos + 8 <= len(audio_bytes):
        chunk_id = audio_bytes[pos:pos + 4]
        chunk_size = int.from_bytes(audio_bytes[pos + 4:pos + 8], 'little')
        if chunk_id == b'data':
            return pos + 8
        # Chunks are word-aligned: odd sizes carry one pad byte.
        pos += 8 + chunk_size + (chunk_size & 1)
    return 44


def preprocess_audio_optimized(audio_bytes):
    """Convert a raw request body into normalized float32 samples.

    The payload is interpreted as 16-bit PCM and scaled to [-1.0, 1.0).
    NOTE(review): the data is assumed to be 16 kHz mono little-endian; the
    header's own format fields are not consulted - confirm against clients.

    A leading RIFF header is skipped (the ``data`` chunk is located rather
    than assuming a fixed 44 bytes, so headers with extra chunks no longer
    leak into the samples). A trailing odd byte is dropped, because
    ``np.frombuffer`` rejects buffers that are not a multiple of 2 bytes.

    Args:
        audio_bytes: Raw bytes of the uploaded audio.

    Returns:
        ``(16000, samples)`` where *samples* is a float32 array of at most
        30 seconds, or ``(None, None)`` when the audio is shorter than
        0.5 seconds.

    Raises:
        Exception: any preprocessing error, after it has been logged.
    """
    sample_rate = 16000

    try:
        logger.info(f"Processing audio: {len(audio_bytes)} bytes")

        if len(audio_bytes) > 44 and audio_bytes[:4] == b'RIFF':
            audio_bytes = audio_bytes[_wav_payload_offset(audio_bytes):]
            logger.info("WAV header removed")

        if len(audio_bytes) % 2:
            audio_bytes = audio_bytes[:-1]

        audio_data = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0

        max_samples = 30 * sample_rate
        if len(audio_data) > max_samples:
            audio_data = audio_data[:max_samples]
            logger.info("Audio trimmed to 30 seconds")

        min_samples = int(0.5 * sample_rate)
        if len(audio_data) < min_samples:
            logger.warning(f"Audio too short: {len(audio_data)/16000:.2f} seconds")
            return None, None

        logger.info(f"Audio processed: {len(audio_data)/16000:.2f} seconds")
        return sample_rate, audio_data

    except Exception as e:
        logger.error(f"Audio preprocessing error: {e}")
        raise
407
+
408
def _silent_wav(num_samples, sample_rate=16000):
    """Return WAV bytes holding *num_samples* of mono 16-bit silence."""
    import wave

    wav_buffer = io.BytesIO()
    with wave.open(wav_buffer, 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(b'\x00\x00' * num_samples)
    return wav_buffer.getvalue()


def _gtts_to_wav(text):
    """Synthesize *text* with gTTS and return 16 kHz mono 16-bit WAV bytes.

    Raises ValueError when the produced WAV is under 1000 bytes or lacks a
    RIFF/WAVE header; other errors from gTTS/pydub propagate unchanged.
    """
    import wave
    from gtts import gTTS

    # Create the temp file and close it right away: gTTS and pydub reopen
    # it by name, and it must not be unlinked while a handle is still open.
    tmp_mp3 = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
    tmp_mp3.close()
    try:
        tts = gTTS(text=text, lang='en', slow=False, timeout=10)
        tts.save(tmp_mp3.name)

        audio = AudioSegment.from_file(tmp_mp3.name, format="mp3")
        audio = audio.normalize()
        audio = audio.set_frame_rate(16000)
        audio = audio.set_channels(1)
        audio = audio.set_sample_width(2)
        audio = audio.fade_in(50).fade_out(100)

        raw_data = np.array(audio.get_array_of_samples(), dtype=np.int16)

        wav_buffer = io.BytesIO()
        with wave.open(wav_buffer, 'wb') as wav_file:
            wav_file.setnchannels(1)
            wav_file.setsampwidth(2)
            wav_file.setframerate(16000)
            wav_file.writeframes(raw_data.tobytes())
        wav_data = wav_buffer.getvalue()
    finally:
        # Always remove the temp file, on success and on failure alike.
        if os.path.exists(tmp_mp3.name):
            os.unlink(tmp_mp3.name)

    if len(wav_data) < 1000:
        raise ValueError(f"Audio too short: {len(wav_data)} bytes")

    if wav_data[:4] != b'RIFF' or wav_data[8:12] != b'WAVE':
        raise ValueError("Invalid WAV format")

    return wav_data


def generate_tts_audio(text):
    """Turn *text* into WAV audio bytes (16 kHz, mono, 16-bit).

    Newlines are replaced with spaces; text longer than 200 characters is
    cut and suffixed with ``"..."``; empty text becomes "I understand.".

    With ``tts_type == "gtts"`` the text is synthesized through gTTS, with
    up to three attempts when the error looks like rate limiting ("429" /
    "Too Many Requests"), waiting 2 s and then 3 s between attempts.

    Returns:
        WAV bytes. One second of silence when gTTS is not the active
        backend; half a second of silence when synthesis fails. Never
        raises.
    """
    try:
        text = text.replace('\n', ' ').strip()
        if len(text) > 200:
            text = text[:200] + "..."
        if not text:
            text = "I understand."

        logger.info(f"TTS generating: '{text[:50]}...'")

        if tts_type == "gtts":
            max_retries = 3
            retry_delay = 2

            for attempt in range(max_retries):
                try:
                    wav_data = _gtts_to_wav(text)
                    logger.info(f"✓ Clean WAV generated: {len(wav_data)} bytes")
                    return wav_data
                except Exception as e:
                    error_str = str(e)
                    rate_limited = "429" in error_str or "Too Many Requests" in error_str
                    if rate_limited and attempt < max_retries - 1:
                        logger.warning(f"TTS retry {attempt + 1}...")
                        time.sleep(retry_delay)
                        retry_delay *= 1.5
                        continue
                    logger.error(f"TTS error: {e}")
                    raise

        logger.warning("Using silent fallback")
        return _silent_wav(16000)

    except Exception as e:
        logger.error(f"TTS critical error: {e}")
        return _silent_wav(8000)
511
+
512
@app.route('/process_audio', methods=['POST'])
def process_audio():
    """Handle one voice request: STT -> optional web search -> LLM -> TTS.

    The raw request body is the audio. On success the synthesized reply is
    written to ``TEMP_AUDIO_DIR``, registered in ``active_files`` and a JSON
    document describing it (including ``stream_url``) is returned.

    Responses:
        200: JSON with status, file_id, stream_url, message, transcribed,
             processing_time, audio_size and web_search_used.
        400: empty, too small (< 1000 bytes) or too short audio.
        500: TTS/file failure or any unexpected processing error.
        503: models are not loaded yet.
    """
    start_time = time.time()

    if not all([stt_pipeline, llm_model, llm_tokenizer, tts_pipeline]):
        logger.error("Models not ready")
        return jsonify({"error": "Models are still loading, please wait..."}), 503

    if not request.data:
        return jsonify({"error": "No audio data received"}), 400

    if len(request.data) < 1000:
        return jsonify({"error": "Audio data too small"}), 400

    initial_memory = get_memory_usage()
    logger.info(f"🎯 Processing started. Memory: {initial_memory['rss_mb']:.1f}MB")

    try:
        logger.info("🎤 Converting speech to text...")
        stt_start = time.time()

        rate, audio_data = preprocess_audio_optimized(request.data)

        if audio_data is None:
            return jsonify({"error": "Invalid or too short audio"}), 400

        # NOTE(review): transcription is forced to Vietnamese ("vi") while
        # the search keywords, LLM prompt and TTS voice are English -
        # confirm this is intended.
        stt_result = stt_pipeline(
            {"sampling_rate": rate, "raw": audio_data},
            generate_kwargs={"language": "vi"}
        )
        transcribed_text = stt_result.get('text', '').strip()

        del audio_data
        gc.collect()

        stt_time = time.time() - stt_start
        logger.info(f"✅ STT: '{transcribed_text}' ({stt_time:.2f}s)")

        if not transcribed_text or len(transcribed_text) < 2:
            transcribed_text = "Could you repeat that please?"

        search_context = ""
        web_search_used = False

        if needs_web_search(transcribed_text):
            search_start = time.time()
            search_results = search_web(transcribed_text)

            if search_results:
                search_context = format_search_context(search_results)
                web_search_used = True
                logger.info(f"🌐 Web search completed ({time.time() - search_start:.2f}s)")
            else:
                logger.info("No relevant search results found")

        logger.info("🤖 Generating AI response...")
        llm_start = time.time()

        assistant_response = generate_llm_response(transcribed_text, search_context)

        llm_time = time.time() - llm_start
        logger.info(f"✅ LLM: '{assistant_response}' ({llm_time:.2f}s)")

        logger.info("🔊 Converting to speech...")
        tts_start = time.time()

        audio_response = generate_tts_audio(assistant_response)

        if not audio_response or len(audio_response) < 1000:
            logger.error("TTS produced invalid audio")
            return jsonify({"error": "TTS generation failed"}), 500

        tts_time = time.time() - tts_start

        # exist_ok avoids the check-then-create race between concurrent
        # requests (the server runs with threaded=True).
        os.makedirs(app.config['TEMP_AUDIO_DIR'], exist_ok=True)

        file_id = str(uuid.uuid4())
        temp_filename = os.path.join(app.config['TEMP_AUDIO_DIR'], f"{file_id}.wav")

        with open(temp_filename, 'wb') as f:
            f.write(audio_response)
            f.flush()
            os.fsync(f.fileno())

        if not os.path.exists(temp_filename):
            logger.error("File write failed")
            return jsonify({"error": "File save failed"}), 500

        file_size = os.path.getsize(temp_filename)
        logger.info(f"Audio saved: {file_id}.wav ({file_size} bytes)")

        time.sleep(0.1)

        with file_cleanup_lock:
            active_files[file_id] = {
                'filepath': temp_filename,
                'created_time': time.time(),
                'accessed': False,
                'size': file_size
            }

        total_time = time.time() - start_time

        response_data = {
            'status': 'success',
            'file_id': file_id,
            'stream_url': f'/stream_audio/{file_id}',
            'message': assistant_response,
            'transcribed': transcribed_text,
            'processing_time': round(total_time, 2),
            'audio_size': file_size,
            'web_search_used': web_search_used
        }

        logger.info(f"✅ Complete: {file_id} ({total_time:.2f}s) [Web:{web_search_used}]")
        return jsonify(response_data)

    except Exception as e:
        logger.error(f"❌ Processing error: {e}", exc_info=True)
        gc.collect()
        if device == "cuda":
            torch.cuda.empty_cache()

        # Exception details are withheld from clients when running on Spaces.
        return jsonify({
            "error": "Processing failed",
            "details": str(e) if not IS_HF_SPACE else "Internal server error"
        }), 500
639
+
640
@app.route('/stream_audio/<file_id>')
def stream_audio(file_id):
    """Stream a previously generated WAV file in 1 KiB chunks.

    The file is read into memory before the response is built. The earlier
    version opened it lazily inside the generator, so a file deleted by the
    cleanup thread after the existence check produced a 200 response with a
    ``Content-Length`` header and no body. Memory use is unchanged: the
    earlier generator also read the whole file in one call.

    Responses:
        200: ``audio/wav`` body, sent in 1024-byte chunks.
        404: unknown/expired *file_id* or unreadable file.
    """
    with file_cleanup_lock:
        file_info = active_files.get(file_id)

    if not file_info:
        logger.error(f"File not found: {file_id}")
        return jsonify({'error': 'File not found or expired.'}), 404

    filepath = file_info['filepath']
    try:
        with open(filepath, 'rb') as f:
            data = f.read()
    except OSError:
        logger.error(f"File not found: {file_id}")
        return jsonify({'error': 'File not found or expired.'}), 404

    file_size = len(data)
    logger.info(f"Streaming {file_id}: {file_size} bytes")

    def generate():
        chunk_size = 1024
        for offset in range(0, file_size, chunk_size):
            yield data[offset:offset + chunk_size]
            # Brief pause between chunks, kept from the original pacing.
            time.sleep(0.001)

        logger.info(f"Stream {file_id} completed")

    response = Response(
        generate(),
        mimetype='audio/wav',
        direct_passthrough=False
    )

    response.headers['Content-Length'] = str(file_size)
    # NOTE(review): Range requests are not actually honoured here, and
    # 'Connection' is a hop-by-hop header that WSGI apps normally leave to
    # the server - both kept for client compatibility; confirm.
    response.headers['Accept-Ranges'] = 'bytes'
    response.headers['Cache-Control'] = 'no-cache'
    response.headers['Connection'] = 'keep-alive'

    return response
679
+
680
@app.route('/health', methods=['GET'])
def health_check():
    """Return a detailed JSON report: model readiness, system and file stats."""
    memory = get_memory_usage()
    everything_loaded = all([stt_pipeline, llm_model, llm_tokenizer, tts_pipeline])
    cleanup_alive = cleanup_thread is not None and cleanup_thread.is_alive()

    models_section = {
        "stt": stt_pipeline is not None,
        "llm": llm_model is not None and llm_tokenizer is not None,
        "tts": tts_pipeline is not None,
        "tts_type": tts_type
    }
    system_section = {
        "device": device,
        "is_hf_space": IS_HF_SPACE,
        "memory_mb": round(memory['rss_mb'], 1),
        "available_mb": round(memory['available_mb'], 1),
        "memory_percent": round(memory['percent'], 1)
    }
    files_section = {
        "active_count": len(active_files),
        "cleanup_running": cleanup_alive
    }
    features_section = {
        "web_search": True,
        "search_keywords": len(SEARCH_KEYWORDS)
    }

    return jsonify({
        "status": "ready" if everything_loaded else "loading",
        "models": models_section,
        "system": system_section,
        "files": files_section,
        "features": features_section
    })
710
+
711
@app.route('/status', methods=['GET'])
def simple_status():
    """Return ``{"ready": bool}``: true once all four model globals are set."""
    required = [stt_pipeline, llm_model, llm_tokenizer, tts_pipeline]
    return jsonify({"ready": all(required)})
715
+
716
@app.route('/', methods=['GET'])
def home():
    """Serve the static landing page.

    The page lists the API endpoints and features, and polls ``/status``
    every 5 seconds to show whether the models are loaded. Mis-encoded
    emoji sequences in the markup were restored to the intended characters.
    """
    return """
<!DOCTYPE html>
<html>
<head>
    <title>Voice AI Assistant with Web Search</title>
    <style>
        body { font-family: Arial, sans-serif; margin: 40px; }
        .status { font-size: 18px; margin: 20px 0; }
        .ready { color: green; }
        .loading { color: orange; }
        .error { color: red; }
        code { background: #f4f4f4; padding: 2px 5px; }
        .feature { background: #e8f5e9; padding: 10px; margin: 10px 0; border-radius: 5px; }
    </style>
</head>
<body>
    <h1>🎙️ Voice AI Assistant with Web Search</h1>
    <div class="status">Status: <span id="status">Checking...</span></div>

    <div class="feature">
        <h3>🌐 New: Web Search Integration</h3>
        <p>The assistant can now search the web for current information!</p>
        <p><strong>Triggers:</strong> today, latest, news, current events, weather, prices, "who is", "what is", years (2024, 2025), etc.</p>
    </div>

    <h2>API Endpoints:</h2>
    <ul>
        <li><code>POST /process_audio</code> - Process audio with AI + Web Search</li>
        <li><code>GET /stream_audio/&lt;file_id&gt;</code> - Stream audio response</li>
        <li><code>GET /health</code> - Detailed health check</li>
        <li><code>GET /status</code> - Simple ready status</li>
    </ul>

    <h2>Features:</h2>
    <ul>
        <li>✅ Speech-to-Text (Whisper Tiny)</li>
        <li>✅ AI Response (FLAN-T5)</li>
        <li>✅ <strong>Web Search (DuckDuckGo)</strong></li>
        <li>✅ Text-to-Speech (gTTS)</li>
        <li>✅ Automatic file cleanup</li>
        <li>✅ Memory optimization</li>
    </ul>

    <h2>Example Questions:</h2>
    <ul>
        <li>"What's the weather like today?"</li>
        <li>"Who is the current president?"</li>
        <li>"What happened in 2024?"</li>
        <li>"Tell me the latest news"</li>
        <li>"What is the price of Bitcoin?"</li>
    </ul>

    <p><em>Optimized for ESP32 and Hugging Face Spaces</em></p>

    <script>
        function updateStatus() {
            fetch('/status')
                .then(r => r.json())
                .then(d => {
                    const statusEl = document.getElementById('status');
                    if (d.ready) {
                        statusEl.textContent = '✅ Ready';
                        statusEl.className = 'ready';
                    } else {
                        statusEl.textContent = '⏳ Loading models...';
                        statusEl.className = 'loading';
                    }
                })
                .catch(() => {
                    document.getElementById('status').textContent = '❌ Error';
                    document.getElementById('status').className = 'error';
                });
        }

        updateStatus();
        setInterval(updateStatus, 5000);
    </script>
</body>
</html>
"""
798
+
799
@app.errorhandler(Exception)
def handle_exception(e):
    """Catch-all error handler that always answers with JSON.

    A handler registered for ``Exception`` also receives Werkzeug HTTP
    errors (unknown route, wrong method, ...). Those keep their own status
    code instead of being reported as 500. Anything else is logged with a
    traceback and answered with a generic 500.
    """
    # Duck-typed HTTPException check: the file does not import werkzeug
    # directly, so look for the attributes HTTP errors expose.
    status_code = getattr(e, "code", None)
    if isinstance(status_code, int) and callable(getattr(e, "get_response", None)):
        return jsonify({"error": getattr(e, "name", "HTTP error")}), status_code

    logger.error(f"Unhandled exception: {e}", exc_info=True)
    return jsonify({"error": "Internal server error"}), 500
803
+
804
@app.errorhandler(413)
def handle_large_file(e):
    """Answer HTTP 413 (request body too large) with a JSON error."""
    payload = {"error": "Audio file too large (max 16MB)"}
    return jsonify(payload), 413
807
+
808
# Script entry point: load all models first, then start the Flask server.
if __name__ == '__main__':
    try:
        logger.info("🚀 Starting Voice AI Assistant Server with Web Search")
        logger.info(f"Environment: {'Hugging Face Spaces' if IS_HF_SPACE else 'Local'}")

        initialize_models()
        logger.info("🎉 Server ready!")

    except Exception as e:
        logger.error(f"❌ Startup failed: {e}")
        # ``exit()`` is the interactive helper injected by the ``site``
        # module and is not guaranteed to exist; raise SystemExit directly.
        raise SystemExit(1)

    # PORT can override the default of 7860.
    port = int(os.environ.get('PORT', 7860))
    logger.info(f"🌐 Server starting on port {port}")

    app.run(
        host='0.0.0.0',
        port=port,
        debug=False,
        threaded=True,
        use_reloader=False
    )