Spaces:

kcrobot102
/

kc

Sleeping

App Files Files Community

kcrobot102 committed on Nov 11, 2025

Commit

de984f1

verified ·

1 Parent(s): 971e4c9

Create app.py

Browse files

Files changed (1) hide show

app.py +829 -0

app.py ADDED Viewed

	@@ -0,0 +1,829 @@

+from flask import Flask, request, jsonify, Response, send_file
+import torch
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
+import os
+import logging
+import io
+import numpy as np
+import scipy.io.wavfile as wavfile
+import soundfile as sf
+from pydub import AudioSegment
+import time
+from functools import lru_cache
+import gc
+import psutil
+import threading
+import time
+from queue import Queue
+import uuid
+import subprocess
+import tempfile
+import atexit
+import requests
+from datetime import datetime
+import json
+import re
# Process-wide logging: INFO and above, prefixed with timestamp and level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# The presence of SPACE_ID in the environment is used as the signal that the
# app runs on Hugging Face Spaces. HF_TOKEN is optional and may be None.
IS_HF_SPACE = 'SPACE_ID' in os.environ
HF_TOKEN = os.environ.get('HF_TOKEN')

if not IS_HF_SPACE:
    # Local run: use CUDA when torch reports it, with four CPU threads.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    torch.set_num_threads(4)
else:
    # Spaces run: pin to CPU, use two threads and turn off tokenizer
    # parallelism.
    device = "cpu"
    torch.set_num_threads(2)
    os.environ['TOKENIZERS_PARALLELISM'] = 'false'
    logger.info("Running on Hugging Face Spaces - CPU optimized mode")
logger.info(f"Using device: {device}")
# Flask application. Generated audio replies are written under TEMP_AUDIO_DIR
# and request bodies are capped at 16 MiB.
app = Flask(__name__)
app.config.update(
    TEMP_AUDIO_DIR='/tmp/audio_responses',
    MAX_CONTENT_LENGTH=16 * 1024 * 1024,
)

# Model handles, filled in by initialize_models(); None means "not loaded".
stt_pipeline = llm_model = llm_tokenizer = tts_pipeline = tts_type = None

# Registry of generated audio files keyed by file id. Each value is a dict
# with 'filepath', 'created_time', 'accessed' and 'size'. Access to the
# registry is serialized through file_cleanup_lock.
active_files = {}
file_cleanup_lock = threading.Lock()

# Background thread that expires old audio files (see start_cleanup_thread).
cleanup_thread = None

# Lower-case phrases whose presence in a transcript triggers a web search.
SEARCH_KEYWORDS = [
    'today',
    'yesterday',
    'current',
    'latest',
    'recent',
    'news',
    'now',
    'this year',
    '2025',
    '2024',
    'weather',
    'price',
    'who is',
    'what is',
    'when did',
    'where is',
    'how much',
]
def _remove_expired_files(max_age_seconds):
    """Delete tracked audio files older than *max_age_seconds*.

    Runs one sweep over ``active_files`` while holding ``file_cleanup_lock``.
    An entry is dropped from the registry only after its file is gone (or was
    already missing); if removal fails for another reason the entry is kept so
    the next sweep retries it.

    Returns:
        list: ids of the entries that were removed in this sweep.
    """
    removed = []
    with file_cleanup_lock:
        now = time.time()
        expired = [
            file_id
            for file_id, info in active_files.items()
            if now - info['created_time'] > max_age_seconds
        ]
        for file_id in expired:
            try:
                try:
                    os.remove(active_files[file_id]['filepath'])
                except FileNotFoundError:
                    # Already gone: nothing left to delete, just forget it.
                    pass
                del active_files[file_id]
                logger.info(f"Cleaned up file: {file_id}")
                removed.append(file_id)
            except Exception as e:
                logger.warning(f"Cleanup error for {file_id}: {e}")
    return removed


def cleanup_old_files(max_age_seconds=300, interval_seconds=60):
    """Run forever, periodically expiring old generated audio files.

    Intended as the target of a daemon thread; it never returns.

    Args:
        max_age_seconds: age after which a tracked file is deleted
            (default 300, i.e. five minutes).
        interval_seconds: pause between sweeps (default 60).
    """
    while True:
        try:
            _remove_expired_files(max_age_seconds)
        except Exception as e:
            # Keep the thread alive whatever a single sweep runs into.
            logger.error(f"Cleanup thread error: {e}")
        time.sleep(interval_seconds)
def start_cleanup_thread():
    """Start the background file-cleanup thread unless one is already alive.

    The thread is a daemon running ``cleanup_old_files`` and is stored in the
    module-level ``cleanup_thread``.
    """
    global cleanup_thread
    already_running = cleanup_thread is not None and cleanup_thread.is_alive()
    if already_running:
        return
    worker = threading.Thread(target=cleanup_old_files, daemon=True)
    cleanup_thread = worker
    worker.start()
    logger.info("Cleanup thread started")
def cleanup_all_files():
    """Best-effort removal of every generated audio file and the temp directory.

    Registered with ``atexit`` below, so it runs at interpreter shutdown.
    It never raises: per-file failures are logged at debug level, and any
    other failure is logged as a warning.
    """
    try:
        with file_cleanup_lock:
            for file_id, file_info in active_files.items():
                try:
                    os.remove(file_info['filepath'])
                except FileNotFoundError:
                    pass  # already deleted, e.g. by the cleanup thread
                except Exception as e:
                    # Deliberately not a bare ``except``: SystemExit and
                    # KeyboardInterrupt must still propagate.
                    logger.debug(f"Could not remove {file_id}: {e}")
            active_files.clear()
        import shutil
        # ignore_errors=True also covers the directory not existing.
        shutil.rmtree(app.config['TEMP_AUDIO_DIR'], ignore_errors=True)
        logger.info("All temporary files cleaned up")
    except Exception as e:
        logger.warning(f"Final cleanup error: {e}")


atexit.register(cleanup_all_files)
def get_memory_usage():
    """Return memory statistics for this process and the host.

    Returns:
        dict: ``rss_mb`` and ``vms_mb`` (this process, in MiB),
        ``available_mb`` (system memory available, in MiB) and ``percent``
        (system memory in use). All values are 0 if psutil fails.
    """
    try:
        process_mem = psutil.Process(os.getpid()).memory_info()
        # Take one system snapshot so available_mb and percent describe the
        # same instant.
        system_mem = psutil.virtual_memory()
        return {
            "rss_mb": process_mem.rss / 1024 / 1024,
            "vms_mb": process_mem.vms / 1024 / 1024,
            "available_mb": system_mem.available / 1024 / 1024,
            "percent": system_mem.percent,
        }
    except Exception as e:
        logger.warning(f"Memory info error: {e}")
        return {"rss_mb": 0, "vms_mb": 0, "available_mb": 0, "percent": 0}
def needs_web_search(text):
    """Decide whether a transcript should be augmented with a web search.

    A search is triggered when *text* contains any ``SEARCH_KEYWORDS`` phrase
    as a whole word or phrase (case-insensitive), or a year from 2020 to 2030.
    Whole-word matching stops short keywords from firing inside longer words
    (e.g. 'now' inside "know", 'price' inside "priceless").

    Args:
        text: the transcribed user utterance; may be empty or None.

    Returns:
        bool: True when a web search should be performed.
    """
    if not text:
        return False
    text_lower = text.lower()
    for keyword in SEARCH_KEYWORDS:
        # Lookarounds rather than \b so a keyword that starts or ends with a
        # non-word character would still match correctly.
        pattern = r'(?<!\w)' + re.escape(keyword) + r'(?!\w)'
        if re.search(pattern, text_lower):
            logger.info(f"Web search triggered by keyword: '{keyword}'")
            return True
    if re.search(r'\b(202[0-9]|2030)\b', text):
        logger.info("Web search triggered by year reference")
        return True
    return False
def search_web(query, max_results=3):
    """Look up *query* with the DuckDuckGo Instant Answer API.

    The abstract (if any) comes first, followed by up to *max_results* related
    topics. When the API yields nothing, ``search_fallback`` is tried with a
    Wikipedia-restricted query.

    Args:
        query: free-text search query.
        max_results: maximum number of related topics to include.

    Returns:
        list[dict]: items with ``title`` and ``snippet`` keys; an empty list
        on any error, non-200 response or when nothing was found.
    """
    try:
        logger.info(f"🔍 Searching web for: '{query}'")
        response = requests.get(
            "https://api.duckduckgo.com/",
            params={
                'q': query,
                'format': 'json',
                'no_html': 1,
                'skip_disambig': 1,
            },
            timeout=5,
        )
        if response.status_code != 200:
            logger.warning(f"Web search returned HTTP {response.status_code}")
            return []

        data = response.json()
        results = []
        if data.get('Abstract'):
            results.append({
                'title': data.get('Heading', 'General Info'),
                'snippet': data['Abstract'][:300],
            })

        # Filter before limiting: entries without a 'Text' field must not
        # consume any of the max_results slots.
        topics = [
            topic
            for topic in (data.get('RelatedTopics') or [])
            if isinstance(topic, dict) and topic.get('Text')
        ]
        for topic in topics[:max_results]:
            results.append({
                # Title is the last URL path segment with underscores
                # turned into spaces; tolerate a missing or null FirstURL.
                'title': (topic.get('FirstURL') or '').split('/')[-1].replace('_', ' '),
                'snippet': topic['Text'][:200],
            })

        if not results:
            results = search_fallback(f"{query} site:wikipedia.org")

        if results:
            logger.info(f"✅ Found {len(results)} web results")
            return results
        logger.warning("No web results found")
        return []
    except Exception as e:
        # Search is optional: report the problem and carry on without results.
        logger.error(f"Web search error: {e}")
        return []
def _extract_result_snippets(page_html, limit=3):
    """Pull up to *limit* result snippets out of a DuckDuckGo HTML results page."""
    import html

    # Capture the whole element carrying class="result__snippet" so snippets
    # containing inline markup (e.g. <b> highlights) are not cut short at the
    # first nested tag.
    pattern = r'<(\w+)\b[^>]*\bclass="result__snippet"[^>]*>(.*?)</\1>'
    snippets = []
    for _tag, inner in re.findall(pattern, page_html, flags=re.DOTALL):
        text = html.unescape(re.sub(r'<[^>]+>', '', inner))
        text = ' '.join(text.split())  # collapse runs of whitespace
        if not text:
            continue
        snippets.append({'title': 'Search Result', 'snippet': text[:200]})
        if len(snippets) >= limit:
            break
    return snippets


def search_fallback(query):
    """Scrape DuckDuckGo's HTML results page as a fallback search.

    Args:
        query: free-text search query.

    Returns:
        list[dict]: up to three items with ``title`` ('Search Result') and
        ``snippet`` (plain text, at most 200 characters); an empty list on
        any error or non-200 response.
    """
    try:
        response = requests.get(
            "https://html.duckduckgo.com/html/",
            params={'q': query},
            headers={
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            },
            timeout=5,
        )
        if response.status_code != 200:
            return []
        return _extract_result_snippets(response.text)
    except Exception as e:
        logger.error(f"Fallback search error: {e}")
        return []
def format_search_context(search_results):
    """Render search results as a numbered text block for the LLM prompt.

    Args:
        search_results: iterable of dicts with ``title`` and ``snippet`` keys.

    Returns:
        str: an empty string when there are no results; otherwise a
        "Web Search Results" header followed by one numbered line per result.
    """
    if not search_results:
        return ""
    numbered_lines = [
        f"{position}. {entry['title']}: {entry['snippet']}\n"
        for position, entry in enumerate(search_results, 1)
    ]
    return "\n\nWeb Search Results:\n" + "".join(numbered_lines)
def initialize_models():
    """Load the STT, LLM and TTS back ends into the module-level globals.

    Each back end is loaded only if its global is still ``None``, so the
    function can be called again after a partial failure. On success the
    file-cleanup thread is started.

    Raises:
        Exception: whatever the STT or LLM loader raised (after logging).
        A missing gTTS package is not an error; TTS falls back to "silent".
    """
    global stt_pipeline, llm_model, llm_tokenizer, tts_pipeline, tts_type
    # Half precision only on GPU; CPU inference stays in float32.
    model_dtype = torch.float16 if device == "cuda" else torch.float32
    try:
        logger.info(f"Initial memory usage: {get_memory_usage()}")

        if stt_pipeline is None:
            logger.info("Loading Whisper-tiny STT model...")
            try:
                stt_pipeline = pipeline(
                    "automatic-speech-recognition",
                    model="openai/whisper-tiny",
                    device=device,
                    torch_dtype=model_dtype,
                    token=HF_TOKEN,
                    return_timestamps=False
                )
                logger.info("✅ STT model loaded successfully")
            except Exception as e:
                logger.error(f"STT loading failed: {e}")
                raise
            gc.collect()
            logger.info(f"STT loaded. Memory: {get_memory_usage()}")

        if llm_model is None:
            logger.info("Loading FLAN-T5 LLM...")
            try:
                model_name = "google/flan-t5-base"
                # trust_remote_code is intentionally not enabled: T5 is a
                # built-in transformers architecture, so there is no need to
                # allow code from the model repository to execute.
                llm_tokenizer = AutoTokenizer.from_pretrained(
                    model_name,
                    token=HF_TOKEN
                )
                llm_model = AutoModelForSeq2SeqLM.from_pretrained(
                    model_name,
                    torch_dtype=model_dtype,
                    token=HF_TOKEN
                ).to(device)
                if llm_tokenizer.pad_token is None:
                    llm_tokenizer.pad_token = llm_tokenizer.eos_token
                logger.info("✅ LLM model loaded successfully")
            except Exception as e:
                logger.error(f"LLM loading failed: {e}")
                raise
            gc.collect()
            logger.info(f"LLM loaded. Memory: {get_memory_usage()}")

        if tts_pipeline is None:
            logger.info("Loading TTS model...")
            try:
                # Import is only an availability probe; synthesis imports
                # gTTS again when it is actually used.
                from gtts import gTTS  # noqa: F401
                tts_pipeline = "gtts"
                tts_type = "gtts"
                logger.info("✅ Using gTTS (Google Text-to-Speech)")
            except ImportError:
                logger.warning("gTTS not available")
                tts_pipeline = "silent"
                tts_type = "silent"
                logger.warning("Using silent fallback for TTS")
            gc.collect()
            logger.info(f"TTS loaded. Memory: {get_memory_usage()}")

        logger.info("🎉 All models loaded successfully!")
        start_cleanup_thread()
    except Exception as e:
        logger.error(f"❌ Model loading error: {e}")
        logger.error(f"Memory usage at error: {get_memory_usage()}")
        raise
def generate_llm_response(text, search_context=""):
    """Generate a short answer to *text* with the loaded seq2seq LLM.

    Args:
        text: the user's question; only the first 200 characters are used.
            Empty, whitespace-only or None input gets a canned prompt.
        search_context: optional pre-formatted web search results to ground
            the answer in.

    Returns:
        str: the model's answer, or a canned fallback sentence when the input
        is empty, the output is shorter than three characters, or generation
        fails. This function never raises.
    """
    try:
        text = (text or "")[:200]
        if not text.strip():
            return "I'm listening. How can I help you?"

        if search_context:
            prompt = f"Based on the following information, answer the question concisely.\n{search_context}\n\nQuestion: {text}\nAnswer:"
        else:
            prompt = f"Answer concisely: {text}"

        encoded = llm_tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512
        )
        input_ids = encoded["input_ids"].to(device)
        attention_mask = encoded.get("attention_mask")
        if attention_mask is not None:
            attention_mask = attention_mask.to(device)

        # Compare against None explicitly: a pad id of 0 is valid but falsy,
        # so `pad or eos` would silently substitute the EOS id.
        pad_token_id = llm_tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = llm_tokenizer.eos_token_id

        with torch.no_grad():
            # early_stopping is omitted: it only applies to beam search and
            # this call samples with a single beam.
            output_ids = llm_model.generate(
                input_ids=input_ids,
                attention_mask=attention_mask,
                max_new_tokens=60,
                do_sample=True,
                temperature=0.7,
                top_k=50,
                top_p=0.9,
                no_repeat_ngram_size=2,
                pad_token_id=pad_token_id,
                use_cache=True
            )
        response = llm_tokenizer.decode(output_ids[0], skip_special_tokens=True)

        # Release tensors promptly to keep peak memory down.
        del encoded, input_ids, attention_mask, output_ids
        gc.collect()
        if device == "cuda":
            torch.cuda.empty_cache()

        response = response.strip()
        if len(response) < 3:
            if search_context:
                return "I found some information but couldn't process it properly."
            return "I understand. What else would you like to know?"
        return response
    except Exception as e:
        logger.error(f"LLM generation error: {e}", exc_info=True)
        return "I'm having trouble processing that. Could you try again?"
def _locate_pcm_payload(audio_bytes):
    """Return ``(start, end)`` byte offsets of the sample data in *audio_bytes*.

    Input that does not start with ``RIFF`` (or is 44 bytes or shorter) is
    treated as headerless PCM. For RIFF/WAVE input the chunk list is walked to
    find the ``data`` chunk, so headers with extra chunks are handled. If no
    ``data`` chunk is found, the fixed 44-byte header is assumed.
    """
    import struct

    total = len(audio_bytes)
    if total <= 44 or audio_bytes[:4] != b'RIFF':
        return 0, total
    if audio_bytes[8:12] == b'WAVE':
        pos = 12
        while pos + 8 <= total:
            chunk_id = audio_bytes[pos:pos + 4]
            (chunk_size,) = struct.unpack_from('<I', audio_bytes, pos + 4)
            body = pos + 8
            if chunk_id == b'data':
                end = body + chunk_size
                # A zero or oversized length is taken to mean "everything
                # that follows" (e.g. a header written before the size was
                # known).
                if chunk_size == 0 or end > total:
                    end = total
                return body, end
            # RIFF chunks are padded to an even number of bytes.
            pos = body + chunk_size + (chunk_size & 1)
    return 44, total


def preprocess_audio_optimized(audio_bytes):
    """Convert an uploaded audio body into float samples for the STT model.

    The payload is interpreted as 16-bit PCM and reported as 16 kHz; the
    sample rate and channel count in any WAV header are not inspected.
    Audio is clipped to 30 seconds.

    Args:
        audio_bytes: raw request body, either headerless PCM or a WAV file.

    Returns:
        tuple: ``(16000, samples)`` where ``samples`` is a float32 array
        scaled by 1/32768, or ``(None, None)`` when the clip is shorter than
        half a second.

    Raises:
        Exception: re-raised after logging if decoding fails.
    """
    sample_rate = 16000
    try:
        logger.info(f"Processing audio: {len(audio_bytes)} bytes")
        start, end = _locate_pcm_payload(audio_bytes)
        if start:
            logger.info("WAV header removed")
        # int16 samples need an even byte count; drop a dangling byte
        # rather than letting np.frombuffer reject the whole clip.
        usable = (end - start) & ~1
        pcm = audio_bytes[start:start + usable]
        audio_data = np.frombuffer(pcm, dtype=np.int16).astype(np.float32) / 32768.0

        max_samples = 30 * sample_rate
        if len(audio_data) > max_samples:
            audio_data = audio_data[:max_samples]
            logger.info("Audio trimmed to 30 seconds")

        min_samples = int(0.5 * sample_rate)
        if len(audio_data) < min_samples:
            logger.warning(f"Audio too short: {len(audio_data)/16000:.2f} seconds")
            return None, None

        logger.info(f"Audio processed: {len(audio_data)/16000:.2f} seconds")
        return sample_rate, audio_data
    except Exception as e:
        logger.error(f"Audio preprocessing error: {e}")
        raise
def _pcm16_to_wav(pcm_bytes, sample_rate=16000):
    """Wrap mono 16-bit PCM bytes in a WAV container and return the bytes."""
    import wave

    wav_buffer = io.BytesIO()
    with wave.open(wav_buffer, 'wb') as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(sample_rate)
        wav_file.writeframes(pcm_bytes)
    return wav_buffer.getvalue()


def _silent_wav(num_samples):
    """Return a 16 kHz mono WAV holding *num_samples* samples of silence."""
    return _pcm16_to_wav(np.zeros(num_samples, dtype=np.int16).tobytes())


def _synthesize_gtts_wav(text):
    """Synthesize *text* with gTTS and return 16 kHz mono 16-bit WAV bytes."""
    from gtts import gTTS
    from pydub import AudioSegment

    # Reserve a unique path and close the handle straight away: gTTS writes
    # to the path by name, so our own descriptor must not stay open.
    tmp_mp3 = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
    tmp_mp3.close()
    try:
        gTTS(text=text, lang='en', slow=False, timeout=10).save(tmp_mp3.name)
        audio = AudioSegment.from_file(tmp_mp3.name, format="mp3")
    finally:
        if os.path.exists(tmp_mp3.name):
            os.unlink(tmp_mp3.name)

    audio = audio.normalize()
    audio = audio.set_frame_rate(16000)
    audio = audio.set_channels(1)
    audio = audio.set_sample_width(2)
    audio = audio.fade_in(50).fade_out(100)
    raw_data = np.array(audio.get_array_of_samples(), dtype=np.int16)
    wav_data = _pcm16_to_wav(raw_data.tobytes())

    if len(wav_data) < 1000:
        raise ValueError(f"Audio too short: {len(wav_data)} bytes")
    if wav_data[:4] != b'RIFF' or wav_data[8:12] != b'WAVE':
        raise ValueError("Invalid WAV format")
    return wav_data


def generate_tts_audio(text):
    """Turn *text* into WAV audio (16 kHz, mono, 16-bit).

    With the gTTS back end, synthesis is retried up to three times when the
    error looks like rate limiting (HTTP 429), with a growing delay. Text is
    flattened to one line and capped at 200 characters plus an ellipsis.

    Args:
        text: the sentence to speak.

    Returns:
        bytes: a complete WAV file. One second of silence when no TTS back
        end is active, half a second of silence when synthesis fails. This
        function never raises.
    """
    try:
        text = text.replace('\n', ' ').strip()
        if len(text) > 200:
            text = text[:200] + "..."
        if not text:
            text = "I understand."
        logger.info(f"TTS generating: '{text[:50]}...'")

        if tts_type == "gtts":
            max_retries = 3
            retry_delay = 2
            for attempt in range(max_retries):
                try:
                    wav_data = _synthesize_gtts_wav(text)
                    logger.info(f"✓ Clean WAV generated: {len(wav_data)} bytes")
                    return wav_data
                except Exception as e:
                    error_str = str(e)
                    rate_limited = "429" in error_str or "Too Many Requests" in error_str
                    if rate_limited and attempt < max_retries - 1:
                        logger.warning(f"TTS retry {attempt + 1}...")
                        time.sleep(retry_delay)
                        retry_delay *= 1.5
                        continue
                    logger.error(f"TTS error: {e}")
                    raise

        logger.warning("Using silent fallback")
        return _silent_wav(16000)
    except Exception as e:
        logger.error(f"TTS critical error: {e}")
        return _silent_wav(8000)
@app.route('/process_audio', methods=['POST'])
def process_audio():
    """Run the full voice pipeline on an uploaded audio clip.

    Steps: speech-to-text, optional web search, LLM answer, text-to-speech.
    The synthesized WAV is stored under TEMP_AUDIO_DIR and registered in
    ``active_files`` so it can be fetched through ``/stream_audio/<file_id>``.

    Request body: raw audio bytes (at least 1000 bytes).

    Returns:
        JSON with ``status``, ``file_id``, ``stream_url``, ``message``,
        ``transcribed``, ``processing_time``, ``audio_size`` and
        ``web_search_used`` on success; otherwise a JSON error with status
        400 (bad input), 503 (models not loaded) or 500 (processing failure).
    """
    start_time = time.time()
    if not all([stt_pipeline, llm_model, llm_tokenizer, tts_pipeline]):
        logger.error("Models not ready")
        return jsonify({"error": "Models are still loading, please wait..."}), 503
    if not request.data:
        return jsonify({"error": "No audio data received"}), 400
    if len(request.data) < 1000:
        return jsonify({"error": "Audio data too small"}), 400

    initial_memory = get_memory_usage()
    logger.info(f"🎯 Processing started. Memory: {initial_memory['rss_mb']:.1f}MB")
    try:
        # --- Speech to text -------------------------------------------------
        logger.info("🎤 Converting speech to text...")
        stt_start = time.time()
        rate, audio_data = preprocess_audio_optimized(request.data)
        if audio_data is None:
            return jsonify({"error": "Invalid or too short audio"}), 400
        # NOTE(review): transcription is forced to Vietnamese ("vi") while the
        # search keywords, prompts and TTS voice are English - confirm this is
        # intended.
        stt_result = stt_pipeline(
            {"sampling_rate": rate, "raw": audio_data},
            generate_kwargs={"language": "vi"}
        )
        transcribed_text = stt_result.get('text', '').strip()
        del audio_data
        gc.collect()
        stt_time = time.time() - stt_start
        logger.info(f"✅ STT: '{transcribed_text}' ({stt_time:.2f}s)")
        if not transcribed_text or len(transcribed_text) < 2:
            transcribed_text = "Could you repeat that please?"

        # --- Optional web search --------------------------------------------
        search_context = ""
        web_search_used = False
        if needs_web_search(transcribed_text):
            search_start = time.time()
            search_results = search_web(transcribed_text)
            if search_results:
                search_context = format_search_context(search_results)
                web_search_used = True
                logger.info(f"🌐 Web search completed ({time.time() - search_start:.2f}s)")
            else:
                logger.info("No relevant search results found")

        # --- LLM answer -----------------------------------------------------
        logger.info("🤖 Generating AI response...")
        llm_start = time.time()
        assistant_response = generate_llm_response(transcribed_text, search_context)
        llm_time = time.time() - llm_start
        logger.info(f"✅ LLM: '{assistant_response}' ({llm_time:.2f}s)")

        # --- Text to speech -------------------------------------------------
        logger.info("🔊 Converting to speech...")
        tts_start = time.time()
        audio_response = generate_tts_audio(assistant_response)
        if not audio_response or len(audio_response) < 1000:
            logger.error("TTS produced invalid audio")
            return jsonify({"error": "TTS generation failed"}), 500
        tts_time = time.time() - tts_start

        # --- Persist and register the reply ---------------------------------
        # exist_ok avoids a FileExistsError when two concurrent requests both
        # find the directory missing.
        os.makedirs(app.config['TEMP_AUDIO_DIR'], exist_ok=True)
        file_id = str(uuid.uuid4())
        temp_filename = os.path.join(app.config['TEMP_AUDIO_DIR'], f"{file_id}.wav")
        with open(temp_filename, 'wb') as f:
            f.write(audio_response)
            f.flush()
            # Force the data to disk before the client is told about the file.
            os.fsync(f.fileno())
        if not os.path.exists(temp_filename):
            logger.error("File write failed")
            return jsonify({"error": "File save failed"}), 500
        file_size = os.path.getsize(temp_filename)
        logger.info(f"Audio saved: {file_id}.wav ({file_size} bytes)")
        with file_cleanup_lock:
            active_files[file_id] = {
                'filepath': temp_filename,
                'created_time': time.time(),
                'accessed': False,
                'size': file_size
            }

        total_time = time.time() - start_time
        response_data = {
            'status': 'success',
            'file_id': file_id,
            'stream_url': f'/stream_audio/{file_id}',
            'message': assistant_response,
            'transcribed': transcribed_text,
            'processing_time': round(total_time, 2),
            'audio_size': file_size,
            'web_search_used': web_search_used
        }
        logger.info(f"✅ Complete: {file_id} ({total_time:.2f}s) [Web:{web_search_used}]")
        return jsonify(response_data)
    except Exception as e:
        logger.error(f"❌ Processing error: {e}", exc_info=True)
        gc.collect()
        if device == "cuda":
            torch.cuda.empty_cache()
        # Exception details are only exposed outside Hugging Face Spaces.
        return jsonify({
            "error": "Processing failed",
            "details": str(e) if not IS_HF_SPACE else "Internal server error"
        }), 500
@app.route('/stream_audio/<file_id>')
def stream_audio(file_id):
    """Stream a previously generated WAV reply in 1 KiB chunks.

    The file is opened before the response is built, so a file that the
    cleanup thread removed in the meantime produces a clean 404 instead of an
    empty body with a non-zero Content-Length.

    Args:
        file_id: id returned by ``/process_audio``.

    Returns:
        A streaming ``audio/wav`` response, or a JSON 404 when the id is
        unknown or the file no longer exists.
    """
    with file_cleanup_lock:
        file_info = active_files.get(file_id)

    audio_file = None
    if file_info:
        try:
            audio_file = open(file_info['filepath'], 'rb')
        except OSError:
            audio_file = None
    if audio_file is None:
        logger.error(f"File not found: {file_id}")
        return jsonify({'error': 'File not found or expired.'}), 404

    # Size of the handle we actually hold, not of whatever is at the path now.
    file_size = os.fstat(audio_file.fileno()).st_size
    logger.info(f"Streaming {file_id}: {file_size} bytes")

    def generate():
        try:
            # Read incrementally rather than loading the whole file.
            for chunk in iter(lambda: audio_file.read(1024), b''):
                yield chunk
                # Tiny pause between chunks, kept from the original pacing.
                time.sleep(0.001)
            logger.info(f"Stream {file_id} completed")
        except Exception as e:
            logger.error(f"Stream error: {e}")
        finally:
            audio_file.close()

    response = Response(
        generate(),
        mimetype='audio/wav',
        direct_passthrough=False
    )
    # Also close the handle if the generator is never started or the client
    # disconnects early (closing twice is harmless).
    response.call_on_close(audio_file.close)
    response.headers['Content-Length'] = str(file_size)
    # NOTE(review): Range requests are not implemented here even though byte
    # ranges are advertised; header kept for client compatibility.
    response.headers['Accept-Ranges'] = 'bytes'
    response.headers['Cache-Control'] = 'no-cache'
    response.headers['Connection'] = 'keep-alive'
    return response
@app.route('/health', methods=['GET'])
def health_check():
    """Report detailed service health as JSON.

    The payload covers overall readiness, per-model load state, device and
    memory figures, the number of tracked audio files, whether the cleanup
    thread is alive, and the web-search feature flags.
    """
    memory = get_memory_usage()

    all_loaded = all((stt_pipeline, llm_model, llm_tokenizer, tts_pipeline))
    models = {
        "stt": stt_pipeline is not None,
        "llm": llm_model is not None and llm_tokenizer is not None,
        "tts": tts_pipeline is not None,
        "tts_type": tts_type,
    }
    system = {
        "device": device,
        "is_hf_space": IS_HF_SPACE,
        "memory_mb": round(memory['rss_mb'], 1),
        "available_mb": round(memory['available_mb'], 1),
        "memory_percent": round(memory['percent'], 1),
    }
    files = {
        "active_count": len(active_files),
        "cleanup_running": cleanup_thread is not None and cleanup_thread.is_alive(),
    }
    features = {
        "web_search": True,
        "search_keywords": len(SEARCH_KEYWORDS),
    }

    return jsonify({
        "status": "ready" if all_loaded else "loading",
        "models": models,
        "system": system,
        "files": files,
        "features": features,
    })
@app.route('/status', methods=['GET'])
def simple_status():
    """Return ``{"ready": <bool>}``: true once every model global is set."""
    required = (stt_pipeline, llm_model, llm_tokenizer, tts_pipeline)
    return jsonify({"ready": all(required)})
# Static landing page: describes the API and polls /status every five seconds
# to show whether the models have finished loading.
_HOME_PAGE_HTML = """
    <!DOCTYPE html>
    <html>
    <head>
        <title>Voice AI Assistant with Web Search</title>
        <style>
            body { font-family: Arial, sans-serif; margin: 40px; }
            .status { font-size: 18px; margin: 20px 0; }
            .ready { color: green; }
            .loading { color: orange; }
            .error { color: red; }
            code { background: #f4f4f4; padding: 2px 5px; }
            .feature { background: #e8f5e9; padding: 10px; margin: 10px 0; border-radius: 5px; }
        </style>
    </head>
    <body>
        <h1>🎙️ Voice AI Assistant with Web Search</h1>
        <div class="status">Status: <span id="status">Checking...</span></div>
        <div class="feature">
            <h3>🌐 New: Web Search Integration</h3>
            <p>The assistant can now search the web for current information!</p>
            <p><strong>Triggers:</strong> today, latest, news, current events, weather, prices, "who is", "what is", years (2024, 2025), etc.</p>
        </div>
        <h2>API Endpoints:</h2>
        <ul>
            <li><code>POST /process_audio</code> - Process audio with AI + Web Search</li>
            <li><code>GET /stream_audio/&lt;file_id&gt;</code> - Stream audio response</li>
            <li><code>GET /health</code> - Detailed health check</li>
            <li><code>GET /status</code> - Simple ready status</li>
        </ul>
        <h2>Features:</h2>
        <ul>
            <li>✅ Speech-to-Text (Whisper Tiny)</li>
            <li>✅ AI Response (FLAN-T5)</li>
            <li>✅ <strong>Web Search (DuckDuckGo)</strong></li>
            <li>✅ Text-to-Speech (gTTS)</li>
            <li>✅ Automatic file cleanup</li>
            <li>✅ Memory optimization</li>
        </ul>
        <h2>Example Questions:</h2>
        <ul>
            <li>"What's the weather like today?"</li>
            <li>"Who is the current president?"</li>
            <li>"What happened in 2024?"</li>
            <li>"Tell me the latest news"</li>
            <li>"What is the price of Bitcoin?"</li>
        </ul>
        <p><em>Optimized for ESP32 and Hugging Face Spaces</em></p>
        <script>
        function updateStatus() {
            fetch('/status')
            .then(r => r.json())
            .then(d => {
                const statusEl = document.getElementById('status');
                if (d.ready) {
                    statusEl.textContent = '✅ Ready';
                    statusEl.className = 'ready';
                } else {
                    statusEl.textContent = '⏳ Loading models...';
                    statusEl.className = 'loading';
                }
            })
            .catch(() => {
                document.getElementById('status').textContent = '❌ Error';
                document.getElementById('status').className = 'error';
            });
        }
        updateStatus();
        setInterval(updateStatus, 5000);
        </script>
    </body>
    </html>
    """


@app.route('/', methods=['GET'])
def home():
    """Serve the static HTML landing page."""
    return _HOME_PAGE_HTML
@app.errorhandler(Exception)
def handle_exception(e):
    """Catch-all error handler that returns JSON.

    Flask also routes HTTP errors (404, 405, ...) to a handler registered for
    ``Exception``. Those keep their own status code instead of being reported
    as 500. Anything else is logged with a traceback and answered with a
    generic 500.
    """
    # Recognize werkzeug HTTP errors by shape (integer ``code`` plus a
    # ``get_response`` method) so no additional import is needed.
    status_code = getattr(e, 'code', None)
    if isinstance(status_code, int) and callable(getattr(e, 'get_response', None)):
        return jsonify({"error": getattr(e, 'name', None) or "HTTP error"}), status_code

    logger.error(f"Unhandled exception: {e}", exc_info=True)
    return jsonify({"error": "Internal server error"}), 500
@app.errorhandler(413)
def handle_large_file(e):
    """Answer a 413 (request body too large) error with a JSON message."""
    payload = {"error": "Audio file too large (max 16MB)"}
    return jsonify(payload), 413
if __name__ == '__main__':
    # Script entry point: load every model first, then start the Flask server.
    logger.info("🚀 Starting Voice AI Assistant Server with Web Search")
    logger.info(f"Environment: {'Hugging Face Spaces' if IS_HF_SPACE else 'Local'}")
    try:
        initialize_models()
    except Exception as e:
        logger.error(f"❌ Startup failed: {e}")
        # SystemExit instead of exit(): the latter is a `site` convenience
        # that is not guaranteed to be defined.
        raise SystemExit(1)
    logger.info("🎉 Server ready!")

    # Port comes from the PORT environment variable, defaulting to 7860.
    port = int(os.environ.get('PORT', 7860))
    logger.info(f"🌐 Server starting on port {port}")
    app.run(
        host='0.0.0.0',
        port=port,
        debug=False,
        threaded=True,
        use_reloader=False
    )