ChAbhishek28 commited on
Commit
dad5387
·
1 Parent(s): 1a4bf58

Enhance voice bot with comprehensive multi-language and intelligent query handling

Browse files
Files changed (1) hide show
  1. enhanced_websocket_handler.py +321 -26
enhanced_websocket_handler.py CHANGED
@@ -26,6 +26,216 @@ hybrid_llm_service = HybridLLMService()
26
 
27
  logger = logging.getLogger("voicebot")
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  async def handle_enhanced_websocket_connection(websocket: WebSocket):
30
  """Enhanced WebSocket handler with hybrid LLM and voice features"""
31
  await websocket.accept()
@@ -315,9 +525,27 @@ async def handle_voice_message(websocket: WebSocket, data: dict, session_data: d
315
  })
316
  return
317
 
318
- # Extract user language preference
319
  user_language = data.get("lang") or data.get("language") or session_data.get("language") or session_data["user_preferences"].get("language") or "english"
320
- logger.info(f"🌍 Processing voice with language: {user_language}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
 
322
  # Save to temporary file
323
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
@@ -345,7 +573,7 @@ async def handle_voice_message(websocket: WebSocket, data: dict, session_data: d
345
  else:
346
  # Use server-side ASR (Whisper)
347
  logger.info(f"🎀 Processing audio with language preference: {user_language}")
348
- transcribed_text = await voice_service.speech_to_text(temp_file_path, user_language)
349
 
350
  # Clean up temp file
351
  Path(temp_file_path).unlink()
@@ -357,23 +585,31 @@ async def handle_voice_message(websocket: WebSocket, data: dict, session_data: d
357
  })
358
  return
359
 
360
- logger.info(f"🎀 Transcribed ({user_language}): {transcribed_text}")
 
 
361
 
362
- # Send transcription with detected language info
363
  await websocket.send_json({
364
  "type": "transcription",
365
  "text": transcribed_text,
366
  "language": user_language or "auto-detected",
367
- "confidence": "high" # Could be dynamic based on Whisper confidence
 
 
368
  })
369
 
370
- # Add language context to the prompt for better responses
371
- language_context = ""
372
- if user_language and user_language.lower() in ['hindi', 'hi', 'hi-in']:
373
- language_context = " (User is speaking in Hindi, so you may include Hindi terms where appropriate for government policies in India)"
374
- elif user_language and user_language.lower() in ['hinglish']:
375
- language_context = " (User is speaking in Hinglish - Hindi-English mix, so feel free to use both languages in your response)"
 
 
376
 
 
 
377
  enhanced_message = transcribed_text + language_context
378
 
379
  # Process as text message with language context
@@ -421,14 +657,7 @@ async def handle_voice_message(websocket: WebSocket, data: dict, session_data: d
421
  # Send voice response if enabled
422
  if session_data["user_preferences"]["response_mode"] in ["voice", "both"]:
423
  # Choose appropriate voice based on user's language
424
- voice_preference = session_data["user_preferences"]["preferred_voice"]
425
- if not voice_preference and user_language:
426
- if user_language.lower() in ['hindi', 'hi', 'hi-in']:
427
- voice_preference = "hi-IN-SwaraNeural" # Hindi female voice
428
- elif user_language.lower() in ['english', 'en', 'en-in']:
429
- voice_preference = "en-IN-NeerjaNeural" # Indian English female voice
430
- else:
431
- voice_preference = "en-US-AriaNeural" # Default English
432
 
433
  voice_text = voice_service.create_voice_response_with_guidance(
434
  response_text,
@@ -454,13 +683,39 @@ async def handle_voice_message(websocket: WebSocket, data: dict, session_data: d
454
  })
455
 
456
  async def get_hybrid_response(user_message: str, context: str, config: dict, knowledge_base: str):
457
- """Get response using hybrid LLM with document search (streaming)"""
458
  try:
 
 
 
 
459
  logger.info(f"πŸ” Searching documents for: '{user_message}' in knowledge base: {knowledge_base}")
460
  from rag_service import search_documents_async
461
  docs = await search_documents_async(user_message, limit=3)
462
  logger.info(f"πŸ“Š Document search returned {len(docs) if docs else 0} results")
463
- if docs:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
464
  try:
465
  from scenario_analysis_service import run_scenario_analysis
466
  # Detect scenario analysis intent (simple keyword match)
@@ -532,11 +787,51 @@ async def get_hybrid_response(user_message: str, context: str, config: dict, kno
532
  }
533
  yield response_obj
534
  else:
535
- logger.info("πŸ“š No documents found, using existing context")
536
- yield {"clause_text": context, "summary": "", "role_checklist": [], "source_title": "", "clause_id": "", "date": "", "url": "", "score": 1.0}
 
 
 
 
 
 
 
 
 
 
 
 
 
537
  except Exception as e:
538
- logger.warning(f"❌ Document search failed: {e}, using existing context")
539
- yield {"clause_text": context, "summary": "", "role_checklist": [], "source_title": "", "clause_id": "", "date": "", "url": "", "score": 1.0}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
 
541
  async def send_text_response(websocket: WebSocket, response_text: str, provider_used: str, session_data: dict):
542
  """Send text response to client"""
 
26
 
27
  logger = logging.getLogger("voicebot")
28
 
29
def analyze_query_context(query: str) -> dict:
    """Classify a user query as document-related or general conversation.

    Single-word keywords are matched against whole word tokens so that short
    keywords such as 'da' or 'hi' no longer fire on substrings of unrelated
    words ('date', 'today', 'this', ...). Multi-word keywords ('pay
    commission', 'thank you') are still matched as phrases.

    Args:
        query: Raw user utterance.

    Returns:
        dict with keys:
            type: "document_related" or "general_conversation".
            confidence: heuristic confidence in [0.0, 1.0].
            doc_keywords_found: number of document keywords matched.
            general_keywords_found: number of general keywords matched.
    """
    import re  # local import keeps this helper self-contained

    query_lower = query.lower()
    # Whole-word tokens of the query, used for single-word keyword matching.
    tokens = set(re.findall(r"[a-z0-9]+", query_lower))

    def _count_hits(keywords: list) -> int:
        # Phrases match as substrings; single words must match a whole token
        # to avoid false positives like 'da' inside 'date'.
        hits = 0
        for kw in keywords:
            if ' ' in kw:
                if kw in query_lower:
                    hits += 1
            elif kw in tokens:
                hits += 1
        return hits

    # Government document keywords
    doc_keywords = [
        'pension', 'leave', 'allowance', 'da', 'dearness', 'procurement', 'tender',
        'medical', 'reimbursement', 'transfer', 'posting', 'promotion', 'service',
        'rules', 'policy', 'government', 'circular', 'notification', 'benefits',
        'gratuity', 'provident fund', 'retirement', 'salary', 'pay commission'
    ]

    # General conversation keywords
    general_keywords = [
        'hello', 'hi', 'thank you', 'thanks', 'goodbye', 'bye', 'help',
        'how are you', 'what is your name', 'who are you', 'weather',
        'time', 'date', 'joke', 'story', 'song', 'recipe', 'movie'
    ]

    doc_matches = _count_hits(doc_keywords)
    general_matches = _count_hits(general_keywords)

    # Determine query type; document keywords take precedence.
    if doc_matches > 0:
        query_type = "document_related"
        confidence = min(doc_matches * 0.3, 1.0)
    elif general_matches > 0:
        query_type = "general_conversation"
        confidence = min(general_matches * 0.4, 1.0)
    elif len(query.strip().split()) < 3:
        # Very short queries with no known keywords are likely small talk.
        query_type = "general_conversation"
        confidence = 0.6
    else:
        # Longer unknown queries default to the document search path.
        query_type = "document_related"
        confidence = 0.3

    return {
        "type": query_type,
        "confidence": confidence,
        "doc_keywords_found": doc_matches,
        "general_keywords_found": general_matches
    }
72
+
73
async def generate_llm_fallback_response(user_message: str, query_context: dict) -> str:
    """Answer an out-of-context query via the hybrid LLM service.

    Args:
        user_message: The user's original query text.
        query_context: Classification dict from analyze_query_context; only
            the "type" key is consulted here.

    Returns:
        The LLM-generated reply, or a canned apology string when no provider
        is available or an error occurs (this function never raises).
    """
    try:
        # Let the hybrid service pick Groq vs Gemini for this message.
        chosen_provider = hybrid_llm_service.choose_llm_provider(user_message)

        # Tailor the system prompt to the detected query type.
        if query_context.get("type") == "general_conversation":
            system_prompt = """You are a helpful assistant for a government document system.
        The user is asking a general question not related to government documents.
        Provide a friendly, helpful response and gently guide them to ask about government policies,
        pension rules, leave policies, or other administrative matters if they need official information."""
        else:
            system_prompt = """You are an AI assistant for government document queries.
        The user asked about something that wasn't found in the document database.
        Provide helpful general information if you can, but always remind them that for official
        government policies and procedures, they should consult official sources or contact
        the relevant government office. Keep responses concise and professional."""

        if not chosen_provider:
            logger.warning("⚠️ No LLM provider available")
            return "I understand your question, but I'm currently unable to access my AI capabilities. Please try again later or contact the relevant government office for official information."

        reply = await hybrid_llm_service.generate_response(
            user_message,
            system_prompt=system_prompt,
            provider=chosen_provider
        )
        logger.info(f"βœ… Generated LLM fallback response using {chosen_provider.value}")
        return reply

    except Exception as e:
        logger.error(f"❌ Error generating LLM fallback response: {e}")
        return f"I apologize, but I encountered an error while processing your query: '{user_message}'. Please try rephrasing your question or contact the relevant authorities for assistance."
108
+
109
def validate_transcription_quality(text: str, language: str) -> dict:
    """Heuristically score an ASR transcript and suggest fixes for poor audio.

    Args:
        text: Transcribed text from the speech-to-text service.
        language: Normalized language code (e.g. 'en', 'hi', 'hi-en').

    Returns:
        dict with "score" (0.0-1.0), "level" ("high"/"medium"/"low"/
        "very_low") and "suggestions" (user-facing hints; empty when high).
    """
    if not text or not text.strip():
        return {
            "score": 0.0,
            "level": "very_low",
            "suggestions": ["No speech detected", "Check microphone", "Speak closer to microphone"]
        }

    words = text.strip().split()
    word_count = len(words)
    avg_word_length = sum(len(w) for w in words) / max(word_count, 1)
    has_meaningful_words = any(len(w) > 2 for w in words)

    # For English/Hinglish, expect at least one very common word to appear;
    # other languages are assumed valid.
    if language in ['en', 'hi-en']:
        frequent = ['the', 'and', 'is', 'in', 'to', 'of', 'for', 'with', 'on', 'pension', 'government']
        has_common_words = any(w.lower() in frequent for w in words)
    else:
        has_common_words = True

    # Accumulate the score from independent quality signals.
    score = 0.0
    for earned, amount in (
        (word_count > 0, 0.3),
        (word_count >= 3, 0.2),
        (avg_word_length > 2, 0.2),
        (has_meaningful_words, 0.2),
        (has_common_words, 0.1),
    ):
        if earned:
            score += amount

    # Very short or fragmentary output is heavily discounted.
    if word_count < 2 or avg_word_length < 2:
        score *= 0.5

    # Map the numeric score to a level plus user-facing suggestions.
    if score >= 0.8:
        level, suggestions = "high", []
    elif score >= 0.5:
        level, suggestions = "medium", ["Speak a bit more clearly for better recognition"]
    elif score >= 0.3:
        level, suggestions = "low", ["Speak more clearly", "Reduce background noise", "Speak closer to microphone"]
    else:
        level, suggestions = "very_low", ["Audio unclear", "Check microphone", "Reduce noise", "Speak more slowly"]

    return {
        "score": score,
        "level": level,
        "suggestions": suggestions
    }
169
+
170
def create_language_context(user_language: str, normalized_language: str) -> str:
    """Build a parenthetical hint telling the LLM which language the user spoke.

    Args:
        user_language: Raw language preference supplied by the client.
        normalized_language: Normalized code; currently unused here — kept
            for interface compatibility with callers.

    Returns:
        A hint string to append to the prompt, or "" when no language is set.
    """
    if not user_language:
        return ""

    # Alias groups mapped to their hint text; unpacked into a flat lookup.
    hints = {}
    for aliases, hint in (
        (('hindi', 'hi', 'hi-in'),
         " (User is speaking in Hindi. You may include relevant Hindi terms for government policies in India, especially for technical terms like 'ΰ€Έΰ€°ΰ€•ΰ€Ύΰ€°ΰ₯€ ΰ€¨ΰ₯€ΰ€€ΰ€Ώ', 'ΰ€ͺΰ₯‡ΰ€‚ΰ€Άΰ€¨', 'ΰ€­ΰ€€ΰ₯ΰ€€ΰ€Ύ' etc.)"),
        (('hinglish', 'hi-en'),
         " (User is speaking in Hinglish - Hindi-English mix. Feel free to use both languages naturally in your response, especially for government terminology.)"),
        (('spanish', 'es'),
         " (User is speaking in Spanish. Respond in Spanish if possible, or provide translations for key terms.)"),
        (('french', 'fr'),
         " (User is speaking in French. Respond in French if possible, or provide translations for key terms.)"),
        (('arabic', 'ar'),
         " (User is speaking in Arabic. Respond in Arabic if possible, or provide translations for key terms.)"),
        (('chinese', 'zh'),
         " (User is speaking in Chinese. Respond in Chinese if possible, or provide translations for key terms.)"),
        (('japanese', 'ja'),
         " (User is speaking in Japanese. Respond in Japanese if possible, or provide translations for key terms.)"),
        (('english', 'en', 'en-us', 'en-in'),
         " (User is speaking in English. Provide clear, professional responses.)"),
    ):
        for alias in aliases:
            hints[alias] = hint

    fallback = f" (User language preference: {user_language}. Adapt response accordingly if possible.)"
    return hints.get(user_language.lower(), fallback)
195
+
196
def select_voice_for_language(user_language: str, preferred_voice: str = None) -> str:
    """Pick a neural TTS voice matching the user's language.

    Args:
        user_language: Language name or code ('hindi', 'hi-IN', 'en', ...).
        preferred_voice: Explicit voice override; returned as-is when truthy.

    Returns:
        A TTS voice identifier; falls back to 'en-US-AriaNeural' for unknown
        or missing languages.
    """
    if preferred_voice:
        return preferred_voice

    fallback = 'en-US-AriaNeural'
    if not user_language:
        return fallback  # Default

    # Alias groups for each supported voice.
    groups = (
        (('hindi', 'hi', 'hi-in'), 'hi-IN-SwaraNeural'),
        # Indian English voice for Hinglish and Indian-English locales.
        (('hinglish', 'hi-en', 'en-in'), 'en-IN-NeerjaNeural'),
        (('english', 'en', 'en-us'), 'en-US-AriaNeural'),
        (('spanish', 'es'), 'es-ES-ElviraNeural'),
        (('french', 'fr'), 'fr-FR-DeniseNeural'),
        (('german', 'de'), 'de-DE-KatjaNeural'),
        (('portuguese', 'pt'), 'pt-BR-FranciscaNeural'),
        (('italian', 'it'), 'it-IT-ElsaNeural'),
        (('russian', 'ru'), 'ru-RU-SvetlanaNeural'),
        (('chinese', 'zh'), 'zh-CN-XiaoxiaoNeural'),
        (('japanese', 'ja'), 'ja-JP-NanamiNeural'),
        (('arabic', 'ar'), 'ar-SA-ZariyahNeural'),
    )

    key = user_language.lower()
    for aliases, voice in groups:
        if key in aliases:
            return voice
    return fallback
238
+
239
  async def handle_enhanced_websocket_connection(websocket: WebSocket):
240
  """Enhanced WebSocket handler with hybrid LLM and voice features"""
241
  await websocket.accept()
 
525
  })
526
  return
527
 
528
+ # Extract and validate user language preference
529
  user_language = data.get("lang") or data.get("language") or session_data.get("language") or session_data["user_preferences"].get("language") or "english"
530
+
531
+ # Normalize language codes
532
+ language_map = {
533
+ 'english': 'en', 'en': 'en', 'en-us': 'en', 'en-in': 'en',
534
+ 'hindi': 'hi', 'hi': 'hi', 'hi-in': 'hi',
535
+ 'hinglish': 'hi-en', 'hi-en': 'hi-en',
536
+ 'spanish': 'es', 'es': 'es',
537
+ 'french': 'fr', 'fr': 'fr',
538
+ 'german': 'de', 'de': 'de',
539
+ 'portuguese': 'pt', 'pt': 'pt',
540
+ 'italian': 'it', 'it': 'it',
541
+ 'russian': 'ru', 'ru': 'ru',
542
+ 'chinese': 'zh', 'zh': 'zh',
543
+ 'japanese': 'ja', 'ja': 'ja',
544
+ 'arabic': 'ar', 'ar': 'ar'
545
+ }
546
+
547
+ normalized_language = language_map.get(user_language.lower(), 'en')
548
+ logger.info(f"🌍 Processing voice with language: {user_language} (normalized: {normalized_language})")
549
 
550
  # Save to temporary file
551
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
 
573
  else:
574
  # Use server-side ASR (Whisper)
575
  logger.info(f"🎀 Processing audio with language preference: {user_language}")
576
+ transcribed_text = await voice_service.speech_to_text(temp_file_path, normalized_language)
577
 
578
  # Clean up temp file
579
  Path(temp_file_path).unlink()
 
585
  })
586
  return
587
 
588
+ # Validate transcription quality
589
+ transcription_quality = validate_transcription_quality(transcribed_text, normalized_language)
590
+ logger.info(f"🎀 Transcribed ({user_language}): {transcribed_text} | Quality: {transcription_quality['score']:.2f}")
591
 
592
+ # Send transcription with quality info
593
  await websocket.send_json({
594
  "type": "transcription",
595
  "text": transcribed_text,
596
  "language": user_language or "auto-detected",
597
+ "confidence": transcription_quality['level'],
598
+ "quality_score": transcription_quality['score'],
599
+ "suggestions": transcription_quality['suggestions']
600
  })
601
 
602
+ # Handle low-quality transcription
603
+ if transcription_quality['score'] < 0.3:
604
+ await websocket.send_json({
605
+ "type": "transcription_warning",
606
+ "message": "The audio quality seems low. Please speak clearly and try again.",
607
+ "suggestions": transcription_quality['suggestions']
608
+ })
609
+ return
610
 
611
+ # Add comprehensive language context to the prompt for better responses
612
+ language_context = create_language_context(user_language, normalized_language)
613
  enhanced_message = transcribed_text + language_context
614
 
615
  # Process as text message with language context
 
657
  # Send voice response if enabled
658
  if session_data["user_preferences"]["response_mode"] in ["voice", "both"]:
659
  # Choose appropriate voice based on user's language
660
+ voice_preference = select_voice_for_language(user_language, session_data["user_preferences"]["preferred_voice"])
 
 
 
 
 
 
 
661
 
662
  voice_text = voice_service.create_voice_response_with_guidance(
663
  response_text,
 
683
  })
684
 
685
  async def get_hybrid_response(user_message: str, context: str, config: dict, knowledge_base: str):
686
+ """Get response using hybrid LLM with intelligent document search and fallback (streaming)"""
687
  try:
688
+ # First, determine if this is a government document query or general query
689
+ query_context = analyze_query_context(user_message)
690
+ logger.info(f"πŸ” Query analysis: {query_context}")
691
+
692
  logger.info(f"πŸ” Searching documents for: '{user_message}' in knowledge base: {knowledge_base}")
693
  from rag_service import search_documents_async
694
  docs = await search_documents_async(user_message, limit=3)
695
  logger.info(f"πŸ“Š Document search returned {len(docs) if docs else 0} results")
696
+
697
+ # Check if we have relevant documents
698
+ has_relevant_docs = docs and any(doc.get("score", 0) > 0.5 for doc in docs)
699
+
700
+ # For general conversation queries, use LLM even if we have some documents
701
+ if query_context.get("type") == "general_conversation" and query_context.get("confidence", 0) > 0.6:
702
+ logger.info("πŸ“± Detected general conversation, using LLM directly")
703
+ llm_response = await generate_llm_fallback_response(user_message, query_context)
704
+ yield {
705
+ "clause_text": llm_response,
706
+ "summary": "AI-generated response for general conversation",
707
+ "role_checklist": ["This is general information", "For official queries, ask about government policies"],
708
+ "source_title": "AI Assistant",
709
+ "clause_id": "AI_GENERAL",
710
+ "date": "2024",
711
+ "url": "",
712
+ "score": 0.9,
713
+ "scenario_analysis": None,
714
+ "charts": []
715
+ }
716
+ return
717
+
718
+ if has_relevant_docs:
719
  try:
720
  from scenario_analysis_service import run_scenario_analysis
721
  # Detect scenario analysis intent (simple keyword match)
 
787
  }
788
  yield response_obj
789
  else:
790
+ # No relevant documents found - use LLM fallback
791
+ logger.info("πŸ“š No relevant documents found, using LLM fallback")
792
+ llm_response = await generate_llm_fallback_response(user_message, query_context)
793
+ yield {
794
+ "clause_text": llm_response,
795
+ "summary": "Generated by AI assistant for general query",
796
+ "role_checklist": ["Consider if this relates to government policies", "Contact relevant office for official information"],
797
+ "source_title": "AI Assistant",
798
+ "clause_id": "AI_001",
799
+ "date": "2024",
800
+ "url": "",
801
+ "score": 0.8,
802
+ "scenario_analysis": None,
803
+ "charts": []
804
+ }
805
  except Exception as e:
806
+ logger.warning(f"❌ Document search failed: {e}, using LLM fallback")
807
+ try:
808
+ llm_response = await generate_llm_fallback_response(user_message, {"type": "unknown", "confidence": 0.3})
809
+ yield {
810
+ "clause_text": llm_response,
811
+ "summary": "AI-generated response due to system error",
812
+ "role_checklist": ["Verify information independently", "Try rephrasing your query"],
813
+ "source_title": "AI Assistant (Fallback)",
814
+ "clause_id": "AI_ERROR",
815
+ "date": "2024",
816
+ "url": "",
817
+ "score": 0.5,
818
+ "scenario_analysis": None,
819
+ "charts": []
820
+ }
821
+ except Exception as fallback_error:
822
+ logger.error(f"❌ LLM fallback also failed: {fallback_error}")
823
+ yield {
824
+ "clause_text": "I apologize, but I'm experiencing technical difficulties. Please try again later or rephrase your question.",
825
+ "summary": "System error occurred",
826
+ "role_checklist": ["Try again later", "Rephrase your question", "Contact technical support"],
827
+ "source_title": "System Error",
828
+ "clause_id": "ERROR_001",
829
+ "date": "2024",
830
+ "url": "",
831
+ "score": 0.1,
832
+ "scenario_analysis": None,
833
+ "charts": []
834
+ }
835
 
836
  async def send_text_response(websocket: WebSocket, response_text: str, provider_used: str, session_data: dict):
837
  """Send text response to client"""