ChAbhishek28 commited on
Commit
dad5387
·
1 Parent(s): 1a4bf58

Enhance voice bot with comprehensive multi-language and intelligent query handling

Browse files
Files changed (1) hide show
  1. enhanced_websocket_handler.py +321 -26
enhanced_websocket_handler.py CHANGED
@@ -26,6 +26,216 @@ hybrid_llm_service = HybridLLMService()
26
 
27
  logger = logging.getLogger("voicebot")
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  async def handle_enhanced_websocket_connection(websocket: WebSocket):
30
  """Enhanced WebSocket handler with hybrid LLM and voice features"""
31
  await websocket.accept()
@@ -315,9 +525,27 @@ async def handle_voice_message(websocket: WebSocket, data: dict, session_data: d
315
  })
316
  return
317
 
318
- # Extract user language preference
319
  user_language = data.get("lang") or data.get("language") or session_data.get("language") or session_data["user_preferences"].get("language") or "english"
320
- logger.info(f"🌍 Processing voice with language: {user_language}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
 
322
  # Save to temporary file
323
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
@@ -345,7 +573,7 @@ async def handle_voice_message(websocket: WebSocket, data: dict, session_data: d
345
  else:
346
  # Use server-side ASR (Whisper)
347
  logger.info(f"🎀 Processing audio with language preference: {user_language}")
348
- transcribed_text = await voice_service.speech_to_text(temp_file_path, user_language)
349
 
350
  # Clean up temp file
351
  Path(temp_file_path).unlink()
@@ -357,23 +585,31 @@ async def handle_voice_message(websocket: WebSocket, data: dict, session_data: d
357
  })
358
  return
359
 
360
- logger.info(f"🎀 Transcribed ({user_language}): {transcribed_text}")
 
 
361
 
362
- # Send transcription with detected language info
363
  await websocket.send_json({
364
  "type": "transcription",
365
  "text": transcribed_text,
366
  "language": user_language or "auto-detected",
367
- "confidence": "high" # Could be dynamic based on Whisper confidence
 
 
368
  })
369
 
370
- # Add language context to the prompt for better responses
371
- language_context = ""
372
- if user_language and user_language.lower() in ['hindi', 'hi', 'hi-in']:
373
- language_context = " (User is speaking in Hindi, so you may include Hindi terms where appropriate for government policies in India)"
374
- elif user_language and user_language.lower() in ['hinglish']:
375
- language_context = " (User is speaking in Hinglish - Hindi-English mix, so feel free to use both languages in your response)"
 
 
376
 
 
 
377
  enhanced_message = transcribed_text + language_context
378
 
379
  # Process as text message with language context
@@ -421,14 +657,7 @@ async def handle_voice_message(websocket: WebSocket, data: dict, session_data: d
421
  # Send voice response if enabled
422
  if session_data["user_preferences"]["response_mode"] in ["voice", "both"]:
423
  # Choose appropriate voice based on user's language
424
- voice_preference = session_data["user_preferences"]["preferred_voice"]
425
- if not voice_preference and user_language:
426
- if user_language.lower() in ['hindi', 'hi', 'hi-in']:
427
- voice_preference = "hi-IN-SwaraNeural" # Hindi female voice
428
- elif user_language.lower() in ['english', 'en', 'en-in']:
429
- voice_preference = "en-IN-NeerjaNeural" # Indian English female voice
430
- else:
431
- voice_preference = "en-US-AriaNeural" # Default English
432
 
433
  voice_text = voice_service.create_voice_response_with_guidance(
434
  response_text,
@@ -454,13 +683,39 @@ async def handle_voice_message(websocket: WebSocket, data: dict, session_data: d
454
  })
455
 
456
  async def get_hybrid_response(user_message: str, context: str, config: dict, knowledge_base: str):
457
- """Get response using hybrid LLM with document search (streaming)"""
458
  try:
 
 
 
 
459
  logger.info(f"πŸ” Searching documents for: '{user_message}' in knowledge base: {knowledge_base}")
460
  from rag_service import search_documents_async
461
  docs = await search_documents_async(user_message, limit=3)
462
  logger.info(f"πŸ“Š Document search returned {len(docs) if docs else 0} results")
463
- if docs:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
464
  try:
465
  from scenario_analysis_service import run_scenario_analysis
466
  # Detect scenario analysis intent (simple keyword match)
@@ -532,11 +787,51 @@ async def get_hybrid_response(user_message: str, context: str, config: dict, kno
532
  }
533
  yield response_obj
534
  else:
535
- logger.info("πŸ“š No documents found, using existing context")
536
- yield {"clause_text": context, "summary": "", "role_checklist": [], "source_title": "", "clause_id": "", "date": "", "url": "", "score": 1.0}
 
 
 
 
 
 
 
 
 
 
 
 
 
537
  except Exception as e:
538
- logger.warning(f"❌ Document search failed: {e}, using existing context")
539
- yield {"clause_text": context, "summary": "", "role_checklist": [], "source_title": "", "clause_id": "", "date": "", "url": "", "score": 1.0}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
 
541
  async def send_text_response(websocket: WebSocket, response_text: str, provider_used: str, session_data: dict):
542
  """Send text response to client"""
 
26
 
27
  logger = logging.getLogger("voicebot")
28
 
29
def analyze_query_context(query: str) -> dict:
    """Classify a user query as document-related or general conversation.

    Single-word keywords are matched against whole word tokens so that short
    keywords such as 'da' or 'hi' no longer fire on substrings of unrelated
    words ('date', 'today', 'this', ...). Multi-word keywords ('pay
    commission', 'thank you') are still matched as phrases.

    Args:
        query: Raw user utterance.

    Returns:
        dict with keys:
            type: "document_related" or "general_conversation".
            confidence: heuristic confidence in [0.0, 1.0].
            doc_keywords_found: number of document keywords matched.
            general_keywords_found: number of general keywords matched.
    """
    import re  # local import keeps this helper self-contained

    query_lower = query.lower()
    # Whole-word tokens of the query, used for single-word keyword matching.
    tokens = set(re.findall(r"[a-z0-9]+", query_lower))

    def _count_hits(keywords: list) -> int:
        # Phrases match as substrings; single words must match a whole token
        # to avoid false positives like 'da' inside 'date'.
        hits = 0
        for kw in keywords:
            if ' ' in kw:
                if kw in query_lower:
                    hits += 1
            elif kw in tokens:
                hits += 1
        return hits

    # Government document keywords
    doc_keywords = [
        'pension', 'leave', 'allowance', 'da', 'dearness', 'procurement', 'tender',
        'medical', 'reimbursement', 'transfer', 'posting', 'promotion', 'service',
        'rules', 'policy', 'government', 'circular', 'notification', 'benefits',
        'gratuity', 'provident fund', 'retirement', 'salary', 'pay commission'
    ]

    # General conversation keywords
    general_keywords = [
        'hello', 'hi', 'thank you', 'thanks', 'goodbye', 'bye', 'help',
        'how are you', 'what is your name', 'who are you', 'weather',
        'time', 'date', 'joke', 'story', 'song', 'recipe', 'movie'
    ]

    doc_matches = _count_hits(doc_keywords)
    general_matches = _count_hits(general_keywords)

    # Determine query type; document keywords take precedence.
    if doc_matches > 0:
        query_type = "document_related"
        confidence = min(doc_matches * 0.3, 1.0)
    elif general_matches > 0:
        query_type = "general_conversation"
        confidence = min(general_matches * 0.4, 1.0)
    elif len(query.strip().split()) < 3:
        # Very short queries with no known keywords are likely small talk.
        query_type = "general_conversation"
        confidence = 0.6
    else:
        # Longer unknown queries default to the document search path.
        query_type = "document_related"
        confidence = 0.3

    return {
        "type": query_type,
        "confidence": confidence,
        "doc_keywords_found": doc_matches,
        "general_keywords_found": general_matches
    }
72
+
73
async def generate_llm_fallback_response(user_message: str, query_context: dict) -> str:
    """Answer an out-of-context query via the hybrid LLM service.

    Args:
        user_message: The user's original query text.
        query_context: Classification dict from analyze_query_context; only
            the "type" key is consulted here.

    Returns:
        The LLM-generated reply, or a canned apology string when no provider
        is available or an error occurs (this function never raises).
    """
    try:
        # Let the hybrid service pick Groq vs Gemini for this message.
        chosen_provider = hybrid_llm_service.choose_llm_provider(user_message)

        # Tailor the system prompt to the detected query type.
        if query_context.get("type") == "general_conversation":
            system_prompt = """You are a helpful assistant for a government document system.
        The user is asking a general question not related to government documents.
        Provide a friendly, helpful response and gently guide them to ask about government policies,
        pension rules, leave policies, or other administrative matters if they need official information."""
        else:
            system_prompt = """You are an AI assistant for government document queries.
        The user asked about something that wasn't found in the document database.
        Provide helpful general information if you can, but always remind them that for official
        government policies and procedures, they should consult official sources or contact
        the relevant government office. Keep responses concise and professional."""

        if not chosen_provider:
            logger.warning("⚠️ No LLM provider available")
            return "I understand your question, but I'm currently unable to access my AI capabilities. Please try again later or contact the relevant government office for official information."

        reply = await hybrid_llm_service.generate_response(
            user_message,
            system_prompt=system_prompt,
            provider=chosen_provider
        )
        logger.info(f"βœ… Generated LLM fallback response using {chosen_provider.value}")
        return reply

    except Exception as e:
        logger.error(f"❌ Error generating LLM fallback response: {e}")
        return f"I apologize, but I encountered an error while processing your query: '{user_message}'. Please try rephrasing your question or contact the relevant authorities for assistance."
108
+
109
def validate_transcription_quality(text: str, language: str) -> dict:
    """Heuristically score an ASR transcript and suggest fixes for poor audio.

    Args:
        text: Transcribed text from the speech-to-text service.
        language: Normalized language code (e.g. 'en', 'hi', 'hi-en').

    Returns:
        dict with "score" (0.0-1.0), "level" ("high"/"medium"/"low"/
        "very_low") and "suggestions" (user-facing hints; empty when high).
    """
    if not text or not text.strip():
        return {
            "score": 0.0,
            "level": "very_low",
            "suggestions": ["No speech detected", "Check microphone", "Speak closer to microphone"]
        }

    words = text.strip().split()
    word_count = len(words)
    avg_word_length = sum(len(w) for w in words) / max(word_count, 1)
    has_meaningful_words = any(len(w) > 2 for w in words)

    # For English/Hinglish, expect at least one very common word to appear;
    # other languages are assumed valid.
    if language in ['en', 'hi-en']:
        frequent = ['the', 'and', 'is', 'in', 'to', 'of', 'for', 'with', 'on', 'pension', 'government']
        has_common_words = any(w.lower() in frequent for w in words)
    else:
        has_common_words = True

    # Accumulate the score from independent quality signals.
    score = 0.0
    for earned, amount in (
        (word_count > 0, 0.3),
        (word_count >= 3, 0.2),
        (avg_word_length > 2, 0.2),
        (has_meaningful_words, 0.2),
        (has_common_words, 0.1),
    ):
        if earned:
            score += amount

    # Very short or fragmentary output is heavily discounted.
    if word_count < 2 or avg_word_length < 2:
        score *= 0.5

    # Map the numeric score to a level plus user-facing suggestions.
    if score >= 0.8:
        level, suggestions = "high", []
    elif score >= 0.5:
        level, suggestions = "medium", ["Speak a bit more clearly for better recognition"]
    elif score >= 0.3:
        level, suggestions = "low", ["Speak more clearly", "Reduce background noise", "Speak closer to microphone"]
    else:
        level, suggestions = "very_low", ["Audio unclear", "Check microphone", "Reduce noise", "Speak more slowly"]

    return {
        "score": score,
        "level": level,
        "suggestions": suggestions
    }
169
+
170
def create_language_context(user_language: str, normalized_language: str) -> str:
    """Build a parenthetical hint telling the LLM which language the user spoke.

    Args:
        user_language: Raw language preference supplied by the client.
        normalized_language: Normalized code; currently unused here — kept
            for interface compatibility with callers.

    Returns:
        A hint string to append to the prompt, or "" when no language is set.
    """
    if not user_language:
        return ""

    # Alias groups mapped to their hint text; unpacked into a flat lookup.
    hints = {}
    for aliases, hint in (
        (('hindi', 'hi', 'hi-in'),
         " (User is speaking in Hindi. You may include relevant Hindi terms for government policies in India, especially for technical terms like 'ΰ€Έΰ€°ΰ€•ΰ€Ύΰ€°ΰ₯€ ΰ€¨ΰ₯€ΰ€€ΰ€Ώ', 'ΰ€ͺΰ₯‡ΰ€‚ΰ€Άΰ€¨', 'ΰ€­ΰ€€ΰ₯ΰ€€ΰ€Ύ' etc.)"),
        (('hinglish', 'hi-en'),
         " (User is speaking in Hinglish - Hindi-English mix. Feel free to use both languages naturally in your response, especially for government terminology.)"),
        (('spanish', 'es'),
         " (User is speaking in Spanish. Respond in Spanish if possible, or provide translations for key terms.)"),
        (('french', 'fr'),
         " (User is speaking in French. Respond in French if possible, or provide translations for key terms.)"),
        (('arabic', 'ar'),
         " (User is speaking in Arabic. Respond in Arabic if possible, or provide translations for key terms.)"),
        (('chinese', 'zh'),
         " (User is speaking in Chinese. Respond in Chinese if possible, or provide translations for key terms.)"),
        (('japanese', 'ja'),
         " (User is speaking in Japanese. Respond in Japanese if possible, or provide translations for key terms.)"),
        (('english', 'en', 'en-us', 'en-in'),
         " (User is speaking in English. Provide clear, professional responses.)"),
    ):
        for alias in aliases:
            hints[alias] = hint

    fallback = f" (User language preference: {user_language}. Adapt response accordingly if possible.)"
    return hints.get(user_language.lower(), fallback)
195
+
196
def select_voice_for_language(user_language: str, preferred_voice: str = None) -> str:
    """Pick a neural TTS voice matching the user's language.

    Args:
        user_language: Language name or code ('hindi', 'hi-IN', 'en', ...).
        preferred_voice: Explicit voice override; returned as-is when truthy.

    Returns:
        A TTS voice identifier; falls back to 'en-US-AriaNeural' for unknown
        or missing languages.
    """
    if preferred_voice:
        return preferred_voice

    fallback = 'en-US-AriaNeural'
    if not user_language:
        return fallback  # Default

    # Alias groups for each supported voice.
    groups = (
        (('hindi', 'hi', 'hi-in'), 'hi-IN-SwaraNeural'),
        # Indian English voice for Hinglish and Indian-English locales.
        (('hinglish', 'hi-en', 'en-in'), 'en-IN-NeerjaNeural'),
        (('english', 'en', 'en-us'), 'en-US-AriaNeural'),
        (('spanish', 'es'), 'es-ES-ElviraNeural'),
        (('french', 'fr'), 'fr-FR-DeniseNeural'),
        (('german', 'de'), 'de-DE-KatjaNeural'),
        (('portuguese', 'pt'), 'pt-BR-FranciscaNeural'),
        (('italian', 'it'), 'it-IT-ElsaNeural'),
        (('russian', 'ru'), 'ru-RU-SvetlanaNeural'),
        (('chinese', 'zh'), 'zh-CN-XiaoxiaoNeural'),
        (('japanese', 'ja'), 'ja-JP-NanamiNeural'),
        (('arabic', 'ar'), 'ar-SA-ZariyahNeural'),
    )

    key = user_language.lower()
    for aliases, voice in groups:
        if key in aliases:
            return voice
    return fallback
238
+
239
  async def handle_enhanced_websocket_connection(websocket: WebSocket):
240
  """Enhanced WebSocket handler with hybrid LLM and voice features"""
241
  await websocket.accept()
 
525
  })
526
  return
527
 
528
+ # Extract and validate user language preference
529
  user_language = data.get("lang") or data.get("language") or session_data.get("language") or session_data["user_preferences"].get("language") or "english"
530
+
531
+ # Normalize language codes
532
+ language_map = {
533
+ 'english': 'en', 'en': 'en', 'en-us': 'en', 'en-in': 'en',
534
+ 'hindi': 'hi', 'hi': 'hi', 'hi-in': 'hi',
535
+ 'hinglish': 'hi-en', 'hi-en': 'hi-en',
536
+ 'spanish': 'es', 'es': 'es',
537
+ 'french': 'fr', 'fr': 'fr',
538
+ 'german': 'de', 'de': 'de',
539
+ 'portuguese': 'pt', 'pt': 'pt',
540
+ 'italian': 'it', 'it': 'it',
541
+ 'russian': 'ru', 'ru': 'ru',
542
+ 'chinese': 'zh', 'zh': 'zh',
543
+ 'japanese': 'ja', 'ja': 'ja',
544
+ 'arabic': 'ar', 'ar': 'ar'
545
+ }
546
+
547
+ normalized_language = language_map.get(user_language.lower(), 'en')
548
+ logger.info(f"🌍 Processing voice with language: {user_language} (normalized: {normalized_language})")
549
 
550
  # Save to temporary file
551
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
 
573
  else:
574
  # Use server-side ASR (Whisper)
575
  logger.info(f"🎀 Processing audio with language preference: {user_language}")
576
+ transcribed_text = await voice_service.speech_to_text(temp_file_path, normalized_language)
577
 
578
  # Clean up temp file
579
  Path(temp_file_path).unlink()
 
585
  })
586
  return
587
 
588
+ # Validate transcription quality
589
+ transcription_quality = validate_transcription_quality(transcribed_text, normalized_language)
590
+ logger.info(f"🎀 Transcribed ({user_language}): {transcribed_text} | Quality: {transcription_quality['score']:.2f}")
591
 
592
+ # Send transcription with quality info
593
  await websocket.send_json({
594
  "type": "transcription",
595
  "text": transcribed_text,
596
  "language": user_language or "auto-detected",
597
+ "confidence": transcription_quality['level'],
598
+ "quality_score": transcription_quality['score'],
599
+ "suggestions": transcription_quality['suggestions']
600
  })
601
 
602
+ # Handle low-quality transcription
603
+ if transcription_quality['score'] < 0.3:
604
+ await websocket.send_json({
605
+ "type": "transcription_warning",
606
+ "message": "The audio quality seems low. Please speak clearly and try again.",
607
+ "suggestions": transcription_quality['suggestions']
608
+ })
609
+ return
610
 
611
+ # Add comprehensive language context to the prompt for better responses
612
+ language_context = create_language_context(user_language, normalized_language)
613
  enhanced_message = transcribed_text + language_context
614
 
615
  # Process as text message with language context
 
657
  # Send voice response if enabled
658
  if session_data["user_preferences"]["response_mode"] in ["voice", "both"]:
659
  # Choose appropriate voice based on user's language
660
+ voice_preference = select_voice_for_language(user_language, session_data["user_preferences"]["preferred_voice"])
 
 
 
 
 
 
 
661
 
662
  voice_text = voice_service.create_voice_response_with_guidance(
663
  response_text,
 
683
  })
684
 
685
  async def get_hybrid_response(user_message: str, context: str, config: dict, knowledge_base: str):
686
+ """Get response using hybrid LLM with intelligent document search and fallback (streaming)"""
687
  try:
688
+ # First, determine if this is a government document query or general query
689
+ query_context = analyze_query_context(user_message)
690
+ logger.info(f"πŸ” Query analysis: {query_context}")
691
+
692
  logger.info(f"πŸ” Searching documents for: '{user_message}' in knowledge base: {knowledge_base}")
693
  from rag_service import search_documents_async
694
  docs = await search_documents_async(user_message, limit=3)
695
  logger.info(f"πŸ“Š Document search returned {len(docs) if docs else 0} results")
696
+
697
+ # Check if we have relevant documents
698
+ has_relevant_docs = docs and any(doc.get("score", 0) > 0.5 for doc in docs)
699
+
700
+ # For general conversation queries, use LLM even if we have some documents
701
+ if query_context.get("type") == "general_conversation" and query_context.get("confidence", 0) > 0.6:
702
+ logger.info("πŸ“± Detected general conversation, using LLM directly")
703
+ llm_response = await generate_llm_fallback_response(user_message, query_context)
704
+ yield {
705
+ "clause_text": llm_response,
706
+ "summary": "AI-generated response for general conversation",
707
+ "role_checklist": ["This is general information", "For official queries, ask about government policies"],
708
+ "source_title": "AI Assistant",
709
+ "clause_id": "AI_GENERAL",
710
+ "date": "2024",
711
+ "url": "",
712
+ "score": 0.9,
713
+ "scenario_analysis": None,
714
+ "charts": []
715
+ }
716
+ return
717
+
718
+ if has_relevant_docs:
719
  try:
720
  from scenario_analysis_service import run_scenario_analysis
721
  # Detect scenario analysis intent (simple keyword match)
 
787
  }
788
  yield response_obj
789
  else:
790
+ # No relevant documents found - use LLM fallback
791
+ logger.info("πŸ“š No relevant documents found, using LLM fallback")
792
+ llm_response = await generate_llm_fallback_response(user_message, query_context)
793
+ yield {
794
+ "clause_text": llm_response,
795
+ "summary": "Generated by AI assistant for general query",
796
+ "role_checklist": ["Consider if this relates to government policies", "Contact relevant office for official information"],
797
+ "source_title": "AI Assistant",
798
+ "clause_id": "AI_001",
799
+ "date": "2024",
800
+ "url": "",
801
+ "score": 0.8,
802
+ "scenario_analysis": None,
803
+ "charts": []
804
+ }
805
  except Exception as e:
806
+ logger.warning(f"❌ Document search failed: {e}, using LLM fallback")
807
+ try:
808
+ llm_response = await generate_llm_fallback_response(user_message, {"type": "unknown", "confidence": 0.3})
809
+ yield {
810
+ "clause_text": llm_response,
811
+ "summary": "AI-generated response due to system error",
812
+ "role_checklist": ["Verify information independently", "Try rephrasing your query"],
813
+ "source_title": "AI Assistant (Fallback)",
814
+ "clause_id": "AI_ERROR",
815
+ "date": "2024",
816
+ "url": "",
817
+ "score": 0.5,
818
+ "scenario_analysis": None,
819
+ "charts": []
820
+ }
821
+ except Exception as fallback_error:
822
+ logger.error(f"❌ LLM fallback also failed: {fallback_error}")
823
+ yield {
824
+ "clause_text": "I apologize, but I'm experiencing technical difficulties. Please try again later or rephrase your question.",
825
+ "summary": "System error occurred",
826
+ "role_checklist": ["Try again later", "Rephrase your question", "Contact technical support"],
827
+ "source_title": "System Error",
828
+ "clause_id": "ERROR_001",
829
+ "date": "2024",
830
+ "url": "",
831
+ "score": 0.1,
832
+ "scenario_analysis": None,
833
+ "charts": []
834
+ }
835
 
836
  async def send_text_response(websocket: WebSocket, response_text: str, provider_used: str, session_data: dict):
837
  """Send text response to client"""