Spaces:
Sleeping
Sleeping
Commit ·
dad5387
1
Parent(s): 1a4bf58
Enhance voice bot with comprehensive multi-language and intelligent query handling
Browse files- enhanced_websocket_handler.py +321 -26
enhanced_websocket_handler.py
CHANGED
|
@@ -26,6 +26,216 @@ hybrid_llm_service = HybridLLMService()
|
|
| 26 |
|
| 27 |
logger = logging.getLogger("voicebot")
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
async def handle_enhanced_websocket_connection(websocket: WebSocket):
|
| 30 |
"""Enhanced WebSocket handler with hybrid LLM and voice features"""
|
| 31 |
await websocket.accept()
|
|
@@ -315,9 +525,27 @@ async def handle_voice_message(websocket: WebSocket, data: dict, session_data: d
|
|
| 315 |
})
|
| 316 |
return
|
| 317 |
|
| 318 |
-
# Extract user language preference
|
| 319 |
user_language = data.get("lang") or data.get("language") or session_data.get("language") or session_data["user_preferences"].get("language") or "english"
|
| 320 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
|
| 322 |
# Save to temporary file
|
| 323 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
|
|
@@ -345,7 +573,7 @@ async def handle_voice_message(websocket: WebSocket, data: dict, session_data: d
|
|
| 345 |
else:
|
| 346 |
# Use server-side ASR (Whisper)
|
| 347 |
logger.info(f"π€ Processing audio with language preference: {user_language}")
|
| 348 |
-
transcribed_text = await voice_service.speech_to_text(temp_file_path,
|
| 349 |
|
| 350 |
# Clean up temp file
|
| 351 |
Path(temp_file_path).unlink()
|
|
@@ -357,23 +585,31 @@ async def handle_voice_message(websocket: WebSocket, data: dict, session_data: d
|
|
| 357 |
})
|
| 358 |
return
|
| 359 |
|
| 360 |
-
|
|
|
|
|
|
|
| 361 |
|
| 362 |
-
# Send transcription with
|
| 363 |
await websocket.send_json({
|
| 364 |
"type": "transcription",
|
| 365 |
"text": transcribed_text,
|
| 366 |
"language": user_language or "auto-detected",
|
| 367 |
-
"confidence":
|
|
|
|
|
|
|
| 368 |
})
|
| 369 |
|
| 370 |
-
#
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
|
|
|
|
|
|
| 376 |
|
|
|
|
|
|
|
| 377 |
enhanced_message = transcribed_text + language_context
|
| 378 |
|
| 379 |
# Process as text message with language context
|
|
@@ -421,14 +657,7 @@ async def handle_voice_message(websocket: WebSocket, data: dict, session_data: d
|
|
| 421 |
# Send voice response if enabled
|
| 422 |
if session_data["user_preferences"]["response_mode"] in ["voice", "both"]:
|
| 423 |
# Choose appropriate voice based on user's language
|
| 424 |
-
voice_preference = session_data["user_preferences"]["preferred_voice"]
|
| 425 |
-
if not voice_preference and user_language:
|
| 426 |
-
if user_language.lower() in ['hindi', 'hi', 'hi-in']:
|
| 427 |
-
voice_preference = "hi-IN-SwaraNeural" # Hindi female voice
|
| 428 |
-
elif user_language.lower() in ['english', 'en', 'en-in']:
|
| 429 |
-
voice_preference = "en-IN-NeerjaNeural" # Indian English female voice
|
| 430 |
-
else:
|
| 431 |
-
voice_preference = "en-US-AriaNeural" # Default English
|
| 432 |
|
| 433 |
voice_text = voice_service.create_voice_response_with_guidance(
|
| 434 |
response_text,
|
|
@@ -454,13 +683,39 @@ async def handle_voice_message(websocket: WebSocket, data: dict, session_data: d
|
|
| 454 |
})
|
| 455 |
|
| 456 |
async def get_hybrid_response(user_message: str, context: str, config: dict, knowledge_base: str):
|
| 457 |
-
"""Get response using hybrid LLM with document search (streaming)"""
|
| 458 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 459 |
logger.info(f"π Searching documents for: '{user_message}' in knowledge base: {knowledge_base}")
|
| 460 |
from rag_service import search_documents_async
|
| 461 |
docs = await search_documents_async(user_message, limit=3)
|
| 462 |
logger.info(f"π Document search returned {len(docs) if docs else 0} results")
|
| 463 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 464 |
try:
|
| 465 |
from scenario_analysis_service import run_scenario_analysis
|
| 466 |
# Detect scenario analysis intent (simple keyword match)
|
|
@@ -532,11 +787,51 @@ async def get_hybrid_response(user_message: str, context: str, config: dict, kno
|
|
| 532 |
}
|
| 533 |
yield response_obj
|
| 534 |
else:
|
| 535 |
-
|
| 536 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 537 |
except Exception as e:
|
| 538 |
-
logger.warning(f"β Document search failed: {e}, using
|
| 539 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 540 |
|
| 541 |
async def send_text_response(websocket: WebSocket, response_text: str, provider_used: str, session_data: dict):
|
| 542 |
"""Send text response to client"""
|
|
|
|
| 26 |
|
| 27 |
logger = logging.getLogger("voicebot")
|
| 28 |
|
| 29 |
+
def analyze_query_context(query: str) -> dict:
    """Classify a query as document-related or general conversation.

    Keywords are matched on word boundaries rather than as raw substrings,
    so that short keywords such as 'da' or 'hi' do not fire inside unrelated
    words ("today", "update", "this", "which").

    Args:
        query: Raw user query text. ``None`` is treated as an empty query.

    Returns:
        A dict with the keys:
            "type": "document_related" or "general_conversation".
            "confidence": heuristic confidence in the range 0.0-1.0.
            "doc_keywords_found": number of document keywords matched.
            "general_keywords_found": number of general keywords matched.
    """
    query = query or ""

    # Government document keywords
    doc_keywords = (
        'pension', 'leave', 'allowance', 'da', 'dearness', 'procurement', 'tender',
        'medical', 'reimbursement', 'transfer', 'posting', 'promotion', 'service',
        'rules', 'policy', 'government', 'circular', 'notification', 'benefits',
        'gratuity', 'provident fund', 'retirement', 'salary', 'pay commission',
    )

    # General conversation keywords
    general_keywords = (
        'hello', 'hi', 'thank you', 'thanks', 'goodbye', 'bye', 'help',
        'how are you', 'what is your name', 'who are you', 'weather',
        'time', 'date', 'joke', 'story', 'song', 'recipe', 'movie',
    )

    # Turn punctuation into spaces and collapse whitespace; padding with a
    # space on both sides lets a plain substring test act as a word-boundary
    # test, including for multi-word keywords such as 'provident fund'.
    tokens = "".join(ch if ch.isalnum() else " " for ch in query.lower()).split()
    padded = " " + " ".join(tokens) + " "

    def count_matches(keywords) -> int:
        # Short keywords ('da', 'hi', 'bye') must match a whole word; longer
        # ones may match a word prefix so inflected forms ("pensions",
        # "transferred") still count.
        return sum(
            1
            for kw in keywords
            if (f" {kw} " if len(kw) <= 3 else f" {kw}") in padded
        )

    doc_matches = count_matches(doc_keywords)
    general_matches = count_matches(general_keywords)

    # Document keywords take precedence over general ones.
    if doc_matches > 0:
        query_type = "document_related"
        confidence = min(doc_matches * 0.3, 1.0)
    elif general_matches > 0:
        query_type = "general_conversation"
        confidence = min(general_matches * 0.4, 1.0)
    elif len(query.split()) < 3:
        query_type = "general_conversation"  # Short queries likely general
        confidence = 0.6
    else:
        query_type = "document_related"  # Default to document search for longer queries
        confidence = 0.3

    return {
        "type": query_type,
        "confidence": confidence,
        "doc_keywords_found": doc_matches,
        "general_keywords_found": general_matches,
    }
|
| 72 |
+
|
| 73 |
+
async def generate_llm_fallback_response(user_message: str, query_context: dict) -> str:
    """Generate a response with the hybrid LLM service for out-of-context queries.

    Args:
        user_message: The user's query text, passed to the LLM as-is.
        query_context: Query classification dict; only its "type" key is read.
            ``None`` is treated as an empty context.

    Returns:
        The text returned by ``hybrid_llm_service.generate_response``, or a
        fixed apology message when no provider is available or when any
        exception is raised while generating the response.
    """
    try:
        # Determine which LLM to use based on query complexity
        provider = hybrid_llm_service.choose_llm_provider(user_message)
        if not provider:
            logger.warning("⚠️ No LLM provider available")
            return "I understand your question, but I'm currently unable to access my AI capabilities. Please try again later or contact the relevant government office for official information."

        # Create appropriate system prompt based on query type
        if (query_context or {}).get("type") == "general_conversation":
            system_prompt = """You are a helpful assistant for a government document system.
            The user is asking a general question not related to government documents.
            Provide a friendly, helpful response and gently guide them to ask about government policies,
            pension rules, leave policies, or other administrative matters if they need official information."""
        else:
            system_prompt = """You are an AI assistant for government document queries.
            The user asked about something that wasn't found in the document database.
            Provide helpful general information if you can, but always remind them that for official
            government policies and procedures, they should consult official sources or contact
            the relevant government office. Keep responses concise and professional."""

        # Generate response using hybrid LLM service
        response = await hybrid_llm_service.generate_response(
            user_message,
            system_prompt=system_prompt,
            provider=provider,
        )
        logger.info(f"✅ Generated LLM fallback response using {provider.value}")
        return response

    except Exception as e:
        # Top-level boundary for the fallback path: log with traceback and
        # degrade to a user-facing apology instead of propagating.
        logger.exception(f"❌ Error generating LLM fallback response: {e}")
        return f"I apologize, but I encountered an error while processing your query: '{user_message}'. Please try rephrasing your question or contact the relevant authorities for assistance."
|
| 108 |
+
|
| 109 |
+
def validate_transcription_quality(text: str, language: str) -> dict:
    """Score a transcription heuristically and suggest how to improve it.

    The score is accumulated in integer tenths and converted to a float once
    at the end. Summing 0.3 + 0.2 + 0.2 + 0.1 in floating point yields
    0.7999999999999999, which would miss the ``>= 0.8`` "high" threshold.

    Args:
        text: Transcribed text; ``None``, empty or whitespace-only text scores 0.0.
        language: Language code. The common-word check is applied only for
            'en' and 'hi-en' (case-insensitive); other languages are assumed
            to pass it.

    Returns:
        A dict with the keys:
            "score": float in the range 0.0-1.0.
            "level": "high", "medium", "low" or "very_low".
            "suggestions": list of user-facing hints (empty for "high").
    """
    if not text or not text.strip():
        return {
            "score": 0.0,
            "level": "very_low",
            "suggestions": ["No speech detected", "Check microphone", "Speak closer to microphone"]
        }

    # Quality indicators (words is non-empty because text has non-space content)
    words = text.split()
    word_count = len(words)
    avg_word_length = sum(len(word) for word in words) / word_count
    has_meaningful_words = any(len(word) > 2 for word in words)

    # Language-specific checks
    if (language or "").lower() in ('en', 'hi-en'):
        # Check for common English/Hinglish patterns; surrounding punctuation
        # is stripped so that "the," or "pension." still count.
        common_words = {'the', 'and', 'is', 'in', 'to', 'of', 'for', 'with', 'on', 'pension', 'government'}
        has_common_words = any(
            word.strip(".,!?;:\"'()").lower() in common_words for word in words
        )
    else:
        has_common_words = True  # Assume valid for other languages

    # Calculate quality score in tenths (3 points for having any words at all)
    points = 3
    if word_count >= 3:
        points += 2
    if avg_word_length > 2:
        points += 2
    if has_meaningful_words:
        points += 2
    if has_common_words:
        points += 1
    score = points / 10

    # Penalize very short or nonsensical text
    if word_count < 2 or avg_word_length < 2:
        score /= 2

    # Determine quality level
    if score >= 0.8:
        level = "high"
        suggestions = []
    elif score >= 0.5:
        level = "medium"
        suggestions = ["Speak a bit more clearly for better recognition"]
    elif score >= 0.3:
        level = "low"
        suggestions = ["Speak more clearly", "Reduce background noise", "Speak closer to microphone"]
    else:
        level = "very_low"
        suggestions = ["Audio unclear", "Check microphone", "Reduce noise", "Speak more slowly"]

    return {
        "score": score,
        "level": level,
        "suggestions": suggestions
    }
|
| 169 |
+
|
| 170 |
+
def create_language_context(user_language: str, normalized_language: str) -> str:
    """Build the language hint appended to the user's message for the LLM.

    Args:
        user_language: Language name or code as supplied by the client
            (e.g. "hindi", "hi-IN", "en"). Matching ignores case and
            surrounding whitespace.
        normalized_language: Normalized language code. Not used when choosing
            the hint; kept so existing callers keep working.

    Returns:
        A parenthesised hint starting with a space, or "" when no language
        is given.
    """
    if not user_language:
        return ""

    lang_lower = user_language.strip().lower()
    if not lang_lower:
        return ""

    # Hints with bespoke wording, keyed by every accepted alias.
    hints = {}
    for alias in ('hindi', 'hi', 'hi-in'):
        hints[alias] = " (User is speaking in Hindi. You may include relevant Hindi terms for government policies in India, especially for technical terms like 'सरकारी नीति', 'पेंशन', 'भत्ता' etc.)"
    for alias in ('hinglish', 'hi-en'):
        hints[alias] = " (User is speaking in Hinglish - Hindi-English mix. Feel free to use both languages naturally in your response, especially for government terminology.)"
    for alias in ('english', 'en', 'en-us', 'en-in'):
        hints[alias] = " (User is speaking in English. Provide clear, professional responses.)"

    # Languages that share the same "respond in X" wording.
    respond_in = (
        ('Spanish', ('spanish', 'es')),
        ('French', ('french', 'fr')),
        ('Arabic', ('arabic', 'ar')),
        ('Chinese', ('chinese', 'zh')),
        ('Japanese', ('japanese', 'ja')),
    )
    for name, aliases in respond_in:
        for alias in aliases:
            hints[alias] = f" (User is speaking in {name}. Respond in {name} if possible, or provide translations for key terms.)"

    if lang_lower in hints:
        return hints[lang_lower]

    # Unknown language: pass the client's own label through to the LLM.
    return f" (User language preference: {user_language}. Adapt response accordingly if possible.)"
|
| 195 |
+
|
| 196 |
+
def select_voice_for_language(user_language: str, preferred_voice: str = None) -> str:
    """Select the TTS voice name for the user's language.

    Args:
        user_language: Language name or code (e.g. "hindi", "en-IN", "es_MX").
            Matching ignores case and surrounding whitespace and treats
            '_' like '-'.
        preferred_voice: Explicit voice name; returned unchanged when truthy.

    Returns:
        The preferred voice if given; otherwise the voice mapped to the
        language, then the voice mapped to its base-language subtag
        ("es-mx" -> "es"), and finally the default "en-US-AriaNeural".
    """
    default_voice = 'en-US-AriaNeural'

    if preferred_voice:
        return preferred_voice

    if not user_language:
        return default_voice

    # Voice mapping for different languages
    voice_map = {
        'hindi': 'hi-IN-SwaraNeural',
        'hi': 'hi-IN-SwaraNeural',
        'hi-in': 'hi-IN-SwaraNeural',
        'hinglish': 'en-IN-NeerjaNeural',  # Indian English for Hinglish
        'hi-en': 'en-IN-NeerjaNeural',
        'english': 'en-US-AriaNeural',
        'en': 'en-US-AriaNeural',
        'en-us': 'en-US-AriaNeural',
        'en-in': 'en-IN-NeerjaNeural',
        'spanish': 'es-ES-ElviraNeural',
        'es': 'es-ES-ElviraNeural',
        'french': 'fr-FR-DeniseNeural',
        'fr': 'fr-FR-DeniseNeural',
        'german': 'de-DE-KatjaNeural',
        'de': 'de-DE-KatjaNeural',
        'portuguese': 'pt-BR-FranciscaNeural',
        'pt': 'pt-BR-FranciscaNeural',
        'italian': 'it-IT-ElsaNeural',
        'it': 'it-IT-ElsaNeural',
        'russian': 'ru-RU-SvetlanaNeural',
        'ru': 'ru-RU-SvetlanaNeural',
        'chinese': 'zh-CN-XiaoxiaoNeural',
        'zh': 'zh-CN-XiaoxiaoNeural',
        'japanese': 'ja-JP-NanamiNeural',
        'ja': 'ja-JP-NanamiNeural',
        'arabic': 'ar-SA-ZariyahNeural',
        'ar': 'ar-SA-ZariyahNeural',
    }

    lang_lower = user_language.strip().lower().replace('_', '-')
    if lang_lower in voice_map:
        return voice_map[lang_lower]

    # Region-tagged codes without their own entry (e.g. 'es-mx', 'zh-cn')
    # fall back to the base language rather than to an English voice.
    base_language = lang_lower.partition('-')[0]
    return voice_map.get(base_language, default_voice)
|
| 238 |
+
|
| 239 |
async def handle_enhanced_websocket_connection(websocket: WebSocket):
|
| 240 |
"""Enhanced WebSocket handler with hybrid LLM and voice features"""
|
| 241 |
await websocket.accept()
|
|
|
|
| 525 |
})
|
| 526 |
return
|
| 527 |
|
| 528 |
+
# Extract and validate user language preference
|
| 529 |
user_language = data.get("lang") or data.get("language") or session_data.get("language") or session_data["user_preferences"].get("language") or "english"
|
| 530 |
+
|
| 531 |
+
# Normalize language codes
|
| 532 |
+
language_map = {
|
| 533 |
+
'english': 'en', 'en': 'en', 'en-us': 'en', 'en-in': 'en',
|
| 534 |
+
'hindi': 'hi', 'hi': 'hi', 'hi-in': 'hi',
|
| 535 |
+
'hinglish': 'hi-en', 'hi-en': 'hi-en',
|
| 536 |
+
'spanish': 'es', 'es': 'es',
|
| 537 |
+
'french': 'fr', 'fr': 'fr',
|
| 538 |
+
'german': 'de', 'de': 'de',
|
| 539 |
+
'portuguese': 'pt', 'pt': 'pt',
|
| 540 |
+
'italian': 'it', 'it': 'it',
|
| 541 |
+
'russian': 'ru', 'ru': 'ru',
|
| 542 |
+
'chinese': 'zh', 'zh': 'zh',
|
| 543 |
+
'japanese': 'ja', 'ja': 'ja',
|
| 544 |
+
'arabic': 'ar', 'ar': 'ar'
|
| 545 |
+
}
|
| 546 |
+
|
| 547 |
+
normalized_language = language_map.get(user_language.lower(), 'en')
|
| 548 |
+
logger.info(f"π Processing voice with language: {user_language} (normalized: {normalized_language})")
|
| 549 |
|
| 550 |
# Save to temporary file
|
| 551 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
|
|
|
|
| 573 |
else:
|
| 574 |
# Use server-side ASR (Whisper)
|
| 575 |
logger.info(f"π€ Processing audio with language preference: {user_language}")
|
| 576 |
+
transcribed_text = await voice_service.speech_to_text(temp_file_path, normalized_language)
|
| 577 |
|
| 578 |
# Clean up temp file
|
| 579 |
Path(temp_file_path).unlink()
|
|
|
|
| 585 |
})
|
| 586 |
return
|
| 587 |
|
| 588 |
+
# Validate transcription quality
|
| 589 |
+
transcription_quality = validate_transcription_quality(transcribed_text, normalized_language)
|
| 590 |
+
logger.info(f"π€ Transcribed ({user_language}): {transcribed_text} | Quality: {transcription_quality['score']:.2f}")
|
| 591 |
|
| 592 |
+
# Send transcription with quality info
|
| 593 |
await websocket.send_json({
|
| 594 |
"type": "transcription",
|
| 595 |
"text": transcribed_text,
|
| 596 |
"language": user_language or "auto-detected",
|
| 597 |
+
"confidence": transcription_quality['level'],
|
| 598 |
+
"quality_score": transcription_quality['score'],
|
| 599 |
+
"suggestions": transcription_quality['suggestions']
|
| 600 |
})
|
| 601 |
|
| 602 |
+
# Handle low-quality transcription
|
| 603 |
+
if transcription_quality['score'] < 0.3:
|
| 604 |
+
await websocket.send_json({
|
| 605 |
+
"type": "transcription_warning",
|
| 606 |
+
"message": "The audio quality seems low. Please speak clearly and try again.",
|
| 607 |
+
"suggestions": transcription_quality['suggestions']
|
| 608 |
+
})
|
| 609 |
+
return
|
| 610 |
|
| 611 |
+
# Add comprehensive language context to the prompt for better responses
|
| 612 |
+
language_context = create_language_context(user_language, normalized_language)
|
| 613 |
enhanced_message = transcribed_text + language_context
|
| 614 |
|
| 615 |
# Process as text message with language context
|
|
|
|
| 657 |
# Send voice response if enabled
|
| 658 |
if session_data["user_preferences"]["response_mode"] in ["voice", "both"]:
|
| 659 |
# Choose appropriate voice based on user's language
|
| 660 |
+
voice_preference = select_voice_for_language(user_language, session_data["user_preferences"]["preferred_voice"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 661 |
|
| 662 |
voice_text = voice_service.create_voice_response_with_guidance(
|
| 663 |
response_text,
|
|
|
|
| 683 |
})
|
| 684 |
|
| 685 |
async def get_hybrid_response(user_message: str, context: str, config: dict, knowledge_base: str):
|
| 686 |
+
"""Get response using hybrid LLM with intelligent document search and fallback (streaming)"""
|
| 687 |
try:
|
| 688 |
+
# First, determine if this is a government document query or general query
|
| 689 |
+
query_context = analyze_query_context(user_message)
|
| 690 |
+
logger.info(f"π Query analysis: {query_context}")
|
| 691 |
+
|
| 692 |
logger.info(f"π Searching documents for: '{user_message}' in knowledge base: {knowledge_base}")
|
| 693 |
from rag_service import search_documents_async
|
| 694 |
docs = await search_documents_async(user_message, limit=3)
|
| 695 |
logger.info(f"π Document search returned {len(docs) if docs else 0} results")
|
| 696 |
+
|
| 697 |
+
# Check if we have relevant documents
|
| 698 |
+
has_relevant_docs = docs and any(doc.get("score", 0) > 0.5 for doc in docs)
|
| 699 |
+
|
| 700 |
+
# For general conversation queries, use LLM even if we have some documents
|
| 701 |
+
if query_context.get("type") == "general_conversation" and query_context.get("confidence", 0) > 0.6:
|
| 702 |
+
logger.info("π± Detected general conversation, using LLM directly")
|
| 703 |
+
llm_response = await generate_llm_fallback_response(user_message, query_context)
|
| 704 |
+
yield {
|
| 705 |
+
"clause_text": llm_response,
|
| 706 |
+
"summary": "AI-generated response for general conversation",
|
| 707 |
+
"role_checklist": ["This is general information", "For official queries, ask about government policies"],
|
| 708 |
+
"source_title": "AI Assistant",
|
| 709 |
+
"clause_id": "AI_GENERAL",
|
| 710 |
+
"date": "2024",
|
| 711 |
+
"url": "",
|
| 712 |
+
"score": 0.9,
|
| 713 |
+
"scenario_analysis": None,
|
| 714 |
+
"charts": []
|
| 715 |
+
}
|
| 716 |
+
return
|
| 717 |
+
|
| 718 |
+
if has_relevant_docs:
|
| 719 |
try:
|
| 720 |
from scenario_analysis_service import run_scenario_analysis
|
| 721 |
# Detect scenario analysis intent (simple keyword match)
|
|
|
|
| 787 |
}
|
| 788 |
yield response_obj
|
| 789 |
else:
|
| 790 |
+
# No relevant documents found - use LLM fallback
|
| 791 |
+
logger.info("π No relevant documents found, using LLM fallback")
|
| 792 |
+
llm_response = await generate_llm_fallback_response(user_message, query_context)
|
| 793 |
+
yield {
|
| 794 |
+
"clause_text": llm_response,
|
| 795 |
+
"summary": "Generated by AI assistant for general query",
|
| 796 |
+
"role_checklist": ["Consider if this relates to government policies", "Contact relevant office for official information"],
|
| 797 |
+
"source_title": "AI Assistant",
|
| 798 |
+
"clause_id": "AI_001",
|
| 799 |
+
"date": "2024",
|
| 800 |
+
"url": "",
|
| 801 |
+
"score": 0.8,
|
| 802 |
+
"scenario_analysis": None,
|
| 803 |
+
"charts": []
|
| 804 |
+
}
|
| 805 |
except Exception as e:
|
| 806 |
+
logger.warning(f"β Document search failed: {e}, using LLM fallback")
|
| 807 |
+
try:
|
| 808 |
+
llm_response = await generate_llm_fallback_response(user_message, {"type": "unknown", "confidence": 0.3})
|
| 809 |
+
yield {
|
| 810 |
+
"clause_text": llm_response,
|
| 811 |
+
"summary": "AI-generated response due to system error",
|
| 812 |
+
"role_checklist": ["Verify information independently", "Try rephrasing your query"],
|
| 813 |
+
"source_title": "AI Assistant (Fallback)",
|
| 814 |
+
"clause_id": "AI_ERROR",
|
| 815 |
+
"date": "2024",
|
| 816 |
+
"url": "",
|
| 817 |
+
"score": 0.5,
|
| 818 |
+
"scenario_analysis": None,
|
| 819 |
+
"charts": []
|
| 820 |
+
}
|
| 821 |
+
except Exception as fallback_error:
|
| 822 |
+
logger.error(f"β LLM fallback also failed: {fallback_error}")
|
| 823 |
+
yield {
|
| 824 |
+
"clause_text": "I apologize, but I'm experiencing technical difficulties. Please try again later or rephrase your question.",
|
| 825 |
+
"summary": "System error occurred",
|
| 826 |
+
"role_checklist": ["Try again later", "Rephrase your question", "Contact technical support"],
|
| 827 |
+
"source_title": "System Error",
|
| 828 |
+
"clause_id": "ERROR_001",
|
| 829 |
+
"date": "2024",
|
| 830 |
+
"url": "",
|
| 831 |
+
"score": 0.1,
|
| 832 |
+
"scenario_analysis": None,
|
| 833 |
+
"charts": []
|
| 834 |
+
}
|
| 835 |
|
| 836 |
async def send_text_response(websocket: WebSocket, response_text: str, provider_used: str, session_data: dict):
|
| 837 |
"""Send text response to client"""
|