Peterase committed on
Commit
75375c8
·
1 Parent(s): 53c5af5

feat(language-gate): detect non-English queries and return polite response

Browse files

- Added language detection as first step in AgentRouterUseCase (before intent classification)
- Non-English queries now get a polite bilingual response instead of a hallucinated answer
- Native language messages for: Arabic, Amharic, Somali, Swahili, French
- Generic English fallback for all other languages
- Both execute_chat and execute_stream paths protected
- Includes 3 English follow-up question suggestions to guide the user
- Zero LLM calls for non-English queries (fast, no hallucination risk)

src/core/use_cases/agent_router_use_case.py CHANGED
@@ -6,6 +6,50 @@ from src.core.use_cases.rag_chat_use_case import RagChatUseCase
6
  from src.core.use_cases.account_use_case import AccountUseCase
7
  from src.infrastructure.adapters.intent_classifier import intent_classifier
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  class AgentRouterUseCase:
11
  """
@@ -23,13 +67,62 @@ class AgentRouterUseCase:
23
  print(f"DEBUG: Intent Classification: {intent} for query: '{query[:80]}'")
24
  return intent
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  async def execute_chat(self, request: ChatRequest) -> Dict[str, Any]:
27
- intent = self._classify_intent(request.query)
28
- # Generate a unique session ID if none provided — never use a shared fallback
29
  if not request.session_id:
30
- import uuid
31
  request.session_id = str(uuid.uuid4())
32
  session_id = request.session_id
 
 
 
 
 
 
 
 
 
33
 
34
  if intent == "OTHER":
35
  print(f"DEBUG: Routing to OTHER (Direct LLM Response)")
@@ -58,13 +151,22 @@ Response:"""
58
  return await self.rag_chat.execute_chat(request)
59
 
60
  async def execute_stream(self, request: ChatRequest, is_guest: bool = False, user_id: int = None) -> AsyncGenerator[str, None]:
61
- intent = self._classify_intent(request.query)
62
- # Generate a unique session ID if none provided — never use a shared fallback
63
  if not request.session_id:
64
- import uuid
65
  request.session_id = str(uuid.uuid4())
66
  session_id = request.session_id
67
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  if intent == "OTHER":
69
  full_answer = ""
70
  # Identity-safe prompt — never reveals underlying model
@@ -84,14 +186,12 @@ Response:"""
84
  yield chunk
85
  if chunk.startswith("data: "):
86
  try:
87
- import json
88
  data = json.loads(chunk[6:])
89
  if "token" in data:
90
  full_answer += data["token"]
91
  except:
92
  pass
93
 
94
- import json
95
  final_response = {
96
  "answer": full_answer,
97
  "sources": [],
 
6
  from src.core.use_cases.account_use_case import AccountUseCase
7
  from src.infrastructure.adapters.intent_classifier import intent_classifier
8
 
9
+ import json
10
+ import uuid
11
+ import logging
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ # Languages we support for search (backend retrieval)
16
+ SUPPORTED_SEARCH_LANGUAGES = {"en"}
17
+
18
+ # Human-readable names for polite response
19
+ LANGUAGE_NAMES = {
20
+ "ar": "Arabic (العربية)",
21
+ "am": "Amharic (አማርኛ)",
22
+ "so": "Somali (Soomaali)",
23
+ "sw": "Swahili (Kiswahili)",
24
+ "fr": "French (Français)",
25
+ "zh": "Chinese (中文)",
26
+ "es": "Spanish (Español)",
27
+ "de": "German (Deutsch)",
28
+ "pt": "Portuguese (Português)",
29
+ "hi": "Hindi (हिन्दी)",
30
+ "tr": "Turkish (Türkçe)",
31
+ "ru": "Russian (Русский)",
32
+ "it": "Italian (Italiano)",
33
+ "ja": "Japanese (日本語)",
34
+ "ko": "Korean (한국어)",
35
+ }
36
+
37
+ # Polite "not yet supported" message in each language
38
+ LANGUAGE_NOT_SUPPORTED_MESSAGES = {
39
+ "ar": "مرحباً! أنا ARKI AI. حالياً أدعم اللغة الإنجليزية فقط. دعم اللغة العربية قادم قريباً! 🌍\n\nHello! I'm ARKI AI. I currently support English only. Arabic support is coming soon!",
40
+ "am": "ሰላም! እኔ ARKI AI ነኝ። አሁን እንግሊዝኛ ቋንቋ ብቻ እጠቀማለሁ። አማርኛ ድጋፍ በቅርቡ ይመጣል! 🌍\n\nHello! I'm ARKI AI. I currently support English only. Amharic support is coming soon!",
41
+ "so": "Salaan! Waxaan ahay ARKI AI. Hadda waxaan taageeraa Ingiriisiga kaliya. Taageerada Soomaaliga waxay timaadaa dhawaan! 🌍\n\nHello! I'm ARKI AI. I currently support English only. Somali support is coming soon!",
42
+ "sw": "Habari! Mimi ni ARKI AI. Kwa sasa ninasaidia Kiingereza tu. Msaada wa Kiswahili unakuja hivi karibuni! 🌍\n\nHello! I'm ARKI AI. I currently support English only. Swahili support is coming soon!",
43
+ "fr": "Bonjour! Je suis ARKI AI. Je supporte actuellement l'anglais uniquement. Le support du français arrive bientôt! 🌍\n\nHello! I'm ARKI AI. I currently support English only. French support is coming soon!",
44
+ }
45
+
46
+ DEFAULT_NOT_SUPPORTED_MESSAGE = (
47
+ "Hello! I'm ARKI AI, a real-time news assistant.\n\n"
48
+ "I currently support **English** queries only.\n"
49
+ "Support for {lang_name} is coming soon! 🌍\n\n"
50
+ "Please try asking your question in English and I'll do my best to help."
51
+ )
52
+
53
 
54
  class AgentRouterUseCase:
55
  """
 
67
  print(f"DEBUG: Intent Classification: {intent} for query: '{query[:80]}'")
68
  return intent
69
 
70
+ def _detect_language(self, query: str) -> str:
71
+ """
72
+ Detect query language. Returns ISO 639-1 code (en, ar, am, so, sw, fr...).
73
+ Falls back to 'en' if detection fails.
74
+ """
75
+ try:
76
+ from src.infrastructure.adapters.language_detector import language_detector
77
+ if language_detector:
78
+ result = language_detector.detect(query)
79
+ logger.debug(
80
+ f"Language detected: {result.language} "
81
+ f"(confidence={result.confidence:.2f}, method={result.method})"
82
+ )
83
+ return result.language
84
+ except Exception as e:
85
+ logger.debug(f"Language detection failed: {e}")
86
+ return "en"
87
+
88
+ def _build_not_supported_response(self, lang_code: str, session_id: str) -> Dict[str, Any]:
89
+ """Build a polite 'language not supported yet' response."""
90
+ # Use native language message if available, else generic English
91
+ if lang_code in LANGUAGE_NOT_SUPPORTED_MESSAGES:
92
+ message = LANGUAGE_NOT_SUPPORTED_MESSAGES[lang_code]
93
+ else:
94
+ lang_name = LANGUAGE_NAMES.get(lang_code, f"your language ({lang_code})")
95
+ message = DEFAULT_NOT_SUPPORTED_MESSAGE.format(lang_name=lang_name)
96
+
97
+ return {
98
+ "answer": message,
99
+ "sources": [],
100
+ "follow_up_questions": [
101
+ "What is the latest news from Ethiopia?",
102
+ "Tell me about current events in Africa",
103
+ "What happened in Ethiopia today?"
104
+ ],
105
+ "session_id": session_id,
106
+ "debug": {
107
+ "intent": "LANGUAGE_NOT_SUPPORTED",
108
+ "detected_language": lang_code,
109
+ "routed_to": "LanguageGate"
110
+ }
111
+ }
112
+
113
  async def execute_chat(self, request: ChatRequest) -> Dict[str, Any]:
 
 
114
  if not request.session_id:
 
115
  request.session_id = str(uuid.uuid4())
116
  session_id = request.session_id
117
+
118
+ # ── Language Gate: detect non-English queries first ───────────────────
119
+ detected_lang = self._detect_language(request.query)
120
+ if detected_lang not in SUPPORTED_SEARCH_LANGUAGES:
121
+ logger.info(f"Non-English query detected: {detected_lang} — returning language gate response")
122
+ print(f"DEBUG: Language gate triggered: {detected_lang} for query: '{request.query[:60]}'")
123
+ return self._build_not_supported_response(detected_lang, session_id)
124
+
125
+ intent = self._classify_intent(request.query)
126
 
127
  if intent == "OTHER":
128
  print(f"DEBUG: Routing to OTHER (Direct LLM Response)")
 
151
  return await self.rag_chat.execute_chat(request)
152
 
153
  async def execute_stream(self, request: ChatRequest, is_guest: bool = False, user_id: int = None) -> AsyncGenerator[str, None]:
 
 
154
  if not request.session_id:
 
155
  request.session_id = str(uuid.uuid4())
156
  session_id = request.session_id
157
 
158
+ # ── Language Gate: detect non-English queries first ───────────────────
159
+ detected_lang = self._detect_language(request.query)
160
+ if detected_lang not in SUPPORTED_SEARCH_LANGUAGES:
161
+ logger.info(f"Non-English query detected: {detected_lang} — returning language gate response")
162
+ print(f"DEBUG: Language gate triggered: {detected_lang} for query: '{request.query[:60]}'")
163
+ response = self._build_not_supported_response(detected_lang, session_id)
164
+ yield f"data: {json.dumps(response)}\n\n"
165
+ yield "data: [DONE]\n\n"
166
+ return
167
+
168
+ intent = self._classify_intent(request.query)
169
+
170
  if intent == "OTHER":
171
  full_answer = ""
172
  # Identity-safe prompt — never reveals underlying model
 
186
  yield chunk
187
  if chunk.startswith("data: "):
188
  try:
 
189
  data = json.loads(chunk[6:])
190
  if "token" in data:
191
  full_answer += data["token"]
192
  except:
193
  pass
194
 
 
195
  final_response = {
196
  "answer": full_answer,
197
  "sources": [],