teofizzy committed on
Commit
0830be5
·
1 Parent(s): 4089852

Added Swahili language support

Browse files
Files changed (2) hide show
  1. requirements.txt +1 -0
  2. src/load/mshauri_demo.py +90 -13
requirements.txt CHANGED
@@ -10,3 +10,4 @@ langchain-chroma
10
  chromadb
11
  huggingface_hub
12
  pysqlite3-binary
 
 
10
  chromadb
11
  huggingface_hub
12
  pysqlite3-binary
13
+ deep-translator
src/load/mshauri_demo.py CHANGED
@@ -7,6 +7,9 @@ import time
7
  from contextlib import redirect_stdout
8
  from typing import Any, List, Optional, Mapping
9
 
 
 
 
10
  # Replaces HuggingFaceEndpoint with the robust Client
11
  from huggingface_hub import InferenceClient
12
  from langchain_core.language_models.llms import LLM
@@ -24,7 +27,7 @@ DEFAULT_EMBED_MODEL = "nomic-embed-text"
24
  DEFAULT_LLM_MODEL = "qwen2.5:3b"
25
  DEFAULT_OLLAMA_URL = "http://127.0.0.1:11434"
26
 
27
- # --- CUSTOM WRAPPER
28
  class HuggingFaceChat(LLM):
29
  """
30
  Custom LangChain wrapper that hits the Chat API (v1/chat/completions).
@@ -175,7 +178,8 @@ class SimpleReActAgent:
175
  def invoke(self, inputs):
176
  query = inputs["input"]
177
  scratchpad = ""
178
- print(f"🚀 Starting Agent Loop for: '{query}'")
 
179
 
180
  for step in range(10):
181
  prompt = self.prompt_template.format(
@@ -195,7 +199,7 @@ class SimpleReActAgent:
195
  return {"output": "Error contacting AI service. Please try again."}
196
 
197
  if self.verbose:
198
- print(f"\nStep {step+1}: {response_text.strip()}")
199
 
200
  scratchpad += response_text
201
 
@@ -217,7 +221,7 @@ class SimpleReActAgent:
217
 
218
  if action_name in self.tools:
219
  if self.verbose:
220
- print(f"🛠️ Calling '{action_name}' with: '{action_input}'")
221
  try:
222
  tool = self.tools[action_name]
223
  if hasattr(tool, 'invoke'):
@@ -228,7 +232,7 @@ class SimpleReActAgent:
228
  tool_result = f"Error: {e}"
229
 
230
  if self.verbose:
231
- print(f"Observation: {str(tool_result)[:200]}...")
232
  observation = f"\nObservation: {tool_result}\n"
233
  else:
234
  observation = f"\nObservation: Error: Tool '{action_name}' not found.\n"
@@ -241,6 +245,75 @@ class SimpleReActAgent:
241
 
242
  return {"output": "Agent timed out."}
243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  # --- MAIN SETUP FUNCTION ---
245
 
246
  def create_mshauri_agent(
@@ -255,10 +328,10 @@ def create_mshauri_agent(
255
 
256
  # 1. ROBUST SERVERLESS LOADING LOOP
257
  if hf_token:
258
- print("⚡ HF Token found. Testing models...")
259
 
260
  for model_id in CANDIDATE_MODELS:
261
- print(f"Trying model: {model_id}...")
262
  try:
263
  # USE CUSTOM WRAPPER
264
  candidate_llm = HuggingFaceChat(
@@ -269,16 +342,16 @@ def create_mshauri_agent(
269
  # TEST CALL
270
  candidate_llm.invoke("Ping")
271
 
272
- print(f"SUCCESS: Connected to {model_id}")
273
  llm = candidate_llm
274
  break
275
  except Exception as e:
276
- print(f"Failed: {str(e)[:100]}...")
277
  time.sleep(1)
278
 
279
  # FALLBACK
280
  if not llm:
281
- print("\nFalling back to Local CPU Ollama...")
282
  try:
283
  llm = ChatOllama(model="qwen2.5:3b", base_url=ollama_url, temperature=0.1)
284
  except Exception as e:
@@ -340,8 +413,9 @@ def create_mshauri_agent(
340
  tools = sql_tools + [retriever_tool, repl_tool]
341
  agent = SimpleReActAgent(llm, tools)
342
 
343
- print("Agent Ready.")
344
- return agent
 
345
 
346
  def ask_mshauri(agent, query):
347
  if not agent:
@@ -363,4 +437,7 @@ def ask_mshauri(agent, query):
363
  if __name__ == "__main__":
364
  # Quick Test
365
  agent = create_mshauri_agent()
366
- ask_mshauri(agent, "What is the inflation rate?")
 
 
 
 
7
  from contextlib import redirect_stdout
8
  from typing import Any, List, Optional, Mapping
9
 
10
+ # --- NEW IMPORT FOR TRANSLATION ---
11
+ from deep_translator import GoogleTranslator
12
+
13
  # Replaces HuggingFaceEndpoint with the robust Client
14
  from huggingface_hub import InferenceClient
15
  from langchain_core.language_models.llms import LLM
 
27
  DEFAULT_LLM_MODEL = "qwen2.5:3b"
28
  DEFAULT_OLLAMA_URL = "http://127.0.0.1:11434"
29
 
30
+ # --- CUSTOM WRAPPER ---
31
  class HuggingFaceChat(LLM):
32
  """
33
  Custom LangChain wrapper that hits the Chat API (v1/chat/completions).
 
178
  def invoke(self, inputs):
179
  query = inputs["input"]
180
  scratchpad = ""
181
+ # Flush=True forces logs to appear instantly
182
+ print(f"🚀 Starting Agent Loop for: '{query}'", flush=True)
183
 
184
  for step in range(10):
185
  prompt = self.prompt_template.format(
 
199
  return {"output": "Error contacting AI service. Please try again."}
200
 
201
  if self.verbose:
202
+ print(f"\nStep {step+1}: {response_text.strip()}", flush=True)
203
 
204
  scratchpad += response_text
205
 
 
221
 
222
  if action_name in self.tools:
223
  if self.verbose:
224
+ print(f"🛠️ Calling '{action_name}' with: '{action_input}'", flush=True)
225
  try:
226
  tool = self.tools[action_name]
227
  if hasattr(tool, 'invoke'):
 
232
  tool_result = f"Error: {e}"
233
 
234
  if self.verbose:
235
+ print(f"Observation: {str(tool_result)[:200]}...", flush=True)
236
  observation = f"\nObservation: {tool_result}\n"
237
  else:
238
  observation = f"\nObservation: Error: Tool '{action_name}' not found.\n"
 
245
 
246
  return {"output": "Agent timed out."}
247
 
248
+ # --- 3. NEW MULTILINGUAL AGENT WRAPPER ---
249
class MultilingualAgent:
    """
    Wrap a core agent so that Swahili queries are answered in Swahili.

    Flow for each call to ``invoke``:
      1. Detect Swahili input with a keyword heuristic.
      2. Translate SW -> EN.
      3. Run the wrapped agent on the English query.
      4. Translate the EN output -> SW.

    Queries that are not detected as Swahili are passed through untouched.
    """

    # Keywords common in Kenyan financial/economic context.
    SWAHILI_KEYWORDS = frozenset({
        'habari', 'pesa', 'shilingi', 'bei', 'uchumi', 'mkopo', 'faida',
        'hasara', 'benki', 'riba', 'soko', 'mfumuko', 'kodi', 'ushuru',
        'serikali', 'biashara', 'uwekezaji', 'bajeti', 'deni', 'kipato',
    })

    # Words by which a user explicitly asks for a Swahili answer.
    LANGUAGE_REQUEST_WORDS = frozenset({'swahili', 'kiswahili'})

    def __init__(self, agent):
        """Store the wrapped agent and build one translator per direction."""
        self.agent = agent
        # Initialize translators
        self.en_to_sw = GoogleTranslator(source='en', target='sw')
        self.sw_to_en = GoogleTranslator(source='sw', target='en')

    @staticmethod
    def _tokenize(text):
        """Return the set of lowercase alphanumeric words found in *text*."""
        # Every non-alphanumeric character becomes a separator, so
        # punctuation glued to a word ("uchumi?") does not hide it.
        normalized = "".join(
            ch if ch.isalnum() else " " for ch in text.lower()
        )
        return set(normalized.split())

    def _looks_swahili(self, query):
        """Return True if *query* should be handled in Swahili mode.

        Matching is done on whole words, not substrings: a substring test
        flags ordinary English such as "being" (contains "bei"), "denied"
        (contains "deni") or "tribal" (contains "riba") as Swahili.
        """
        tokens = self._tokenize(query)

        # User explicitly requested Swahili.
        if tokens & self.LANGUAGE_REQUEST_WORDS:
            return True

        for token in tokens:
            if token in self.SWAHILI_KEYWORDS:
                return True
            # Accept the locative form as well ("sokoni", "benkini").
            if token.endswith("ni") and token[:-2] in self.SWAHILI_KEYWORDS:
                return True
        return False

    def detect_and_translate_input(self, query):
        """
        Translate *query* to English when it looks like Swahili.

        Returns a ``(text, is_swahili)`` tuple. ``text`` is the English
        translation when Swahili was detected and translated successfully;
        otherwise it is the original query and the flag is False, so the
        response is not translated back either.
        """
        if not self._looks_swahili(query):
            return query, False

        print("🌍 Swahili context detected. Translating input...", flush=True)
        try:
            translated_query = self.sw_to_en.translate(query)
        except Exception as e:
            # Best effort: a translation outage must not block the agent.
            print(f" ⚠️ Translation failed: {e}. Using original.", flush=True)
            return query, False

        if not translated_query:
            # An empty result would hand the agent nothing to work with.
            print(" ⚠️ Translation failed: empty result. Using original.", flush=True)
            return query, False

        print(f" Original: '{query}' -> English: '{translated_query}'", flush=True)
        return translated_query, True

    def invoke(self, inputs):
        """
        Run the wrapped agent on ``inputs["input"]``.

        Returns a dict ``{"output": <text>}``. The text is in Swahili when
        the query was detected as Swahili; if translating the response
        fails, the English answer is returned with a "(Translation Error)"
        suffix.
        """
        query = inputs["input"]

        # 1. PRE-PROCESS: Translate Input
        processed_query, is_swahili_mode = self.detect_and_translate_input(query)

        # 2. CORE PROCESS: Run Agent in English
        # We pass the ENGLISH query to the agent so it can use SQL/Vector tools correctly.
        result = self.agent.invoke({"input": processed_query})
        english_output = result.get("output", "Error")

        if not is_swahili_mode:
            return {"output": english_output}

        # 3. POST-PROCESS: Translate Output
        print("🌍 Translating response to Swahili...", flush=True)
        try:
            swahili_output = self.en_to_sw.translate(english_output)
        except Exception as e:
            print(f" ⚠️ Response translation failed: {e}", flush=True)
            return {"output": f"{english_output} (Translation Error)"}

        if not swahili_output:
            # Never hand the caller an empty answer when an English one exists.
            print(" ⚠️ Response translation failed: empty result", flush=True)
            return {"output": f"{english_output} (Translation Error)"}

        return {"output": swahili_output}
316
+
317
  # --- MAIN SETUP FUNCTION ---
318
 
319
  def create_mshauri_agent(
 
328
 
329
  # 1. ROBUST SERVERLESS LOADING LOOP
330
  if hf_token:
331
+ print("⚡ HF Token found. Testing models...", flush=True)
332
 
333
  for model_id in CANDIDATE_MODELS:
334
+ print(f"Trying model: {model_id}...", flush=True)
335
  try:
336
  # USE CUSTOM WRAPPER
337
  candidate_llm = HuggingFaceChat(
 
342
  # TEST CALL
343
  candidate_llm.invoke("Ping")
344
 
345
+ print(f"SUCCESS: Connected to {model_id}", flush=True)
346
  llm = candidate_llm
347
  break
348
  except Exception as e:
349
+ print(f"Failed: {str(e)[:100]}...", flush=True)
350
  time.sleep(1)
351
 
352
  # FALLBACK
353
  if not llm:
354
+ print("\nFalling back to Local CPU Ollama...", flush=True)
355
  try:
356
  llm = ChatOllama(model="qwen2.5:3b", base_url=ollama_url, temperature=0.1)
357
  except Exception as e:
 
413
  tools = sql_tools + [retriever_tool, repl_tool]
414
  agent = SimpleReActAgent(llm, tools)
415
 
416
+ # WRAP THE AGENT IN TRANSLATION LAYER
417
+ print("Agent Ready (Multilingual Mode).", flush=True)
418
+ return MultilingualAgent(agent)
419
 
420
  def ask_mshauri(agent, query):
421
  if not agent:
 
437
  if __name__ == "__main__":
438
  # Quick Test
439
  agent = create_mshauri_agent()
440
+ # Test English
441
+ ask_mshauri(agent, "What is the inflation rate?")
442
+ # Test Swahili
443
+ ask_mshauri(agent, "Hali ya uchumi ni vipi?")