Spaces:
Running
Running
Added Swahili language support
Browse files- requirements.txt +1 -0
- src/load/mshauri_demo.py +90 -13
requirements.txt
CHANGED
|
@@ -10,3 +10,4 @@ langchain-chroma
|
|
| 10 |
chromadb
|
| 11 |
huggingface_hub
|
| 12 |
pysqlite3-binary
|
|
|
|
|
|
| 10 |
chromadb
|
| 11 |
huggingface_hub
|
| 12 |
pysqlite3-binary
|
| 13 |
+
deep-translator
|
src/load/mshauri_demo.py
CHANGED
|
@@ -7,6 +7,9 @@ import time
|
|
| 7 |
from contextlib import redirect_stdout
|
| 8 |
from typing import Any, List, Optional, Mapping
|
| 9 |
|
|
|
|
|
|
|
|
|
|
| 10 |
# Replaces HuggingFaceEndpoint with the robust Client
|
| 11 |
from huggingface_hub import InferenceClient
|
| 12 |
from langchain_core.language_models.llms import LLM
|
|
@@ -24,7 +27,7 @@ DEFAULT_EMBED_MODEL = "nomic-embed-text"
|
|
| 24 |
DEFAULT_LLM_MODEL = "qwen2.5:3b"
|
| 25 |
DEFAULT_OLLAMA_URL = "http://127.0.0.1:11434"
|
| 26 |
|
| 27 |
-
# --- CUSTOM WRAPPER
|
| 28 |
class HuggingFaceChat(LLM):
|
| 29 |
"""
|
| 30 |
Custom LangChain wrapper that hits the Chat API (v1/chat/completions).
|
|
@@ -175,7 +178,8 @@ class SimpleReActAgent:
|
|
| 175 |
def invoke(self, inputs):
|
| 176 |
query = inputs["input"]
|
| 177 |
scratchpad = ""
|
| 178 |
-
|
|
|
|
| 179 |
|
| 180 |
for step in range(10):
|
| 181 |
prompt = self.prompt_template.format(
|
|
@@ -195,7 +199,7 @@ class SimpleReActAgent:
|
|
| 195 |
return {"output": "Error contacting AI service. Please try again."}
|
| 196 |
|
| 197 |
if self.verbose:
|
| 198 |
-
print(f"\nStep {step+1}: {response_text.strip()}")
|
| 199 |
|
| 200 |
scratchpad += response_text
|
| 201 |
|
|
@@ -217,7 +221,7 @@ class SimpleReActAgent:
|
|
| 217 |
|
| 218 |
if action_name in self.tools:
|
| 219 |
if self.verbose:
|
| 220 |
-
print(f"🛠️ Calling '{action_name}' with: '{action_input}'")
|
| 221 |
try:
|
| 222 |
tool = self.tools[action_name]
|
| 223 |
if hasattr(tool, 'invoke'):
|
|
@@ -228,7 +232,7 @@ class SimpleReActAgent:
|
|
| 228 |
tool_result = f"Error: {e}"
|
| 229 |
|
| 230 |
if self.verbose:
|
| 231 |
-
print(f"Observation: {str(tool_result)[:200]}...")
|
| 232 |
observation = f"\nObservation: {tool_result}\n"
|
| 233 |
else:
|
| 234 |
observation = f"\nObservation: Error: Tool '{action_name}' not found.\n"
|
|
@@ -241,6 +245,75 @@ class SimpleReActAgent:
|
|
| 241 |
|
| 242 |
return {"output": "Agent timed out."}
|
| 243 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
# --- MAIN SETUP FUNCTION ---
|
| 245 |
|
| 246 |
def create_mshauri_agent(
|
|
@@ -255,10 +328,10 @@ def create_mshauri_agent(
|
|
| 255 |
|
| 256 |
# 1. ROBUST SERVERLESS LOADING LOOP
|
| 257 |
if hf_token:
|
| 258 |
-
print("⚡ HF Token found. Testing models...")
|
| 259 |
|
| 260 |
for model_id in CANDIDATE_MODELS:
|
| 261 |
-
print(f"Trying model: {model_id}...")
|
| 262 |
try:
|
| 263 |
# USE CUSTOM WRAPPER
|
| 264 |
candidate_llm = HuggingFaceChat(
|
|
@@ -269,16 +342,16 @@ def create_mshauri_agent(
|
|
| 269 |
# TEST CALL
|
| 270 |
candidate_llm.invoke("Ping")
|
| 271 |
|
| 272 |
-
print(f"SUCCESS: Connected to {model_id}")
|
| 273 |
llm = candidate_llm
|
| 274 |
break
|
| 275 |
except Exception as e:
|
| 276 |
-
print(f"Failed: {str(e)[:100]}...")
|
| 277 |
time.sleep(1)
|
| 278 |
|
| 279 |
# FALLBACK
|
| 280 |
if not llm:
|
| 281 |
-
print("\nFalling back to Local CPU Ollama...")
|
| 282 |
try:
|
| 283 |
llm = ChatOllama(model="qwen2.5:3b", base_url=ollama_url, temperature=0.1)
|
| 284 |
except Exception as e:
|
|
@@ -340,8 +413,9 @@ def create_mshauri_agent(
|
|
| 340 |
tools = sql_tools + [retriever_tool, repl_tool]
|
| 341 |
agent = SimpleReActAgent(llm, tools)
|
| 342 |
|
| 343 |
-
|
| 344 |
-
|
|
|
|
| 345 |
|
| 346 |
def ask_mshauri(agent, query):
|
| 347 |
if not agent:
|
|
@@ -363,4 +437,7 @@ def ask_mshauri(agent, query):
|
|
| 363 |
if __name__ == "__main__":
|
| 364 |
# Quick Test
|
| 365 |
agent = create_mshauri_agent()
|
| 366 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
from contextlib import redirect_stdout
|
| 8 |
from typing import Any, List, Optional, Mapping
|
| 9 |
|
| 10 |
+
# --- NEW IMPORT FOR TRANSLATION ---
|
| 11 |
+
from deep_translator import GoogleTranslator
|
| 12 |
+
|
| 13 |
# Replaces HuggingFaceEndpoint with the robust Client
|
| 14 |
from huggingface_hub import InferenceClient
|
| 15 |
from langchain_core.language_models.llms import LLM
|
|
|
|
| 27 |
DEFAULT_LLM_MODEL = "qwen2.5:3b"
|
| 28 |
DEFAULT_OLLAMA_URL = "http://127.0.0.1:11434"
|
| 29 |
|
| 30 |
+
# --- CUSTOM WRAPPER ---
|
| 31 |
class HuggingFaceChat(LLM):
|
| 32 |
"""
|
| 33 |
Custom LangChain wrapper that hits the Chat API (v1/chat/completions).
|
|
|
|
| 178 |
def invoke(self, inputs):
|
| 179 |
query = inputs["input"]
|
| 180 |
scratchpad = ""
|
| 181 |
+
# Flush=True forces logs to appear instantly
|
| 182 |
+
print(f"🚀 Starting Agent Loop for: '{query}'", flush=True)
|
| 183 |
|
| 184 |
for step in range(10):
|
| 185 |
prompt = self.prompt_template.format(
|
|
|
|
| 199 |
return {"output": "Error contacting AI service. Please try again."}
|
| 200 |
|
| 201 |
if self.verbose:
|
| 202 |
+
print(f"\nStep {step+1}: {response_text.strip()}", flush=True)
|
| 203 |
|
| 204 |
scratchpad += response_text
|
| 205 |
|
|
|
|
| 221 |
|
| 222 |
if action_name in self.tools:
|
| 223 |
if self.verbose:
|
| 224 |
+
print(f"🛠️ Calling '{action_name}' with: '{action_input}'", flush=True)
|
| 225 |
try:
|
| 226 |
tool = self.tools[action_name]
|
| 227 |
if hasattr(tool, 'invoke'):
|
|
|
|
| 232 |
tool_result = f"Error: {e}"
|
| 233 |
|
| 234 |
if self.verbose:
|
| 235 |
+
print(f"Observation: {str(tool_result)[:200]}...", flush=True)
|
| 236 |
observation = f"\nObservation: {tool_result}\n"
|
| 237 |
else:
|
| 238 |
observation = f"\nObservation: Error: Tool '{action_name}' not found.\n"
|
|
|
|
| 245 |
|
| 246 |
return {"output": "Agent timed out."}
|
| 247 |
|
| 248 |
+
# --- 3. NEW MULTILINGUAL AGENT WRAPPER ---
|
| 249 |
+
class MultilingualAgent:
    """Wrap a core agent so that Swahili queries are answered in Swahili.

    Processing of each query:
      1. Heuristically decide whether the query is Swahili.
      2. If so, translate it SW -> EN.
      3. Run the wrapped agent on the (English) text, where its
         reasoning and tools work better.
      4. If the input was translated, translate the answer EN -> SW.

    Any translation failure degrades gracefully: the query (or answer) is
    passed through untranslated instead of raising.
    """

    # Keywords common in the Kenyan financial/economic context. They are
    # matched against WHOLE words of the query: plain substring matching
    # misfires on English text ("being" contains "bei", "widening" contains
    # "deni", "tribal" contains "riba") and would wrongly switch an English
    # conversation to Swahili. Trade-off: inflected forms such as "madeni"
    # are not matched on their own.
    _SWAHILI_KEYWORDS = frozenset({
        'habari', 'pesa', 'shilingi', 'bei', 'uchumi', 'mkopo', 'faida',
        'hasara', 'benki', 'riba', 'soko', 'mfumuko', 'kodi', 'ushuru',
        'serikali', 'biashara', 'uwekezaji', 'bajeti', 'deni', 'kipato',
    })

    def __init__(self, agent):
        """Store the wrapped agent and create one translator per direction.

        Args:
            agent: Object exposing ``invoke({"input": str})`` and returning
                a mapping that carries the answer under ``"output"``.
        """
        self.agent = agent
        # Initialize translators
        self.en_to_sw = GoogleTranslator(source='en', target='sw')
        self.sw_to_en = GoogleTranslator(source='sw', target='en')

    def detect_and_translate_input(self, query):
        """Translate *query* to English when it looks like Swahili.

        The query is treated as Swahili when one of its words is a known
        Swahili keyword, or when it mentions "swahili"/"kiswahili"
        anywhere (an explicit request for a Swahili answer).

        Args:
            query: Raw user query text.

        Returns:
            A ``(text, is_swahili)`` tuple. ``text`` is the English
            translation when Swahili was detected and translation
            succeeded; otherwise it is the original query and
            ``is_swahili`` is ``False``.
        """
        lowered = query.lower()

        # Split on every non-alphanumeric character so that punctuation
        # attached to a word ("uchumi?") does not hide a keyword.
        words = "".join(ch if ch.isalnum() else " " for ch in lowered).split()
        has_keyword = not self._SWAHILI_KEYWORDS.isdisjoint(words)

        # "kiswahili" contains "swahili", so a single check covers both.
        explicitly_requested = "swahili" in lowered

        if not (has_keyword or explicitly_requested):
            return query, False

        print(f"🌍 Swahili context detected. Translating input...", flush=True)
        try:
            translated_query = self.sw_to_en.translate(query)
        except Exception as e:
            # Best effort: fall back to the untranslated query.
            print(f" ⚠️ Translation failed: {e}. Using original.", flush=True)
            return query, False

        print(f" Original: '{query}' -> English: '{translated_query}'", flush=True)
        return translated_query, True

    def invoke(self, inputs):
        """Answer ``inputs["input"]``, translating in and out as needed.

        Args:
            inputs: Mapping with the user query under the ``"input"`` key.

        Returns:
            ``{"output": answer}``. The answer is in Swahili when the
            query was detected as Swahili and both translations succeeded.
            If only the response translation fails, the English answer is
            returned with a ``" (Translation Error)"`` suffix.
        """
        query = inputs["input"]

        # 1. PRE-PROCESS: Translate Input
        processed_query, is_swahili_mode = self.detect_and_translate_input(query)

        # 2. CORE PROCESS: Run Agent in English
        # The ENGLISH query goes to the agent so it can use its SQL/vector
        # tools correctly.
        result = self.agent.invoke({"input": processed_query})
        english_output = result.get("output", "Error")

        if not is_swahili_mode:
            return {"output": english_output}

        # 3. POST-PROCESS: Translate the final answer back to Swahili
        print(f"🌍 Translating response to Swahili...", flush=True)
        try:
            swahili_output = self.en_to_sw.translate(english_output)
        except Exception as e:
            print(f" ⚠️ Response translation failed: {e}", flush=True)
            return {"output": f"{english_output} (Translation Error)"}

        return {"output": swahili_output}
|
| 316 |
+
|
| 317 |
# --- MAIN SETUP FUNCTION ---
|
| 318 |
|
| 319 |
def create_mshauri_agent(
|
|
|
|
| 328 |
|
| 329 |
# 1. ROBUST SERVERLESS LOADING LOOP
|
| 330 |
if hf_token:
|
| 331 |
+
print("⚡ HF Token found. Testing models...", flush=True)
|
| 332 |
|
| 333 |
for model_id in CANDIDATE_MODELS:
|
| 334 |
+
print(f"Trying model: {model_id}...", flush=True)
|
| 335 |
try:
|
| 336 |
# USE CUSTOM WRAPPER
|
| 337 |
candidate_llm = HuggingFaceChat(
|
|
|
|
| 342 |
# TEST CALL
|
| 343 |
candidate_llm.invoke("Ping")
|
| 344 |
|
| 345 |
+
print(f"SUCCESS: Connected to {model_id}", flush=True)
|
| 346 |
llm = candidate_llm
|
| 347 |
break
|
| 348 |
except Exception as e:
|
| 349 |
+
print(f"Failed: {str(e)[:100]}...", flush=True)
|
| 350 |
time.sleep(1)
|
| 351 |
|
| 352 |
# FALLBACK
|
| 353 |
if not llm:
|
| 354 |
+
print("\nFalling back to Local CPU Ollama...", flush=True)
|
| 355 |
try:
|
| 356 |
llm = ChatOllama(model="qwen2.5:3b", base_url=ollama_url, temperature=0.1)
|
| 357 |
except Exception as e:
|
|
|
|
| 413 |
tools = sql_tools + [retriever_tool, repl_tool]
|
| 414 |
agent = SimpleReActAgent(llm, tools)
|
| 415 |
|
| 416 |
+
# WRAP THE AGENT IN TRANSLATION LAYER
|
| 417 |
+
print("Agent Ready (Multilingual Mode).", flush=True)
|
| 418 |
+
return MultilingualAgent(agent)
|
| 419 |
|
| 420 |
def ask_mshauri(agent, query):
|
| 421 |
if not agent:
|
|
|
|
| 437 |
if __name__ == "__main__":
|
| 438 |
# Quick Test
|
| 439 |
agent = create_mshauri_agent()
|
| 440 |
+
# Test English
|
| 441 |
+
ask_mshauri(agent, "What is the inflation rate?")
|
| 442 |
+
# Test Swahili
|
| 443 |
+
ask_mshauri(agent, "Hali ya uchumi ni vipi?")
|