from pathlib import Path
import json
import re
from typing import Dict, Any, List, Optional
import random
import pandas as pd
from retriever import get_retriever
from config import KINYARWANDA_STOPWORDS, calculate_similarity_score

ROOT = Path(__file__).parent
CONTEXT_PATH = ROOT / 'conversation_contexts.json'


class Assistant:
    """Kinyarwanda legal Q&A assistant.

    Routes each user message to a greeting, law-lookup, or punishment-lookup
    handler, backed by the project retriever and local CSV datasets, and
    keeps a per-user conversation context persisted to a JSON file.
    """

    def __init__(self):
        self.retriever = get_retriever()
        # Datasets are populated by load_datasets(); None means "unavailable".
        self.laws = None          # laws DataFrame shared with the retriever
        self.punishments = None   # penal_code.csv rows, if the file exists
        self.greetings = None     # greetings.csv rows or the retriever's copy
        # user_id -> chronological list of {'role', 'text', ...} entries
        self.contexts: Dict[str, List[Dict[str, Any]]] = {}
        # Keyword lists for naive intent scoring. Deduplicated: the original
        # 'law' list repeated 'article' and 'ingingo', which only inflated
        # counts without affecting the "> 0" checks in detect_intent().
        self.intent_keywords = {
            'greeting': ['mwaramutse', 'muraho', 'amakuru', 'bite', 'mwiriwe',
                         'urabeho', 'urakomeye', 'ndabona'],
            'law': ['itegeko', 'ingingo', 'article', 'ingingo ya',
                    'itegeko rya', 'law'],
            'punishment': ['igihano', 'ibihano', 'ihazabu', 'igifungo',
                           'fine', 'imyaka', 'years', 'punishment'],
        }
        self.load_datasets()
        self._load_contexts()

    def load_datasets(self):
        """Load laws / punishments / greetings data, best-effort.

        Every failure degrades to None rather than raising, so the assistant
        still starts when a dataset is missing or unreadable.
        """
        # Laws are owned by the retriever; trigger its load if not done yet.
        try:
            if self.retriever.laws_df is None:
                self.retriever.load_laws()
            self.laws = self.retriever.laws_df
        except Exception:
            self.laws = None
        # Punishments come from a local penal_code.csv, if present.
        ppath = ROOT / 'penal_code.csv'
        if ppath.exists():
            try:
                self.punishments = pd.read_csv(ppath).fillna('')
            except Exception:
                self.punishments = None
        else:
            self.punishments = None
        # Greetings: prefer the local CSV, otherwise reuse whatever the
        # retriever loaded (it reads greetings.csv or a Kinyarwanda variant).
        gpath = ROOT / 'greetings.csv'
        if gpath.exists():
            try:
                self.greetings = pd.read_csv(gpath).fillna('')
            except Exception:
                self.greetings = None
        else:
            try:
                self.greetings = self.retriever.greetings_df
            except Exception:
                self.greetings = None

    def _load_contexts(self):
        """Restore persisted conversation contexts from CONTEXT_PATH, if any."""
        if CONTEXT_PATH.exists():
            try:
                with open(CONTEXT_PATH, 'r', encoding='utf-8') as f:
                    self.contexts = json.load(f)
            except Exception:
                # Corrupt or unreadable file: start with empty history.
                self.contexts = {}

    def _save_contexts(self):
        """Persist contexts to CONTEXT_PATH; best-effort, never raises."""
        try:
            with open(CONTEXT_PATH, 'w', encoding='utf-8') as f:
                json.dump(self.contexts, f, ensure_ascii=False, indent=2)
        except Exception:
            # Persistence is a convenience; losing a save must not crash
            # the request that triggered it.
            pass

    def tokenize(self, text: str) -> List[str]:
        """Lowercase, strip punctuation, and drop Kinyarwanda stopwords.

        Returns a list of tokens; empty input yields an empty list.
        """
        if not text:
            return []
        txt = str(text).lower()
        # Collapse newlines, then strip everything that is not a word
        # character, whitespace, or Latin-1/Latin-Extended letters
        # (keeps accented characters used in Kinyarwanda/French text).
        txt = re.sub(r"[\r\n]+", " ", txt)
        txt = re.sub(r"[^\w\s\u00C0-\u017F]", " ", txt)
        return [t for t in txt.split() if t and t not in KINYARWANDA_STOPWORDS]

    def detect_intent(self, text: str) -> str:
        """Classify *text* as 'greeting', 'punishment', 'law', or 'unclear'.

        Scoring is keyword presence (substring of the raw text or exact
        token match); ties are broken by the fixed priority
        greeting > punishment > law.
        """
        t = str(text).lower()
        toks = set(self.tokenize(t))
        scores = {k: 0 for k in self.intent_keywords}
        for intent, keywords in self.intent_keywords.items():
            for kw in keywords:
                if kw in t or kw in toks:
                    scores[intent] += 1
        if scores.get('greeting', 0) > 0:
            return 'greeting'
        if scores.get('punishment', 0) > 0:
            return 'punishment'
        if scores.get('law', 0) > 0:
            return 'law'
        # Last resort: explicit law/article markers anywhere in the text.
        if re.search(r'\bingingo\b|\bingingo ya\b|\barticle\b|\bitegeko\b', t):
            return 'law'
        return 'unclear'

    def _update_context(self, user_id: str, entry: Dict[str, Any]):
        """Append *entry* to the user's history (capped at 50) and persist."""
        self.contexts.setdefault(user_id, []).append(entry)
        if len(self.contexts[user_id]) > 50:
            self.contexts[user_id] = self.contexts[user_id][-50:]
        self._save_contexts()

    def _answer_law(self, user_id: str, text: str) -> Dict[str, Any]:
        """Answer a law question via embedding similarity search."""
        try:
            # Embeddings may not have been built yet in this process.
            self.retriever.build_or_load_embeddings()
            results = self.retriever.find_similar(text, top_k=1)
        except Exception:
            results = []
        if results:
            score, meta = results[0]
            out = {'type': 'law', 'score': score, 'law': meta.get('row')}
            self._update_context(user_id, {'role': 'assistant', 'text': out})
            return out
        out = {'type': 'unclear', 'text': "I couldn't find a matching law. Can you be more specific?"}
        # Fix: record the fallback reply too, so the persisted context is
        # consistent with every other response path.
        self._update_context(user_id, {'role': 'assistant', 'text': out})
        return out

    def _answer_punishment(self, user_id: str, text: str) -> Dict[str, Any]:
        """Answer a punishment question by overlap-scoring penal_code rows."""
        if self.punishments is not None:
            best = None
            best_score = 0.0
            for _, row in self.punishments.iterrows():
                # Score against the whole row joined into one description.
                desc = ' '.join([str(row.get(c, '')) for c in row.index])
                s = calculate_similarity_score(text, desc)
                if s > best_score:
                    best_score = s
                    best = row.to_dict()
            if best is not None and best_score > 0:
                out = {'type': 'punishment', 'score': best_score, 'punishment_row': best}
                self._update_context(user_id, {'role': 'assistant', 'text': out})
                return out
        out = {'type': 'unclear', 'text': "I couldn't find a matching punishment. Can you provide more detail?"}
        # Fix: record the fallback reply too (see _answer_law).
        self._update_context(user_id, {'role': 'assistant', 'text': out})
        return out

    def handle_query(self, user_id: str, text: str) -> Dict[str, Any]:
        """Handle one user message and return a typed response dict.

        Order: language-aware greeting detection first, then keyword intent
        routing to the law or punishment handler, else an 'unclear' reply.
        Both the user message and the assistant response are recorded in the
        per-user context.
        """
        # Record the raw user message exactly once.
        self._update_context(user_id, {'role': 'user', 'text': text})
        # Greeting detection handles Kinyarwanda/English/French greetings.
        try:
            reply = self.retriever.detect_and_reply_greeting(text)
        except Exception:
            reply = None
        if reply:
            out = {'type': 'greeting', 'response': reply.get('response', ''), 'followup': reply.get('followup', '')}
            self._update_context(user_id, {'role': 'assistant', 'text': out})
            return out
        intent = self.detect_intent(text)
        # Fix: annotate the already-recorded user message with the detected
        # intent instead of appending a duplicate entry (the original logged
        # the same message twice — once raw, once with 'intent').
        history = self.contexts.get(user_id)
        if history:
            history[-1]['intent'] = intent
            self._save_contexts()
        if intent == 'law':
            return self._answer_law(user_id, text)
        if intent == 'punishment':
            return self._answer_punishment(user_id, text)
        out = {'type': 'unclear', 'text': "Can you please try a legal question? I'm here to assist you."}
        self._update_context(user_id, {'role': 'assistant', 'text': out})
        return out


# Process-wide singleton, created lazily on first use.
_ASSISTANT: Optional[Assistant] = None


def get_assistant() -> Assistant:
    """Return the shared Assistant instance, constructing it on first call."""
    global _ASSISTANT
    if _ASSISTANT is None:
        _ASSISTANT = Assistant()
    return _ASSISTANT