Update app.py
app.py CHANGED
@@ -1,170 +1,193 @@
 import os
 from pathlib import Path
 import gradio as gr
+
 from dotenv import load_dotenv
+load_dotenv()
 
 from langchain.prompts import PromptTemplate
-from …
-from langchain_huggingface import …
+from langchain_community.vectorstores import Chroma  # <-- match ingestion
+from langchain_huggingface import (
+    HuggingFaceEmbeddings,
+    HuggingFaceEndpoint,
+)
 from langchain.schema.runnable import RunnablePassthrough
+from langchain.schema.output_parser import StrOutputParser
 
-# --- 1 …
-load_dotenv()
+# --- 1) CONFIG / SAFETY ---
 
 if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
-    print("…
-    …
+    print("HUGGINGFACEHUB_API_TOKEN not found. Add it to your Space secrets.")
+    raise SystemExit(1)
 
-# --- 2. LOAD VECTOR DATABASE ---
-print("📂 Loading vector database...")
 PERSIST_DIR = Path("data/processed/vector_db")
+COLLECTION_NAME = "legal_documents"  # <-- MUST match complete_ingestion.py
 
 if not PERSIST_DIR.exists() or not any(PERSIST_DIR.iterdir()):
     print("⚠️ Vector DB not found. Run complete_ingestion.py first.")
     raise SystemExit(1)
 
+# --- 2) LOAD VECTOR DB / RETRIEVER ---
+
+print("Loading vector database...")
 embedding_model = HuggingFaceEmbeddings(
     model_name="BAAI/bge-small-en",
-    model_kwargs={…
+    model_kwargs={"device": "cpu"},
 )
 
 vectordb = Chroma(
     persist_directory=str(PERSIST_DIR),
     embedding_function=embedding_model,
+    collection_name=COLLECTION_NAME,  # <-- critical: open the right collection
 )
 
-…
-…
+# Quick sanity check (helps spot empty/wrong collection immediately)
+try:
+    count = vectordb._collection.count()
+    print(f"✅ Loaded Chroma collection '{COLLECTION_NAME}' with {count} documents.")
+    if count == 0:
+        raise RuntimeError(
+            "Chroma collection is empty. Confirm collection_name matches the one used in complete_ingestion.py"
+        )
+except Exception as e:
+    print(f"Chroma sanity check failed: {e}")
+    raise
+
+# A slightly more forgiving retriever
+retriever = vectordb.as_retriever(
+    search_type="mmr",
+    search_kwargs={"k": 4, "fetch_k": 20},
+)
+print("Vector database ready.")
 
-# --- 3 …
-…
+# --- 3) LLM (Hugging Face Inference Endpoint) ---
+
+print("Initializing LLM via Hugging Face Endpoint...")
 llm = HuggingFaceEndpoint(
-    repo_id="mistralai/…
-    …
-    temperature=0.1,
+    repo_id=os.getenv("HF_ENDPOINT_MODEL", "mistralai/Mistral-7B-Instruct-v0.2"),
+    temperature=0.15,
     max_new_tokens=512,
-    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
+    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
 )
-print("…
+print("LLM initialized.")
 
-# --- 4 …
-RAG_PROMPT_TEMPLATE = """
-You are an expert Nigerian Legal Assistant. Your goal is to help users understand Nigerian law by providing clear, concise explanations.
+# --- 4) PROMPT & RAG CHAIN ---
 
-…
+RAG_PROMPT_TEMPLATE = """
+You are an expert Nigerian Legal Assistant. Provide clear, concise explanations.
 
-…
+CONTEXT:
 {context}
 
-…
-1 …
-2. …
-3. …
-…
-4. Respond in the user's chosen language (English or Pidgin).
-5. At the end, cite the referenced sources.
+RULES:
+1) Explain and summarize—do not paste raw sections verbatim.
+2) Use ONLY the context above. If missing, say you don't know.
+3) Conversational tone. Plain English (or Pidgin if user chose it).
+4) At the end, list the referenced section(s)/source(s).
 
-…
+QUESTION: {question}
 
-…
+ANSWER:
 """
+
 RAG_PROMPT = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
 
-# --- 5. RAG CHAIN ---
 def format_docs(docs):
-    …
-    …
-    …
-    …
-    …
-    …
-    …
-def extract_text_from_conversational(response):
-    """Normalize HF conversational outputs to plain text."""
-    if isinstance(response, dict) and "generated_text" in response:
-        return response["generated_text"]
-    elif isinstance(response, str):
-        return response
-    elif isinstance(response, list):
-        return response[0].get("generated_text", str(response))
-    return str(response)
+    # Keep rich info so the LLM can cite properly
+    blocks = []
+    for d in docs:
+        src = d.metadata.get("source", "Unknown Source")
+        sec = d.metadata.get("section", "Unknown Section")
+        blocks.append(f"Source: {src}\nSection: {sec}\nContent: {d.page_content}")
+    return "\n\n---\n\n".join(blocks)
 
 rag_chain = (
     {"context": retriever | format_docs, "question": RunnablePassthrough()}
     | RAG_PROMPT
     | llm
-    …
+    | StrOutputParser()
 )
 
-# --- …
+# --- 5) APP LOGIC ---
+
 def answer_question(user_input, lang_choice, history=[]):
     try:
         query = (user_input or "").strip()
         if not query:
             return history, history
 
-        …
+        # Chatbot uses type='messages'
+        history.append({"role": "user", "content": query})
 
-        if query.lower() in …
-            ans = (
-            …
-            …
-            …
+        if query.lower() in {"hi", "hello", "hey"}:
+            ans = (
+                "Hello! I'm your Nigerian Legal AI Assistant. How can I help you today?"
+                if lang_choice == "english"
+                else "Howfa! I be your Nigerian Legal AI Assistant. How I fit help you today? No be legal advice o."
+            )
+            history.append({"role": "assistant", "content": ans})
             return history, history
 
         print(f"⚡ Running RAG chain for query: {query}")
         docs = retriever.invoke(query)
+        print(f"Retrieved {len(docs)} docs")
+
         if not docs:
-            answer = …
+            answer = (
+                "I could not find any relevant information in the legal documents for your query."
+            )
         else:
             answer = rag_chain.invoke(query)
-        print("✅ RAG chain finished.")
-        …
-        disclaimer = ("\n\n---\n⚠️ Disclaimer: This is AI-generated information and not legal advice. "
-                      "Please consult a qualified lawyer."
-                      if lang_choice == "english" else
-                      "\n\n---\n⚠️ No be legal advice o, abeg find lawyer for proper advice.")
-        …
-        references = set()
-        for doc in docs:
-            source = doc.metadata.get("source", "Unknown Source")
-            section = doc.metadata.get("section", "Unknown Section")
-            if source and section:
-                references.add(f"- {source} ({section})")
 
-        …
-        …
-        …
+        # Build references from the retrieved docs
+        refs = []
+        for d in docs[:5]:
+            src = d.metadata.get("source", "Unknown Source")
+            sec = d.metadata.get("section", "Unknown Section")
+            if src or sec:
+                refs.append(f"- {src} — {sec}")
+
+        if refs:
+            answer += "\n\n**References:**\n" + "\n".join(refs)
+
+        # Disclaimer
+        answer += (
+            "\n\n--- \n*⚠️ Disclaimer: This is AI-generated information and not legal advice. "
+            "Please consult a qualified lawyer for professional guidance.*"
+            if lang_choice == "english"
+            else "\n\n--- \n*⚠️ No be legal advice o, abeg find lawyer for proper advice.*"
+        )
 
-        history.append({…
+        history.append({"role": "assistant", "content": answer.strip()})
         return history, history
 
     except Exception as e:
         print(f"❌ Error: {e}")
-        …
-        history.append({…
+        err = "Sorry, an unexpected error occurred. Please try again."
+        history.append({"role": "assistant", "content": err})
         return history, history
 
 def _reset():
     return [], []
 
-# --- …
+# --- 6) GRADIO UI ---
+
 def build_ui():
     with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="KnowYourRight Bot") as demo:
         gr.Markdown("# 📜 KnowYourRight Bot — Nigerian Legal Assistant")
-        gr.Markdown("Ask questions about the Nigerian Constitution, Labour Act, and more. …
+        gr.Markdown("Ask questions about the Nigerian Constitution, Labour Act, FCCPA, Data Protection, and more.")
 
         chatbot = gr.Chatbot(
             label="Chat History",
             height=600,
-            type=…
-            avatar_images=("user.png", "bot.png")
+            type="messages",
+            avatar_images=("user.png", "bot.png"),
         )
 
         with gr.Row():
             msg = gr.Textbox(
                 label="Your Question",
-                placeholder="e.g., 'What are my rights…
+                placeholder="e.g., 'What are my rights as a tenant?'",
                 lines=2,
                 scale=4,
             )
@@ -186,6 +209,7 @@ def build_ui():
     return demo
 
 if __name__ == "__main__":
-    print("…
+    print("Building Gradio UI...")
     demo = build_ui()
-    …
+    print("Launching Gradio app...")
+    demo.launch()
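For anyone reviewing the collection fix without launching the Space, a minimal local smoke test might look like the sketch below. It assumes the same paths, embedding model, and collection name as the diff above; smoke_test.py is a hypothetical helper, not part of this commit.

# smoke_test.py: hypothetical helper, not part of this commit.
# Verifies that the persisted Chroma collection matches what app.py expects,
# and exercises the retriever without the LLM in the loop.
from pathlib import Path

from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

PERSIST_DIR = Path("data/processed/vector_db")
COLLECTION_NAME = "legal_documents"  # must match complete_ingestion.py and app.py

embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")
vectordb = Chroma(
    persist_directory=str(PERSIST_DIR),
    embedding_function=embedding_model,
    collection_name=COLLECTION_NAME,
)

# Same startup check as app.py: a count of 0 usually means the collection
# name here differs from the one used at ingestion time.
print("documents in collection:", vectordb._collection.count())

# Retrieval only, so failures point at the vector DB rather than the endpoint.
retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 4, "fetch_k": 20})
for doc in retriever.invoke("What are my rights as a tenant?"):
    print(doc.metadata.get("source", "?"), "|", doc.metadata.get("section", "?"))

The ingestion side presumably passes the same name, e.g. Chroma.from_documents(docs, embedding_model, collection_name=COLLECTION_NAME, persist_directory=str(PERSIST_DIR)); if complete_ingestion.py uses a different collection_name, the startup check added in this diff will raise at launch.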