menikev committed on
Commit
2454a06
·
verified ·
1 Parent(s): 76921a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -62
app.py CHANGED
@@ -1,29 +1,27 @@
1
  import os
2
  from pathlib import Path
3
  import gradio as gr
 
 
4
  from langchain.prompts import PromptTemplate
5
  from langchain_chroma import Chroma
6
  from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
7
  from langchain.schema.runnable import RunnablePassthrough
8
  from langchain.schema.output_parser import StrOutputParser
9
 
10
- # --- 1. CONFIGURATION & INITIALIZATION ---
11
-
12
- from dotenv import load_dotenv
13
  load_dotenv()
14
 
15
  if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
16
- print(" HUGGINGFACEHUB_API_TOKEN not found in secrets. Please add it.")
17
- exit()
18
-
19
- # --- 2. LOAD VECTOR DATABASE (Retriever) ---
20
 
21
- print("Loading vector database...")
 
22
  PERSIST_DIR = Path("data/processed/vector_db")
23
 
24
  if not PERSIST_DIR.exists() or not any(PERSIST_DIR.iterdir()):
25
- print("⚠️ Vector DB not found. Run complete_ingestion.py first.")
26
- raise SystemExit(1)
27
 
28
  embedding_model = HuggingFaceEmbeddings(
29
  model_name="BAAI/bge-small-en",
@@ -33,24 +31,23 @@ embedding_model = HuggingFaceEmbeddings(
33
  vectordb = Chroma(
34
  persist_directory=str(PERSIST_DIR),
35
  embedding_function=embedding_model,
 
36
  )
37
 
38
  retriever = vectordb.as_retriever(search_kwargs={"k": 4})
39
- print("Vector database loaded successfully.")
40
 
41
- # --- 3. SETUP THE LLM (via Hugging Face Endpoint) ---
42
-
43
- print("Initializing LLM via Hugging Face Endpoint...")
44
  llm = HuggingFaceEndpoint(
45
  repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
46
  temperature=0.1,
47
  max_new_tokens=512,
48
  huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
49
  )
50
- print("LLM initialized.")
51
-
52
- # --- 4. CREATE THE IMPROVED PROMPT TEMPLATE ---
53
 
 
54
  RAG_PROMPT_TEMPLATE = """
55
  You are an expert Nigerian Legal Assistant. Your primary goal is to help users understand Nigerian law by providing clear, concise, and helpful explanations.
56
 
@@ -60,43 +57,45 @@ You are an expert Nigerian Legal Assistant. Your primary goal is to help users u
60
  {context}
61
 
62
  **RULES:**
63
- 1. **Explain, Don't Just Quote:** Do not just copy the text from the context. You MUST synthesize, summarize, and explain the relevant laws in simple, easy-to-understand language.
64
- 2. **Be Conversational:** Respond in a helpful and advisory tone.
65
- 3. **Use Only Provided Context:** Base your answer SOLELY on the provided context. If the context does not contain the information needed to answer the question, you MUST say "The provided legal documents do not contain specific information on this topic." Do not use outside knowledge.
66
- 4. **Language:** Respond in the user's chosen language (English or Nigerian Pidgin).
67
- 5. **Citations:** At the end of your answer, always list the sources you used from the context.
 
68
 
69
  **QUESTION:** {question}
70
 
71
  **ANSWER:**
72
  """
73
-
74
  RAG_PROMPT = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
75
 
76
- # --- 5. DEFINE THE RAG CHAIN ---
77
-
78
  def format_docs(docs):
79
- return "\n\n---\n\n".join(f"Source: {d.metadata.get('source', 'Unknown')}\nSection: {d.metadata.get('section', 'Unknown')}\nContent: {d.page_content}" for d in docs)
 
 
 
 
 
80
 
81
  rag_chain = (
82
- {"context": retriever | format_docs, "question": RunnablePassthrough()}
83
  | RAG_PROMPT
84
  | llm
85
  | StrOutputParser()
86
  )
87
 
88
- # --- 6. MAIN APPLICATION LOGIC ---
89
-
90
- ## UPDATED to handle the new `type='messages'` format ##
91
  def answer_question(user_input, lang_choice, history=[]):
92
  try:
93
  query = (user_input or "").strip()
94
  if not query:
95
- return history
96
 
97
- # Append the user's message to the history in the new format
98
  history.append({'role': 'user', 'content': query})
99
 
 
100
  if query.lower() in ["hi", "hello", "hey"]:
101
  ans = ("Hello! I'm your Nigerian Legal AI Assistant. How can I help you today?"
102
  if lang_choice == "english" else
@@ -104,40 +103,38 @@ def answer_question(user_input, lang_choice, history=[]):
104
  history.append({'role': 'assistant', 'content': ans})
105
  return history, history
106
 
107
- print(f"Received query: {query}")
108
-
 
109
  docs = retriever.invoke(query)
110
  if not docs:
111
  answer = "I could not find any relevant information in the legal documents for your query. Please try rephrasing."
112
  else:
113
- print("Invoking RAG chain...")
114
- answer = rag_chain.invoke(query)
115
- print("RAG chain finished.")
 
116
 
117
- disclaimer = ("\n\n--- \n*⚠️ Disclaimer: This is AI-generated information and not legal advice. Please consult a qualified lawyer for professional guidance.*"
 
118
  if lang_choice == "english" else
119
- "\n\n--- \n*⚠️ No be legal advice o, abeg find lawyer for proper advice.*")
120
-
 
 
121
  references = set()
122
  for doc in docs:
123
  source = doc.metadata.get("source", "Unknown Source")
124
  section = doc.metadata.get("section", "Unknown Section")
125
- if source != "Unknown Source" and section != "Unknown Section":
126
- references.add(f"- {source} ({section})")
127
-
128
  if references:
129
- answer += "\n\n**References:**\n" + "\n".join(sorted(list(references)))
130
 
131
- answer += disclaimer
132
-
133
- # Append the assistant's response to the history
134
  history.append({'role': 'assistant', 'content': answer.strip()})
135
-
136
- # The function now only needs to return the updated history for the chatbot
137
  return history, history
138
 
139
  except Exception as e:
140
- print(f"An error occurred: {e}")
141
  error_message = "Sorry, an unexpected error occurred. Please try again or rephrase your question."
142
  history.append({'role': 'assistant', 'content': error_message})
143
  return history, history
@@ -146,20 +143,18 @@ def _reset():
146
  return [], []
147
 
148
  # --- 7. GRADIO UI ---
149
-
150
  def build_ui():
151
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="KnowYourRight Bot") as demo:
152
  gr.Markdown("# πŸ“œ KnowYourRight Bot β€” Nigerian Legal Assistant")
153
  gr.Markdown("Ask questions about the Nigerian Constitution, Labour Act, and more. *Powered by AI.*")
154
-
155
- ## UPDATED Chatbot initialization ##
156
  chatbot = gr.Chatbot(
157
  label="Chat History",
158
  height=600,
159
- type='messages', # Use the new messages format
160
  avatar_images=("user.png", "bot.png")
161
  )
162
-
163
  with gr.Row():
164
  msg = gr.Textbox(
165
  label="Your Question",
@@ -174,21 +169,18 @@ def build_ui():
174
 
175
  chat_state = gr.State([])
176
 
177
- # Main event handlers for submitting a question
178
  submit_btn.click(answer_question, [msg, lang_choice, chat_state], [chatbot, chat_state])
179
  msg.submit(answer_question, [msg, lang_choice, chat_state], [chatbot, chat_state])
180
 
181
- ## UPDATED logic for clearing the textbox ##
182
- # This now correctly uses .click() for the button and .submit() for the textbox
183
  submit_btn.click(lambda: "", None, msg)
184
  msg.submit(lambda: "", None, msg)
185
 
186
  clear_btn.click(_reset, None, [chatbot, chat_state])
187
-
188
  return demo
189
 
190
  if __name__ == "__main__":
191
- print("Building Gradio UI...")
192
  demo = build_ui()
193
- print("Launching Gradio app...")
194
- demo.launch(debug=True)
 
1
  import os
2
  from pathlib import Path
3
  import gradio as gr
4
+ from dotenv import load_dotenv
5
+
6
  from langchain.prompts import PromptTemplate
7
  from langchain_chroma import Chroma
8
  from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
9
  from langchain.schema.runnable import RunnablePassthrough
10
  from langchain.schema.output_parser import StrOutputParser
11
 
12
# --- 1. CONFIGURATION ---
# Pull environment variables (notably HUGGINGFACEHUB_API_TOKEN) from a
# local .env file so the app also works outside the Spaces secret store.
load_dotenv()

# Fail fast with a clear message when the required token is missing.
# Using `raise SystemExit` (instead of print + exit()) keeps this check
# consistent with the vector-DB guard below and avoids the `exit` helper,
# which is only guaranteed in interactive sessions.
if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
    raise SystemExit("❌ HUGGINGFACEHUB_API_TOKEN not found in secrets. Please add it.")
 
 
18
 
19
# --- 2. LOAD VECTOR DATABASE ---
print("📂 Loading vector database...")
# Directory produced by the ingestion step (complete_ingestion.py).
PERSIST_DIR = Path("data/processed/vector_db")

# Abort early when the persisted Chroma store is absent or empty —
# continuing would create a new, empty collection and every query
# would silently return no documents.
if not PERSIST_DIR.exists() or not any(PERSIST_DIR.iterdir()):
    raise SystemExit("⚠️ Vector DB not found. Run complete_ingestion.py first.")
 
25
 
26
  embedding_model = HuggingFaceEmbeddings(
27
  model_name="BAAI/bge-small-en",
 
31
# Reopen the persisted Chroma store with the same embedding model that
# was used at ingestion time (mismatched embeddings would make similarity
# search meaningless).
vectordb = Chroma(
    persist_directory=str(PERSIST_DIR),
    embedding_function=embedding_model,
    collection_name="legal_documents",  # 🔑 must match ingestion step
)

# Retrieve the 4 most similar chunks per query.
retriever = vectordb.as_retriever(search_kwargs={"k": 4})
print("✅ Vector database loaded.")
39
 
40
# --- 3. SETUP LLM ---
print("🚀 Initializing LLM via Hugging Face Endpoint...")
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
    temperature=0.1,      # low temperature: keep legal answers near-deterministic
    max_new_tokens=512,
    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
)
print("✅ LLM initialized.")
 
 
49
 
50
+ # --- 4. PROMPT TEMPLATE ---
51
  RAG_PROMPT_TEMPLATE = """
52
  You are an expert Nigerian Legal Assistant. Your primary goal is to help users understand Nigerian law by providing clear, concise, and helpful explanations.
53
 
 
57
  {context}
58
 
59
  **RULES:**
60
+ 1. Explain, don't just quote. Summarize and explain the relevant laws in simple language.
61
+ 2. Be conversational and clear.
62
+ 3. Use ONLY the provided context. If it’s missing, say:
63
+ "The provided legal documents do not contain specific information on this topic."
64
+ 4. Language: Respond in the user's chosen language (English or Nigerian Pidgin).
65
+ 5. Always list sources from the context at the end.
66
 
67
  **QUESTION:** {question}
68
 
69
  **ANSWER:**
70
  """
 
71
# Compile the template once at import time; the chain reuses this object.
RAG_PROMPT = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
72
 
73
# --- 5. RAG CHAIN ---
def format_docs(docs):
    """Render retrieved documents as one context string for the prompt.

    Each document contributes a "Source / Section / Content" entry; missing
    metadata falls back to 'Unknown'. Entries are separated by a
    horizontal-rule marker so the LLM can tell the chunks apart.
    """
    entries = []
    for doc in docs:
        src = doc.metadata.get('source', 'Unknown')
        sec = doc.metadata.get('section', 'Unknown')
        entries.append(f"Source: {src}\nSection: {sec}\nContent: {doc.page_content}")
    return "\n\n---\n\n".join(entries)
81
 
82
  rag_chain = (
83
+ {"context": RunnablePassthrough(), "question": RunnablePassthrough()}
84
  | RAG_PROMPT
85
  | llm
86
  | StrOutputParser()
87
  )
88
 
89
+ # --- 6. MAIN LOGIC ---
 
 
90
  def answer_question(user_input, lang_choice, history=[]):
91
  try:
92
  query = (user_input or "").strip()
93
  if not query:
94
+ return history, history
95
 
 
96
  history.append({'role': 'user', 'content': query})
97
 
98
+ # Greetings
99
  if query.lower() in ["hi", "hello", "hey"]:
100
  ans = ("Hello! I'm your Nigerian Legal AI Assistant. How can I help you today?"
101
  if lang_choice == "english" else
 
103
  history.append({'role': 'assistant', 'content': ans})
104
  return history, history
105
 
106
+ print(f"πŸ”Ž Received query: {query}")
107
+
108
+ # Retrieve docs
109
  docs = retriever.invoke(query)
110
  if not docs:
111
  answer = "I could not find any relevant information in the legal documents for your query. Please try rephrasing."
112
  else:
113
+ print("⚑ Running RAG chain...")
114
+ context = format_docs(docs) # use retrieved docs
115
+ answer = rag_chain.invoke({"question": query, "context": context})
116
+ print("βœ… RAG chain finished.")
117
 
118
+ # Add disclaimer
119
+ disclaimer = ("\n\n---\n⚠️ Disclaimer: This is AI-generated information and not legal advice. Please consult a qualified lawyer."
120
  if lang_choice == "english" else
121
+ "\n\n---\n⚠️ No be legal advice o, abeg find lawyer for proper advice.")
122
+ answer += disclaimer
123
+
124
+ # Add references
125
  references = set()
126
  for doc in docs:
127
  source = doc.metadata.get("source", "Unknown Source")
128
  section = doc.metadata.get("section", "Unknown Section")
129
+ references.add(f"- {source} ({section})")
 
 
130
  if references:
131
+ answer += "\n\n**References:**\n" + "\n".join(sorted(references))
132
 
 
 
 
133
  history.append({'role': 'assistant', 'content': answer.strip()})
 
 
134
  return history, history
135
 
136
  except Exception as e:
137
+ print(f"❌ Error: {e}")
138
  error_message = "Sorry, an unexpected error occurred. Please try again or rephrase your question."
139
  history.append({'role': 'assistant', 'content': error_message})
140
  return history, history
 
143
  return [], []
144
 
145
  # --- 7. GRADIO UI ---
 
146
  def build_ui():
147
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="KnowYourRight Bot") as demo:
148
  gr.Markdown("# πŸ“œ KnowYourRight Bot β€” Nigerian Legal Assistant")
149
  gr.Markdown("Ask questions about the Nigerian Constitution, Labour Act, and more. *Powered by AI.*")
150
+
 
151
  chatbot = gr.Chatbot(
152
  label="Chat History",
153
  height=600,
154
+ type='messages',
155
  avatar_images=("user.png", "bot.png")
156
  )
157
+
158
  with gr.Row():
159
  msg = gr.Textbox(
160
  label="Your Question",
 
169
 
170
  chat_state = gr.State([])
171
 
 
172
  submit_btn.click(answer_question, [msg, lang_choice, chat_state], [chatbot, chat_state])
173
  msg.submit(answer_question, [msg, lang_choice, chat_state], [chatbot, chat_state])
174
 
 
 
175
  submit_btn.click(lambda: "", None, msg)
176
  msg.submit(lambda: "", None, msg)
177
 
178
  clear_btn.click(_reset, None, [chatbot, chat_state])
179
+
180
  return demo
181
 
182
  if __name__ == "__main__":
183
+ print("🌍 Building Gradio UI...")
184
  demo = build_ui()
185
+ print("πŸš€ Launching Gradio app...")
186
+ demo.launch(debug=True)