Update app.py
app.py CHANGED
@@ -7,7 +7,6 @@ from langchain.prompts import PromptTemplate
 from langchain_chroma import Chroma
 from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
 from langchain.schema.runnable import RunnablePassthrough
-from langchain.schema.output_parser import StrOutputParser
 
 # --- 1. CONFIGURATION ---
 load_dotenv()
@@ -21,7 +20,8 @@ print("📂 Loading vector database...")
 PERSIST_DIR = Path("data/processed/vector_db")
 
 if not PERSIST_DIR.exists() or not any(PERSIST_DIR.iterdir()):
-
+    print("⚠️ Vector DB not found. Run complete_ingestion.py first.")
+    raise SystemExit(1)
 
 embedding_model = HuggingFaceEmbeddings(
     model_name="BAAI/bge-small-en",
@@ -31,26 +31,25 @@ embedding_model = HuggingFaceEmbeddings(
 vectordb = Chroma(
     persist_directory=str(PERSIST_DIR),
     embedding_function=embedding_model,
-    collection_name="legal_documents"  # 👈 must match ingestion step
 )
 
 retriever = vectordb.as_retriever(search_kwargs={"k": 4})
 print("✅ Vector database loaded.")
 
 # --- 3. SETUP LLM ---
-print("
+print("🤖 Initializing LLM...")
 llm = HuggingFaceEndpoint(
-    repo_id="mistralai/
-    task="
+    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",  # or mistral-7b-instruct
+    task="conversational",  # ✅ must match HF endpoint type
     temperature=0.1,
     max_new_tokens=512,
    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
 )
-print("✅ LLM
+print("✅ LLM ready.")
 
 # --- 4. PROMPT TEMPLATE ---
 RAG_PROMPT_TEMPLATE = """
-You are an expert Nigerian Legal Assistant. Your primary goal is to help users u
+You are an expert Nigerian Legal Assistant. Your goal is to help users understand Nigerian law by providing clear, concise explanations.
 
 **TASK:** Analyze the provided legal context below to answer the user's question.
 
@@ -58,12 +57,12 @@ You are an expert Nigerian Legal Assistant. Your primary goal is to help users u
 {context}
 
 **RULES:**
-1.
-2.
-3.
-
-4.
-5.
+1. Do not just copy the text. Summarize and explain in simple language.
+2. Be conversational and helpful.
+3. Base your answer ONLY on the provided context. If not found, say:
+   "The provided legal documents do not contain specific information on this topic."
+4. Respond in the user's chosen language (English or Pidgin).
+5. At the end, cite the referenced sources.
 
 **QUESTION:** {question}
 
@@ -80,11 +79,21 @@ def format_docs(docs):
     for d in docs
 )
 
+def extract_text_from_conversational(response):
+    """Normalize HF conversational outputs to plain text."""
+    if isinstance(response, dict) and "generated_text" in response:
+        return response["generated_text"]
+    elif isinstance(response, str):
+        return response
+    elif isinstance(response, list):
+        return response[0].get("generated_text", str(response))
+    return str(response)
+
 rag_chain = (
-    {"context":
+    {"context": retriever | format_docs, "question": RunnablePassthrough()}
     | RAG_PROMPT
     | llm
-
+    | extract_text_from_conversational
 )
 
 # --- 6. MAIN LOGIC ---
@@ -96,7 +105,6 @@ def answer_question(user_input, lang_choice, history=[]):
 
         history.append({'role': 'user', 'content': query})
 
-        # Greetings
         if query.lower() in ["hi", "hello", "hey"]:
             ans = ("Hello! I'm your Nigerian Legal AI Assistant. How can I help you today?"
                    if lang_choice == "english" else
@@ -104,46 +112,43 @@
             history.append({'role': 'assistant', 'content': ans})
             return history, history
 
-        print(f"
-
-        # Retrieve docs
+        print(f"⚡ Running RAG chain for query: {query}")
         docs = retriever.invoke(query)
         if not docs:
-            answer = "I could not find any relevant information in the legal documents for your query.
+            answer = "I could not find any relevant information in the legal documents for your query."
         else:
-
-
-            answer = rag_chain.invoke({"question": query, "context": context})
-            print("✅ RAG chain finished.")
+            answer = rag_chain.invoke(query)
+            print("✅ RAG chain finished.")
 
-
-
+        disclaimer = ("\n\n---\n⚠️ Disclaimer: This is AI-generated information and not legal advice. "
+                      "Please consult a qualified lawyer."
                       if lang_choice == "english" else
                       "\n\n---\n⚠️ No be legal advice o, abeg find lawyer for proper advice.")
-        answer += disclaimer
 
-        # Add references
         references = set()
         for doc in docs:
            source = doc.metadata.get("source", "Unknown Source")
            section = doc.metadata.get("section", "Unknown Section")
-
+            if source and section:
+                references.add(f"- {source} ({section})")
+
        if references:
-            answer += "\n\n**References:**\n" + "\n".join(sorted(references))
+            answer += "\n\n**References:**\n" + "\n".join(sorted(list(references)))
+        answer += disclaimer
 
        history.append({'role': 'assistant', 'content': answer.strip()})
        return history, history
 
    except Exception as e:
        print(f"❌ Error: {e}")
-        error_message = "Sorry, an unexpected error occurred. Please try again
+        error_message = "Sorry, an unexpected error occurred. Please try again."
        history.append({'role': 'assistant', 'content': error_message})
        return history, history
 
 def _reset():
     return [], []
 
-# --- 7.
+# --- 7. UI ---
 def build_ui():
     with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="KnowYourRight Bot") as demo:
         gr.Markdown("# 🏛️ KnowYourRight Bot – Nigerian Legal Assistant")
@@ -181,7 +186,6 @@ def build_ui():
     return demo
 
 if __name__ == "__main__":
-    print("🚀 Building Gradio UI...")
-    demo = build_ui()
     print("🚀 Launching Gradio app...")
+    demo = build_ui()
     demo.launch(debug=True)
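Note on the chain rewiring: the map step {"context": retriever | format_docs, "question": RunnablePassthrough()} feeds the raw query into both branches, which is why the call site changes from rag_chain.invoke({"question": ..., "context": ...}) to rag_chain.invoke(query). A minimal runnable sketch of the same wiring with stand-in components (the fake retriever and LLM below are illustrative placeholders, not the app's real ones):

from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

# Stand-ins for vectordb.as_retriever() and HuggingFaceEndpoint.
fake_retriever = RunnableLambda(lambda q: [f"(document about {q})"])
join_docs = RunnableLambda(lambda docs: "\n\n".join(docs))  # plays the role of format_docs
fake_llm = RunnableLambda(lambda pv: {"generated_text": f"Answer drawn from: {pv.to_string()}"})

def to_text(response):
    # Same idea as extract_text_from_conversational: unwrap a dict payload.
    return response["generated_text"] if isinstance(response, dict) else str(response)

chain = (
    {"context": fake_retriever | join_docs, "question": RunnablePassthrough()}
    | PromptTemplate.from_template("CONTEXT:\n{context}\n\nQUESTION: {question}")
    | fake_llm
    | to_text  # plain functions are coerced to RunnableLambda by the | operator
)

print(chain.invoke("tenancy rights"))  # plain string in, plain string out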
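For reference, the new normalizer covers the shapes a conversational endpoint response can take; a self-contained quick check (the sample payloads below are hypothetical, not captured endpoint output):

# The normalizer as added in this commit, exercised on hypothetical payloads.
def extract_text_from_conversational(response):
    """Normalize HF conversational outputs to plain text."""
    if isinstance(response, dict) and "generated_text" in response:
        return response["generated_text"]
    elif isinstance(response, str):
        return response
    elif isinstance(response, list):
        return response[0].get("generated_text", str(response))
    return str(response)

assert extract_text_from_conversational({"generated_text": "hello"}) == "hello"    # dict payload
assert extract_text_from_conversational("hello") == "hello"                        # plain string
assert extract_text_from_conversational([{"generated_text": "hello"}]) == "hello"  # list of dicts
assert extract_text_from_conversational(3.14) == "3.14"                            # fallback: str()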
|