Spaces:

menikev
/

KnowYourRIght-Bot

Sleeping

App Files Files Community

KnowYourRIght-Bot / app.py

menikev

Update app.py

ec071b2 verified 5 months ago

raw

history blame contribute delete

6.97 kB

	import os
	from pathlib import Path
	import gradio as gr

	from dotenv import load_dotenv
	load_dotenv()

	from langchain.prompts import PromptTemplate
	from langchain_community.vectorstores import Chroma # <-- match ingestion
	from langchain_huggingface import (
	HuggingFaceEmbeddings,
	HuggingFaceEndpoint,
	)
	from langchain.schema.runnable import RunnablePassthrough
	from langchain.schema.output_parser import StrOutputParser

	# --- 1) CONFIG / SAFETY ---
	# Fail fast at import time: the app cannot answer anything without both an
	# inference token and a populated vector store on disk.

	if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
	    print("HUGGINGFACEHUB_API_TOKEN not found. Add it to your Space secrets.")
	    raise SystemExit(1)

	PERSIST_DIR = Path("data/processed/vector_db")
	COLLECTION_NAME = "legal_documents"  # <-- MUST match complete_ingestion.py

	# is_dir() rather than exists(): if a regular file happens to sit at this
	# path, exists() is True and iterdir() would raise NotADirectoryError instead
	# of producing the friendly message below.
	if not PERSIST_DIR.is_dir() or not any(PERSIST_DIR.iterdir()):
	    print("⚠️ Vector DB not found. Run complete_ingestion.py first.")
	    raise SystemExit(1)

	# --- 2) LOAD VECTOR DB / RETRIEVER ---

	print("Loading vector database...")

	# Embedding model used to embed incoming queries; pinned to CPU.
	embedding_model = HuggingFaceEmbeddings(
	    model_kwargs={"device": "cpu"},
	    model_name="BAAI/bge-small-en",
	)

	# Open the persisted Chroma store, selecting the collection by name.
	vectordb = Chroma(
	    collection_name=COLLECTION_NAME,  # <-- critical: open the right collection
	    embedding_function=embedding_model,
	    persist_directory=str(PERSIST_DIR),
	)

	# Startup sanity check: report how many documents the collection holds and
	# abort on an empty one. Any failure here is logged and then re-raised.
	try:
	    count = vectordb._collection.count()
	    print(f"✅ Loaded Chroma collection '{COLLECTION_NAME}' with {count} documents.")
	    if not count:
	        raise RuntimeError(
	            "Chroma collection is empty. Confirm collection_name matches the one used in complete_ingestion.py"
	        )
	except Exception as err:
	    print(f"Chroma sanity check failed: {err}")
	    raise

	# MMR retriever: considers 20 candidates and returns 4 of them.
	retriever = vectordb.as_retriever(
	    search_kwargs={"fetch_k": 20, "k": 4},
	    search_type="mmr",
	)
	print("Vector database ready.")

	# --- 3) LLM (Hugging Face Inference Endpoint) ---

	print("Initializing LLM via Hugging Face Endpoint...")

	# The model repo can be overridden with the HF_ENDPOINT_MODEL environment
	# variable; otherwise the Mistral instruct model below is used.
	llm = HuggingFaceEndpoint(
	    huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
	    repo_id=os.environ.get("HF_ENDPOINT_MODEL", "mistralai/Mistral-7B-Instruct-v0.2"),
	    max_new_tokens=512,
	    temperature=0.15,
	)
	print("LLM initialized.")

	# --- 4) PROMPT & RAG CHAIN ---

	# Prompt text sent to the LLM. It has exactly two placeholders, {context} and
	# {question}; everything else is literal instruction text.
	# NOTE(review): rule 3 mentions Pidgin "if user chose it", but the template has
	# no placeholder carrying the user's language choice - confirm how the model is
	# expected to learn it.
	RAG_PROMPT_TEMPLATE = """
	You are an expert Nigerian Legal Assistant. Provide clear, concise explanations.

	CONTEXT:
	{context}

	RULES:
	1) Explain and summarize—do not paste raw sections verbatim.
	2) Use ONLY the context above. If missing, say you don't know.
	3) Conversational tone. Plain English (or Pidgin if user chose it).
	4) At the end, list the referenced section(s)/source(s).

	QUESTION: {question}

	ANSWER:
	"""

	# Build the PromptTemplate object from the raw template string.
	RAG_PROMPT = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)

	def format_docs(docs):
	    """Render retrieved documents into a single context string.

	    Each document becomes a three-line block (source, section, content) so the
	    LLM has what it needs to cite; blocks are separated by a ``---`` rule.
	    Missing ``source`` / ``section`` metadata fall back to "Unknown Source" /
	    "Unknown Section". An empty *docs* yields an empty string.
	    """

	    def _as_block(d):
	        # One document -> one labelled block.
	        src = d.metadata.get("source", "Unknown Source")
	        sec = d.metadata.get("section", "Unknown Section")
	        return f"Source: {src}\nSection: {sec}\nContent: {d.page_content}"

	    return "\n\n---\n\n".join(_as_block(d) for d in docs)

	# LCEL pipeline. The input question is fanned out by the dict step: it goes to
	# the retriever (whose documents are rendered by format_docs) to fill
	# {context}, and is passed through unchanged as {question}. The filled prompt
	# is sent to the LLM and the result is parsed to a plain string.
	# The composition operator is a bare "|"; a backslash-escaped pipe is a syntax
	# error in Python.
	rag_chain = (
	    {"context": retriever | format_docs, "question": RunnablePassthrough()}
	    | RAG_PROMPT
	    | llm
	    | StrOutputParser()
	)

	# --- 5) APP LOGIC ---

	def _format_references(docs, limit=5):
	    """Build unique "- source — section" lines from the first *limit* docs."""
	    lines = []
	    for d in docs[:limit]:
	        src = d.metadata.get("source", "Unknown Source")
	        sec = d.metadata.get("section", "Unknown Section")
	        if src or sec:
	            lines.append(f"- {src} — {sec}")
	    # Several retrieved chunks often come from the same section; dict.fromkeys
	    # drops the repeats while keeping first-seen order.
	    return list(dict.fromkeys(lines))


	def answer_question(user_input, lang_choice, history=None):
	    """Answer one chat turn and return the updated conversation twice.

	    Args:
	        user_input: Text typed by the user; ``None`` or whitespace-only input
	            is ignored and the history is returned unchanged.
	        lang_choice: ``"english"`` selects the English canned strings; any
	            other value selects the Pidgin variants.
	        history: Prior conversation as a list of ``{"role", "content"}``
	            dicts. ``None`` (the default) means an empty conversation.

	    Returns:
	        ``(history, history)`` - the same updated list for both the chatbot
	        display and the session state.

	    The caller's list is never mutated: the function works on a copy, so a
	    default argument or a shared list cannot leak messages between calls.
	    Any exception raised while answering is logged and turned into a generic
	    apology message instead of propagating to the UI.
	    """
	    # Fresh list per call (fixes the shared mutable default and in-place
	    # mutation of the caller's state; also tolerates history=None).
	    history = list(history) if history else []
	    is_english = lang_choice == "english"

	    try:
	        query = (user_input or "").strip()
	        if not query:
	            return history, history

	        # Chatbot uses type='messages'
	        history.append({"role": "user", "content": query})

	        # Bare greetings get a canned reply without touching the retriever/LLM.
	        if query.lower() in {"hi", "hello", "hey"}:
	            ans = (
	                "Hello! I'm your Nigerian Legal AI Assistant. How can I help you today?"
	                if is_english
	                else "Howfa! I be your Nigerian Legal AI Assistant. How I fit help you today? No be legal advice o."
	            )
	            history.append({"role": "assistant", "content": ans})
	            return history, history

	        print(f"⚡ Running RAG chain for query: {query}")
	        # These docs drive the empty-result check and the reference list;
	        # rag_chain is invoked with the raw query string.
	        docs = retriever.invoke(query)
	        print(f"Retrieved {len(docs)} docs")

	        if not docs:
	            answer = (
	                "I could not find any relevant information in the legal documents for your query."
	            )
	        else:
	            answer = rag_chain.invoke(query)

	        # Build references from the retrieved docs
	        refs = _format_references(docs)
	        if refs:
	            answer += "\n\nReferences:\n" + "\n".join(refs)

	        # Disclaimer
	        answer += (
	            "\n\n--- \n*⚠️ Disclaimer: This is AI-generated information and not legal advice. "
	            "Please consult a qualified lawyer for professional guidance.*"
	            if is_english
	            else "\n\n--- \n⚠️ No be legal advice o, abeg find lawyer for proper advice."
	        )

	        history.append({"role": "assistant", "content": answer.strip()})
	        return history, history

	    except Exception as e:
	        # Top-level UI boundary: log, then show a generic message to the user.
	        print(f"❌ Error: {e}")
	        err = "Sorry, an unexpected error occurred. Please try again."
	        history.append({"role": "assistant", "content": err})
	        return history, history

	def _reset():
	return [], []

	# --- 6) GRADIO UI ---

	def build_ui():
	"""Assemble the Gradio Blocks chat interface and return it without launching.

	NOTE(review): the block nesting of this function was lost in this copy of
	the file. In particular, confirm against the original which components sit
	inside ``gr.Row()`` before re-indenting.
	"""
	with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="KnowYourRight Bot") as demo:
	# Page heading and a one-line description of the covered legislation.
	gr.Markdown("# 📜 KnowYourRight Bot — Nigerian Legal Assistant")
	gr.Markdown("Ask questions about the Nigerian Constitution, Labour Act, FCCPA, Data Protection, and more.")

	# Conversation display. type="messages" matches the {"role", "content"}
	# dicts produced by answer_question.
	# NOTE(review): avatar_images points at user.png / bot.png - presumably
	# files shipped next to app.py; confirm they exist in the Space.
	chatbot = gr.Chatbot(
	label="Chat History",
	height=600,
	type="messages",
	avatar_images=("user.png", "bot.png"),
	)

	# Input row: question box plus send button.
	with gr.Row():
	msg = gr.Textbox(
	label="Your Question",
	placeholder="e.g., 'What are my rights as a tenant?'",
	lines=2,
	scale=4,
	)
	submit_btn = gr.Button("▶️ Send", variant="primary", scale=1)

	# Response language selector (defaults to English) and the clear button.
	lang_choice = gr.Radio(["english", "pidgin"], value="english", label="Response Language")
	clear_btn = gr.Button("🗑️ Clear Chat")

	# Conversation history held in a State component, starting empty.
	chat_state = gr.State([])

	# Both the Send button and submitting the textbox call answer_question with
	# (message, language, history) and write its two return values to the
	# chatbot and the state.
	submit_btn.click(answer_question, [msg, lang_choice, chat_state], [chatbot, chat_state])
	msg.submit(answer_question, [msg, lang_choice, chat_state], [chatbot, chat_state])

	# Separate listeners on the same two triggers reset the textbox to "".
	submit_btn.click(lambda: "", None, msg)
	msg.submit(lambda: "", None, msg)

	# Clear button: replace chatbot and state with _reset()'s two empty lists.
	clear_btn.click(_reset, None, [chatbot, chat_state])

	return demo

	if __name__ == "__main__":
	    # Script entry point: build the Blocks app first, then start serving it.
	    # The progress prints bracket each step so startup logs show where a
	    # failure happened.
	    print("Building Gradio UI...")
	    demo = build_ui()
	    print("Launching Gradio app...")
	    demo.launch()