Spaces:
Sleeping
Sleeping
| import os | |
| from pathlib import Path | |
| import gradio as gr | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| from langchain.prompts import PromptTemplate | |
| from langchain_community.vectorstores import Chroma # <-- match ingestion | |
| from langchain_huggingface import ( | |
| HuggingFaceEmbeddings, | |
| HuggingFaceEndpoint, | |
| ) | |
| from langchain.schema.runnable import RunnablePassthrough | |
| from langchain.schema.output_parser import StrOutputParser | |
# --- 1) CONFIG / SAFETY ---
# Abort start-up early when the Hugging Face token is missing or empty;
# the endpoint client created later in this file reads the same variable.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    print("HUGGINGFACEHUB_API_TOKEN not found. Add it to your Space secrets.")
    raise SystemExit(1)

# Location and collection name of the persisted Chroma store.
PERSIST_DIR = Path("data/processed/vector_db")
COLLECTION_NAME = "legal_documents"  # <-- MUST match complete_ingestion.py

# The store is usable only if the directory exists AND holds at least one entry.
if not (PERSIST_DIR.exists() and any(PERSIST_DIR.iterdir())):
    print("⚠️ Vector DB not found. Run complete_ingestion.py first.")
    raise SystemExit(1)
# --- 2) LOAD VECTOR DB / RETRIEVER ---
print("Loading vector database...")

# Embedding model, run on CPU.
# NOTE(review): presumably the same model used by the ingestion script — confirm.
embedding_model = HuggingFaceEmbeddings(
    model_kwargs={"device": "cpu"},
    model_name="BAAI/bge-small-en",
)

# Open the persisted store; the collection name is critical, because a
# different name would open a different (possibly empty) collection.
vectordb = Chroma(
    collection_name=COLLECTION_NAME,
    embedding_function=embedding_model,
    persist_directory=str(PERSIST_DIR),
)

# Quick sanity check (helps spot an empty/wrong collection immediately).
# Any failure, including the empty-collection error raised below, is logged
# and then re-raised so start-up stops.
try:
    count = vectordb._collection.count()
    print(f"✅ Loaded Chroma collection '{COLLECTION_NAME}' with {count} documents.")
    if count == 0:
        raise RuntimeError(
            "Chroma collection is empty. Confirm collection_name matches the one used in complete_ingestion.py"
        )
except Exception as exc:
    print(f"Chroma sanity check failed: {exc}")
    raise

# A slightly more forgiving retriever: MMR search returning 4 documents
# chosen from 20 fetched candidates.
retriever = vectordb.as_retriever(
    search_kwargs={"k": 4, "fetch_k": 20},
    search_type="mmr",
)
print("Vector database ready.")
# --- 3) LLM (Hugging Face Inference Endpoint) ---
print("Initializing LLM via Hugging Face Endpoint...")

# The model repo can be overridden with HF_ENDPOINT_MODEL; otherwise the
# Mistral instruct model below is used.
llm = HuggingFaceEndpoint(
    huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
    repo_id=os.environ.get("HF_ENDPOINT_MODEL", "mistralai/Mistral-7B-Instruct-v0.2"),
    max_new_tokens=512,
    temperature=0.15,
)
print("LLM initialized.")
# --- 4) PROMPT & RAG CHAIN ---
# Prompt text with two placeholders: {context} (formatted retrieved documents)
# and {question} (the user's query).
RAG_PROMPT_TEMPLATE = """
You are an expert Nigerian Legal Assistant. Provide clear, concise explanations.
CONTEXT:
{context}
RULES:
1) Explain and summarize—do not paste raw sections verbatim.
2) Use ONLY the context above. If missing, say you don't know.
3) Conversational tone. Plain English (or Pidgin if user chose it).
4) At the end, list the referenced section(s)/source(s).
QUESTION: {question}
ANSWER:
"""

# Input variables are inferred from the placeholders in the template.
RAG_PROMPT = PromptTemplate.from_template(template=RAG_PROMPT_TEMPLATE)
def format_docs(docs):
    """Render retrieved documents as one context string for the prompt.

    Each document becomes a "Source / Section / Content" block so the LLM
    can cite properly; blocks are separated by a ``---`` rule. Missing
    ``source`` / ``section`` metadata keys fall back to "Unknown ..." labels.

    Args:
        docs: Iterable of objects exposing ``metadata`` (a mapping) and
            ``page_content``.

    Returns:
        The joined context string (empty when *docs* is empty).
    """

    def _render(doc):
        meta = doc.metadata
        origin = meta.get("source", "Unknown Source")
        part = meta.get("section", "Unknown Section")
        return f"Source: {origin}\nSection: {part}\nContent: {doc.page_content}"

    return "\n\n---\n\n".join(_render(doc) for doc in docs)
# End-to-end chain: the incoming query string is passed through unchanged as
# "question" and, in parallel, sent to the retriever whose documents are
# rendered by format_docs as "context"; the filled prompt then goes to the
# LLM and the result is parsed to a plain string.
rag_chain = (
    {
        "question": RunnablePassthrough(),
        "context": retriever | format_docs,
    }
    | RAG_PROMPT
    | llm
    | StrOutputParser()
)
# --- 5) APP LOGIC ---
def _format_references(docs):
    """Return de-duplicated "- source — section" bullets for *docs*, in order."""
    bullets = []
    for d in docs:
        # `or` also covers metadata values that are present but empty/None.
        src = d.metadata.get("source") or "Unknown Source"
        sec = d.metadata.get("section") or "Unknown Section"
        bullets.append(f"- {src} — {sec}")
    # Several chunks often share one source/section; list each pair once.
    return list(dict.fromkeys(bullets))


def answer_question(user_input, lang_choice, history=None):
    """Handle one chat turn and return the updated history.

    Blank input is ignored. A bare greeting gets a canned reply. Anything
    else is answered from the retrieved legal documents, followed by a
    reference list and a disclaimer.

    Args:
        user_input: Raw text from the question box; ``None`` or blank input
            leaves the history unchanged.
        lang_choice: ``"english"`` selects the English wording; any other
            value selects the Pidgin wording.
        history: Chat history as a list of ``{"role": ..., "content": ...}``
            dicts (Chatbot ``type='messages'`` format). It is extended in
            place; a fresh list is created when omitted.

    Returns:
        ``(history, history)`` — the same list twice, one for the chatbot
        display and one for the session state.
    """
    # A `[]` default would be one list shared (and grown) by every call that
    # omits `history`; create a fresh one per call instead.
    if history is None:
        history = []
    try:
        query = (user_input or "").strip()
        if not query:
            return history, history

        is_english = lang_choice == "english"
        # Chatbot uses type='messages'
        history.append({"role": "user", "content": query})

        if query.lower() in {"hi", "hello", "hey"}:
            ans = (
                "Hello! I'm your Nigerian Legal AI Assistant. How can I help you today?"
                if is_english
                else "Howfa! I be your Nigerian Legal AI Assistant. How I fit help you today? No be legal advice o."
            )
            history.append({"role": "assistant", "content": ans})
            return history, history

        print(f"⚡ Running RAG chain for query: {query}")
        docs = retriever.invoke(query)
        print(f"Retrieved {len(docs)} docs")

        if not docs:
            answer = (
                "I could not find any relevant information in the legal documents for your query."
            )
        else:
            # Reuse the documents fetched above instead of calling
            # rag_chain.invoke(query), which would run the retriever a second
            # time; this also guarantees the references below describe
            # exactly the context the LLM saw.
            # The prompt promises Pidgin "if user chose it", so the choice
            # has to be stated in the text the LLM receives.
            question = (
                query
                if is_english
                else f"{query}\n(Please answer in Nigerian Pidgin English.)"
            )
            answer = (RAG_PROMPT | llm | StrOutputParser()).invoke(
                {"context": format_docs(docs), "question": question}
            )
            # Build references from the retrieved docs
            refs = _format_references(docs[:5])
            if refs:
                answer += "\n\n**References:**\n" + "\n".join(refs)

        # Disclaimer
        answer += (
            "\n\n--- \n*⚠️ Disclaimer: This is AI-generated information and not legal advice. "
            "Please consult a qualified lawyer for professional guidance.*"
            if is_english
            else "\n\n--- \n*⚠️ No be legal advice o, abeg find lawyer for proper advice.*"
        )
        history.append({"role": "assistant", "content": answer.strip()})
        return history, history
    except Exception as e:
        # UI boundary: log the failure and show a generic message rather
        # than letting the exception reach the Gradio event handler.
        print(f"❌ Error: {e}")
        err = "Sorry, an unexpected error occurred. Please try again."
        history.append({"role": "assistant", "content": err})
        return history, history
| def _reset(): | |
| return [], [] | |
# --- 6) GRADIO UI ---
def build_ui():
    """Assemble the Gradio Blocks app and return it without launching.

    Layout: title and intro, the chat window, a row with the question box and
    Send button, the response-language selector and a Clear button. Sending
    (button click or Enter in the textbox) calls ``answer_question`` and
    empties the textbox; Clear calls ``_reset``.
    """
    with gr.Blocks(title="KnowYourRight Bot", theme=gr.themes.Soft(primary_hue="blue")) as demo:
        gr.Markdown("# 📜 KnowYourRight Bot — Nigerian Legal Assistant")
        gr.Markdown("Ask questions about the Nigerian Constitution, Labour Act, FCCPA, Data Protection, and more.")

        chat_view = gr.Chatbot(
            type="messages",
            label="Chat History",
            height=600,
            avatar_images=("user.png", "bot.png"),
        )

        # NOTE(review): row membership inferred from the scale= arguments —
        # confirm against the original layout.
        with gr.Row():
            question_box = gr.Textbox(
                label="Your Question",
                placeholder="e.g., 'What are my rights as a tenant?'",
                lines=2,
                scale=4,
            )
            send_button = gr.Button("▶️ Send", variant="primary", scale=1)

        language_radio = gr.Radio(["english", "pidgin"], value="english", label="Response Language")
        clear_button = gr.Button("🗑️ Clear Chat")
        session_history = gr.State([])

        # Both "send" triggers get the same two listeners, registered in the
        # same order: first the answer handlers, then the textbox-clearing ones.
        send_triggers = (send_button.click, question_box.submit)
        answer_inputs = [question_box, language_radio, session_history]
        answer_outputs = [chat_view, session_history]
        for register in send_triggers:
            register(answer_question, answer_inputs, answer_outputs)
        for register in send_triggers:
            register(lambda: "", None, question_box)

        clear_button.click(_reset, None, [chat_view, session_history])
    return demo
if __name__ == "__main__":
    # Script entry point: build the interface, then start the server,
    # logging each phase so start-up progress is visible.
    print("Building Gradio UI...")
    demo = build_ui()

    print("Launching Gradio app...")
    demo.launch()