Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,105 +1,144 @@
|
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
-
|
| 4 |
-
from retriever import get_retriever
|
| 5 |
from langchain.chains import ConversationalRetrievalChain
|
|
|
|
|
|
|
| 6 |
from langchain_community.llms import HuggingFacePipeline
|
|
|
|
|
|
|
|
|
|
| 7 |
from transformers import pipeline
|
| 8 |
-
from dotenv import load_dotenv
|
| 9 |
|
| 10 |
-
load_dotenv()
|
| 11 |
|
| 12 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
retriever = get_retriever()
|
| 14 |
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
| 16 |
pipe = pipeline(
|
| 17 |
-
"
|
| 18 |
-
model="google/flan-t5-base", #
|
| 19 |
max_new_tokens=512,
|
| 20 |
-
temperature=0.
|
| 21 |
-
device=-1 # CPU
|
| 22 |
)
|
| 23 |
-
|
| 24 |
llm = HuggingFacePipeline(pipeline=pipe)
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
| 29 |
- Nigerian Constitution 1999
|
| 30 |
-
- Labour Act and Employment Laws
|
| 31 |
-
- Nigeria Data Protection Act
|
| 32 |
- Federal Competition and Consumer Protection Act (FCCPA)
|
| 33 |
|
| 34 |
PERSONALITY: Professional but approachable, uses Nigerian legal terminology, understands local context.
|
| 35 |
|
| 36 |
RESPONSE STYLE:
|
| 37 |
-
- Start with
|
| 38 |
- Quote specific sections/articles when available
|
| 39 |
- Explain in simple terms what the law means
|
| 40 |
-
- Always include
|
| 41 |
- Use Nigerian English expressions naturally (but not forced)
|
| 42 |
|
| 43 |
-
CONVERSATION MEMORY: Remember previous
|
| 44 |
"""
|
| 45 |
|
| 46 |
-
|
| 47 |
-
You
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
-
PERSONALITY: Friendly
|
| 50 |
|
| 51 |
RESPONSE STYLE:
|
| 52 |
-
- Start with
|
| 53 |
-
-
|
| 54 |
-
-
|
| 55 |
-
-
|
| 56 |
-
-
|
| 57 |
-
|
| 58 |
-
CONVERSATION MEMORY: Remember wetin user don ask before so your answer go make sense with the gist.
|
| 59 |
"""
|
| 60 |
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
qa_chain = ConversationalRetrievalChain.from_llm(
|
| 63 |
llm=llm,
|
| 64 |
retriever=retriever,
|
| 65 |
-
|
|
|
|
| 66 |
)
|
| 67 |
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
answer = result["answer"]
|
| 74 |
|
| 75 |
-
# Collect sources with
|
| 76 |
sources = []
|
| 77 |
for doc in result["source_documents"]:
|
| 78 |
section = doc.metadata.get("section", "Unknown Section")
|
| 79 |
-
|
| 80 |
-
sources.append(f"[{section}] {
|
| 81 |
-
|
| 82 |
if sources:
|
| 83 |
answer += "\n\n📚 Sources:\n" + "\n".join(sources)
|
| 84 |
-
|
| 85 |
-
return answer
|
| 86 |
|
| 87 |
-
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
-
|
| 91 |
-
gr.Markdown("# 📜 KnowYourRight Bot\n### Ask Nigerian law questions in English or Pidgin")
|
| 92 |
-
lang_choice = gr.Radio(["english", "pidgin"], label="Language Mode", value="english")
|
| 93 |
-
chatbot = gr.Chatbot(label="Conversation", height=500, show_label=False)
|
| 94 |
-
msg = gr.Textbox(label="Your Question", placeholder="Type your question here...")
|
| 95 |
-
clear = gr.Button("Clear Chat")
|
| 96 |
|
| 97 |
-
def
|
| 98 |
-
|
| 99 |
-
history = history + [(user_message, bot_reply)]
|
| 100 |
-
return history, ""
|
| 101 |
|
| 102 |
-
msg.submit(
|
| 103 |
-
clear.click(
|
| 104 |
|
| 105 |
-
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
import os
import subprocess
import sys
from pathlib import Path

import gradio as gr
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain_chroma import Chroma
from langchain_community.llms import HuggingFacePipeline
from transformers import pipeline

from retriever import get_retriever
|
|
|
|
| 14 |
|
|
|
|
| 15 |
|
| 16 |
+
# =====================================================
# Auto-build the vector DB if it is missing or empty
# =====================================================
VECTOR_DB_DIR = Path("vector_db")

if not VECTOR_DB_DIR.exists() or not any(VECTOR_DB_DIR.iterdir()):
    print("⚠️ Vector DB not found. Running ingestion...")
    # Use the interpreter that is running this app (not whatever "python"
    # resolves to on PATH) and pass an argument list so no shell is involved.
    ingestion = subprocess.run(
        [sys.executable, "src/ingest_documents.py"],
        check=False,
    )
    # Stay best-effort like the original, but make a failed ingestion visible
    # instead of silently ignoring the exit status.
    if ingestion.returncode != 0:
        print(f"⚠️ Ingestion exited with status {ingestion.returncode}.")


# =====================================================
# Load retriever
# =====================================================
# NOTE(review): get_retriever() comes from the project-local `retriever`
# module; presumably it opens the vector DB built above — confirm.
retriever = get_retriever()
|
| 28 |
|
| 29 |
+
|
| 30 |
+
# =====================================================
# LLM Setup (lighter model for CPU Spaces)
# =====================================================
pipe = pipeline(
    # FLAN-T5 is an encoder-decoder (seq2seq) model, so it has to be served
    # through the text2text task; "text-generation" expects a causal LM.
    "text2text-generation",
    model="google/flan-t5-base",  # ✅ smaller + CPU friendly
    max_new_tokens=512,
    # temperature only takes effect when sampling is enabled.
    do_sample=True,
    temperature=0.3,
)
# Wrap the transformers pipeline so LangChain chains can call it as an LLM.
llm = HuggingFacePipeline(pipeline=pipe)
|
| 40 |
|
| 41 |
+
|
| 42 |
+
# =====================================================
# Prompts
# =====================================================
# Two persona prompts, one per language mode. answer_question() picks one based
# on the "Language" radio value and prepends it to the user's question before
# the chain is invoked. The text below is sent to the model verbatim.

# English persona (used for every language choice other than "pidgin").
english_system_prompt = """
You are a Nigerian Legal AI Assistant specialized in Nigerian law. You have deep knowledge of:
- Nigerian Constitution 1999
- Labour Act and Employment Laws
- Nigeria Data Protection Act
- Federal Competition and Consumer Protection Act (FCCPA)

PERSONALITY: Professional but approachable, uses Nigerian legal terminology, understands local context.

RESPONSE STYLE:
- Start with direct answer to the question
- Quote specific sections/articles when available
- Explain in simple terms what the law means
- Always include disclaimer: "⚠️ This is not legal advice. Please consult a qualified lawyer for specific issues."
- Use Nigerian English expressions naturally (but not forced)

CONVERSATION MEMORY: Remember previous questions in this chat to provide contextual follow-ups.
"""

# Nigerian Pidgin persona (used when the language choice is "pidgin").
pidgin_system_prompt = """
You be Nigerian Legal AI Assistant wey sabi Nigerian law well well. You get knowledge of:
- Nigerian Constitution 1999
- Labour Act and Employment Laws
- Nigeria Data Protection Act
- Federal Competition and Consumer Protection Act (FCCPA)

PERSONALITY: Friendly, approachable, dey use Naija way of talk but still correct for legal matter.

RESPONSE STYLE:
- Start with direct answer
- Mention the exact section/article if available
- Explain am for clear Pidgin wey anybody fit understand
- Always add disclaimer: "⚠️ No be legal advice o, abeg meet lawyer if matter serious."
- Remember wetin dem don ask before, make conversation flow well.
"""
|
| 80 |
|
| 81 |
+
|
| 82 |
+
# =====================================================
# Conversational QA Chain
# =====================================================
# The chain returns both "answer" and "source_documents" (see
# return_source_documents below). The memory can only store a single output,
# so it must be told which key to keep; without output_key, saving the turn
# fails because more than one output key is present.
# NOTE(review): this memory object is module-level, so the same buffer is used
# for every call to qa_chain — confirm that sharing it across users is intended.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    output_key="answer",
)

qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    return_source_documents=True,
)
|
| 93 |
|
| 94 |
+
|
| 95 |
+
# =====================================================
|
| 96 |
+
# Chat function
|
| 97 |
+
# =====================================================
|
| 98 |
+
def answer_question(user_input, lang_choice, history=None):
    """Answer one user question and append the exchange to the chat history.

    Args:
        user_input: The question typed by the user.
        lang_choice: "pidgin" selects the Pidgin system prompt; any other
            value falls back to the English one.
        history: Previous (user, bot) message pairs, or None for a new chat.

    Returns:
        A ``(history, history)`` tuple: the updated list twice, matching the
        two Gradio outputs (chat display and session state) it is wired to.
    """
    # Work on a copy. A mutable default argument is shared between calls, and
    # mutating the caller's list in place has the same leaking effect.
    history = list(history) if history else []

    # Blank submission: nothing to answer, so skip the LLM call entirely.
    if not user_input or not user_input.strip():
        return history, history

    # Pick system prompt
    system_prompt = (
        pidgin_system_prompt if lang_choice == "pidgin" else english_system_prompt
    )

    # Run QA
    # NOTE(review): the persona prompt travels inside the "question" field, so
    # it is presumably also fed to the retriever query — consider moving it
    # into the chain's prompt template instead.
    result = qa_chain.invoke({"question": f"{system_prompt}\n\nUser: {user_input}"})
    answer = result["answer"]

    # Collect sources (with sections). dict.fromkeys drops repeated entries
    # while keeping first-seen order, so the same section is listed once.
    sources = list(
        dict.fromkeys(
            "[{}] from {}".format(
                doc.metadata.get("section", "Unknown Section"),
                doc.metadata.get("source", "Unknown Document").replace(".pdf", ""),
            )
            for doc in result.get("source_documents") or []
        )
    )

    if sources:
        answer += "\n\n📚 Sources:\n" + "\n".join(sources)

    history.append(("You: " + user_input, "Bot: " + answer))
    return history, history
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
# =====================================================
# Gradio Interface (ChatGPT-like UI)
# =====================================================
with gr.Blocks(css=".gradio-container {max-width: 800px !important}") as demo:
    gr.Markdown("## 📜 KnowYourRight Bot — Nigerian Legal Assistant")

    with gr.Row():
        with gr.Column(scale=4):
            chatbot = gr.Chatbot(label="Chat with Legal AI", height=500)
            msg = gr.Textbox(label="Ask your question here...")
            lang_choice = gr.Radio(["english", "pidgin"], value="english", label="Language")
            clear = gr.Button("Clear Chat")

    # Chat history, passed to and returned from answer_question.
    state = gr.State([])

    def reset():
        """Clear the chat display, the stored history and the chain's memory.

        Returns:
            Two empty lists, one for the Chatbot and one for the state.
        """
        # The chain keeps its own conversation buffer; without clearing it the
        # bot would keep using turns the user has just wiped from the screen.
        memory.clear()
        return [], []

    msg.submit(answer_question, [msg, lang_choice, state], [chatbot, state])
    clear.click(reset, None, [chatbot, state])

demo.launch()
|