Update app.py
app.py CHANGED
@@ -7,7 +7,6 @@ from langchain.prompts import PromptTemplate
 from langchain_chroma import Chroma
 from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
 from langchain.schema.runnable import RunnablePassthrough
-from langchain.schema.output_parser import StrOutputParser
 
 # --- 1. CONFIGURATION ---
 load_dotenv()
@@ -21,7 +20,8 @@ print("📂 Loading vector database...")
 PERSIST_DIR = Path("data/processed/vector_db")
 
 if not PERSIST_DIR.exists() or not any(PERSIST_DIR.iterdir()):
-
+    print("⚠️ Vector DB not found. Run complete_ingestion.py first.")
+    raise SystemExit(1)
 
 embedding_model = HuggingFaceEmbeddings(
     model_name="BAAI/bge-small-en",
@@ -31,26 +31,25 @@ embedding_model = HuggingFaceEmbeddings(
 vectordb = Chroma(
     persist_directory=str(PERSIST_DIR),
     embedding_function=embedding_model,
-    collection_name="legal_documents"  # 👈 must match ingestion step
 )
 
 retriever = vectordb.as_retriever(search_kwargs={"k": 4})
 print("✅ Vector database loaded.")
 
 # --- 3. SETUP LLM ---
-print("
+print("🤖 Initializing LLM...")
 llm = HuggingFaceEndpoint(
-    repo_id="mistralai/
-    task="
+    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",  # or mistral-7b-instruct
+    task="conversational",  # ✅ must match HF endpoint type
     temperature=0.1,
     max_new_tokens=512,
    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
 )
-print("✅ LLM
+print("✅ LLM ready.")
 
 # --- 4. PROMPT TEMPLATE ---
 RAG_PROMPT_TEMPLATE = """
-You are an expert Nigerian Legal Assistant. Your primary goal is to help users u
+You are an expert Nigerian Legal Assistant. Your goal is to help users understand Nigerian law by providing clear, concise explanations.
 
 **TASK:** Analyze the provided legal context below to answer the user's question.
 
@@ -58,12 +57,12 @@ You are an expert Nigerian Legal Assistant. Your primary goal is to help users u
 {context}
 
 **RULES:**
-1.
-2.
-3.
-
-4.
-5.
+1. Do not just copy the text. Summarize and explain in simple language.
+2. Be conversational and helpful.
+3. Base your answer ONLY on the provided context. If not found, say:
+   "The provided legal documents do not contain specific information on this topic."
+4. Respond in the user's chosen language (English or Pidgin).
+5. At the end, cite the referenced sources.
 
 **QUESTION:** {question}
 
@@ -80,11 +79,21 @@ def format_docs(docs):
     for d in docs
 )
 
+def extract_text_from_conversational(response):
+    """Normalize HF conversational outputs to plain text."""
+    if isinstance(response, dict) and "generated_text" in response:
+        return response["generated_text"]
+    elif isinstance(response, str):
+        return response
+    elif isinstance(response, list):
+        return response[0].get("generated_text", str(response))
+    return str(response)
+
 rag_chain = (
-    {"context":
+    {"context": retriever | format_docs, "question": RunnablePassthrough()}
     | RAG_PROMPT
     | llm
-
+    | extract_text_from_conversational
 )
 
 # --- 6. MAIN LOGIC ---
@@ -96,7 +105,6 @@ def answer_question(user_input, lang_choice, history=[]):
 
         history.append({'role': 'user', 'content': query})
 
-        # Greetings
         if query.lower() in ["hi", "hello", "hey"]:
             ans = ("Hello! I'm your Nigerian Legal AI Assistant. How can I help you today?"
                    if lang_choice == "english" else
@@ -104,46 +112,43 @@
             history.append({'role': 'assistant', 'content': ans})
             return history, history
 
-        print(f"
-
-        # Retrieve docs
+        print(f"⚡ Running RAG chain for query: {query}")
         docs = retriever.invoke(query)
         if not docs:
-            answer = "I could not find any relevant information in the legal documents for your query.
+            answer = "I could not find any relevant information in the legal documents for your query."
         else:
-
-
-            answer = rag_chain.invoke({"question": query, "context": context})
-            print("✅ RAG chain finished.")
+            answer = rag_chain.invoke(query)
+            print("✅ RAG chain finished.")
 
-
-
+        disclaimer = ("\n\n---\n⚠️ Disclaimer: This is AI-generated information and not legal advice. "
+                      "Please consult a qualified lawyer."
                       if lang_choice == "english" else
                       "\n\n---\n⚠️ No be legal advice o, abeg find lawyer for proper advice.")
-        answer += disclaimer
 
-        # Add references
         references = set()
         for doc in docs:
            source = doc.metadata.get("source", "Unknown Source")
            section = doc.metadata.get("section", "Unknown Section")
-
+            if source and section:
+                references.add(f"- {source} ({section})")
+
        if references:
-            answer += "\n\n**References:**\n" + "\n".join(sorted(references))
+            answer += "\n\n**References:**\n" + "\n".join(sorted(list(references)))
+        answer += disclaimer
 
        history.append({'role': 'assistant', 'content': answer.strip()})
        return history, history
 
    except Exception as e:
        print(f"❌ Error: {e}")
-        error_message = "Sorry, an unexpected error occurred. Please try again
+        error_message = "Sorry, an unexpected error occurred. Please try again."
        history.append({'role': 'assistant', 'content': error_message})
        return history, history
 
 def _reset():
     return [], []
 
-# --- 7.
+# --- 7. UI ---
 def build_ui():
     with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="KnowYourRight Bot") as demo:
         gr.Markdown("# 🏛️ KnowYourRight Bot – Nigerian Legal Assistant")
@@ -181,7 +186,6 @@ def build_ui():
     return demo
 
 if __name__ == "__main__":
-    print("🚀 Building Gradio UI...")
-    demo = build_ui()
     print("🚀 Launching Gradio app...")
+    demo = build_ui()
     demo.launch(debug=True)
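Note on the chain rewiring: the map step {"context": retriever | format_docs, "question": RunnablePassthrough()} feeds the raw query into both branches, which is why the call site changes from rag_chain.invoke({"question": ..., "context": ...}) to rag_chain.invoke(query). A minimal runnable sketch of the same wiring with stand-in components (the fake retriever and LLM below are illustrative placeholders, not the app's real ones):

from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

# Stand-ins for vectordb.as_retriever() and HuggingFaceEndpoint.
fake_retriever = RunnableLambda(lambda q: [f"(document about {q})"])
join_docs = RunnableLambda(lambda docs: "\n\n".join(docs))  # plays the role of format_docs
fake_llm = RunnableLambda(lambda pv: {"generated_text": f"Answer drawn from: {pv.to_string()}"})

def to_text(response):
    # Same idea as extract_text_from_conversational: unwrap a dict payload.
    return response["generated_text"] if isinstance(response, dict) else str(response)

chain = (
    {"context": fake_retriever | join_docs, "question": RunnablePassthrough()}
    | PromptTemplate.from_template("CONTEXT:\n{context}\n\nQUESTION: {question}")
    | fake_llm
    | to_text  # plain functions are coerced to RunnableLambda by the | operator
)

print(chain.invoke("tenancy rights"))  # plain string in, plain string out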
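For reference, the new normalizer covers the shapes a conversational endpoint response can take; a self-contained quick check (the sample payloads below are hypothetical, not captured endpoint output):

# The normalizer as added in this commit, exercised on hypothetical payloads.
def extract_text_from_conversational(response):
    """Normalize HF conversational outputs to plain text."""
    if isinstance(response, dict) and "generated_text" in response:
        return response["generated_text"]
    elif isinstance(response, str):
        return response
    elif isinstance(response, list):
        return response[0].get("generated_text", str(response))
    return str(response)

assert extract_text_from_conversational({"generated_text": "hello"}) == "hello"    # dict payload
assert extract_text_from_conversational("hello") == "hello"                        # plain string
assert extract_text_from_conversational([{"generated_text": "hello"}]) == "hello"  # list of dicts
assert extract_text_from_conversational(3.14) == "3.14"                            # fallback: str()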
|