menikev committed on
Commit
2454a06
·
verified ·
1 Parent(s): 76921a6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -62
app.py CHANGED
@@ -1,29 +1,27 @@
1
  import os
2
  from pathlib import Path
3
  import gradio as gr
 
 
4
  from langchain.prompts import PromptTemplate
5
  from langchain_chroma import Chroma
6
  from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
7
  from langchain.schema.runnable import RunnablePassthrough
8
  from langchain.schema.output_parser import StrOutputParser
9
 
10
- # --- 1. CONFIGURATION & INITIALIZATION ---
11
-
12
- from dotenv import load_dotenv
13
  load_dotenv()
14
 
15
  if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
16
- print(" HUGGINGFACEHUB_API_TOKEN not found in secrets. Please add it.")
17
- exit()
18
-
19
- # --- 2. LOAD VECTOR DATABASE (Retriever) ---
20
 
21
- print("Loading vector database...")
 
22
  PERSIST_DIR = Path("data/processed/vector_db")
23
 
24
  if not PERSIST_DIR.exists() or not any(PERSIST_DIR.iterdir()):
25
- print("⚠️ Vector DB not found. Run complete_ingestion.py first.")
26
- raise SystemExit(1)
27
 
28
  embedding_model = HuggingFaceEmbeddings(
29
  model_name="BAAI/bge-small-en",
@@ -33,24 +31,23 @@ embedding_model = HuggingFaceEmbeddings(
33
  vectordb = Chroma(
34
  persist_directory=str(PERSIST_DIR),
35
  embedding_function=embedding_model,
 
36
  )
37
 
38
  retriever = vectordb.as_retriever(search_kwargs={"k": 4})
39
- print("Vector database loaded successfully.")
40
 
41
- # --- 3. SETUP THE LLM (via Hugging Face Endpoint) ---
42
-
43
- print("Initializing LLM via Hugging Face Endpoint...")
44
  llm = HuggingFaceEndpoint(
45
  repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
46
  temperature=0.1,
47
  max_new_tokens=512,
48
  huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
49
  )
50
- print("LLM initialized.")
51
-
52
- # --- 4. CREATE THE IMPROVED PROMPT TEMPLATE ---
53
 
 
54
  RAG_PROMPT_TEMPLATE = """
55
  You are an expert Nigerian Legal Assistant. Your primary goal is to help users understand Nigerian law by providing clear, concise, and helpful explanations.
56
 
@@ -60,43 +57,45 @@ You are an expert Nigerian Legal Assistant. Your primary goal is to help users u
60
  {context}
61
 
62
  **RULES:**
63
- 1. **Explain, Don't Just Quote:** Do not just copy the text from the context. You MUST synthesize, summarize, and explain the relevant laws in simple, easy-to-understand language.
64
- 2. **Be Conversational:** Respond in a helpful and advisory tone.
65
- 3. **Use Only Provided Context:** Base your answer SOLELY on the provided context. If the context does not contain the information needed to answer the question, you MUST say "The provided legal documents do not contain specific information on this topic." Do not use outside knowledge.
66
- 4. **Language:** Respond in the user's chosen language (English or Nigerian Pidgin).
67
- 5. **Citations:** At the end of your answer, always list the sources you used from the context.
 
68
 
69
  **QUESTION:** {question}
70
 
71
  **ANSWER:**
72
  """
73
-
74
  RAG_PROMPT = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
75
 
76
- # --- 5. DEFINE THE RAG CHAIN ---
77
-
78
  def format_docs(docs):
79
- return "\n\n---\n\n".join(f"Source: {d.metadata.get('source', 'Unknown')}\nSection: {d.metadata.get('section', 'Unknown')}\nContent: {d.page_content}" for d in docs)
 
 
 
 
 
80
 
81
  rag_chain = (
82
- {"context": retriever | format_docs, "question": RunnablePassthrough()}
83
  | RAG_PROMPT
84
  | llm
85
  | StrOutputParser()
86
  )
87
 
88
- # --- 6. MAIN APPLICATION LOGIC ---
89
-
90
- ## UPDATED to handle the new `type='messages'` format ##
91
  def answer_question(user_input, lang_choice, history=[]):
92
  try:
93
  query = (user_input or "").strip()
94
  if not query:
95
- return history
96
 
97
- # Append the user's message to the history in the new format
98
  history.append({'role': 'user', 'content': query})
99
 
 
100
  if query.lower() in ["hi", "hello", "hey"]:
101
  ans = ("Hello! I'm your Nigerian Legal AI Assistant. How can I help you today?"
102
  if lang_choice == "english" else
@@ -104,40 +103,38 @@ def answer_question(user_input, lang_choice, history=[]):
104
  history.append({'role': 'assistant', 'content': ans})
105
  return history, history
106
 
107
- print(f"Received query: {query}")
108
-
 
109
  docs = retriever.invoke(query)
110
  if not docs:
111
  answer = "I could not find any relevant information in the legal documents for your query. Please try rephrasing."
112
  else:
113
- print("Invoking RAG chain...")
114
- answer = rag_chain.invoke(query)
115
- print("RAG chain finished.")
 
116
 
117
- disclaimer = ("\n\n--- \n*⚠️ Disclaimer: This is AI-generated information and not legal advice. Please consult a qualified lawyer for professional guidance.*"
 
118
  if lang_choice == "english" else
119
- "\n\n--- \n*⚠️ No be legal advice o, abeg find lawyer for proper advice.*")
120
-
 
 
121
  references = set()
122
  for doc in docs:
123
  source = doc.metadata.get("source", "Unknown Source")
124
  section = doc.metadata.get("section", "Unknown Section")
125
- if source != "Unknown Source" and section != "Unknown Section":
126
- references.add(f"- {source} ({section})")
127
-
128
  if references:
129
- answer += "\n\n**References:**\n" + "\n".join(sorted(list(references)))
130
 
131
- answer += disclaimer
132
-
133
- # Append the assistant's response to the history
134
  history.append({'role': 'assistant', 'content': answer.strip()})
135
-
136
- # The function now only needs to return the updated history for the chatbot
137
  return history, history
138
 
139
  except Exception as e:
140
- print(f"An error occurred: {e}")
141
  error_message = "Sorry, an unexpected error occurred. Please try again or rephrase your question."
142
  history.append({'role': 'assistant', 'content': error_message})
143
  return history, history
@@ -146,20 +143,18 @@ def _reset():
146
  return [], []
147
 
148
  # --- 7. GRADIO UI ---
149
-
150
  def build_ui():
151
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="KnowYourRight Bot") as demo:
152
  gr.Markdown("# πŸ“œ KnowYourRight Bot β€” Nigerian Legal Assistant")
153
  gr.Markdown("Ask questions about the Nigerian Constitution, Labour Act, and more. *Powered by AI.*")
154
-
155
- ## UPDATED Chatbot initialization ##
156
  chatbot = gr.Chatbot(
157
  label="Chat History",
158
  height=600,
159
- type='messages', # Use the new messages format
160
  avatar_images=("user.png", "bot.png")
161
  )
162
-
163
  with gr.Row():
164
  msg = gr.Textbox(
165
  label="Your Question",
@@ -174,21 +169,18 @@ def build_ui():
174
 
175
  chat_state = gr.State([])
176
 
177
- # Main event handlers for submitting a question
178
  submit_btn.click(answer_question, [msg, lang_choice, chat_state], [chatbot, chat_state])
179
  msg.submit(answer_question, [msg, lang_choice, chat_state], [chatbot, chat_state])
180
 
181
- ## UPDATED logic for clearing the textbox ##
182
- # This now correctly uses .click() for the button and .submit() for the textbox
183
  submit_btn.click(lambda: "", None, msg)
184
  msg.submit(lambda: "", None, msg)
185
 
186
  clear_btn.click(_reset, None, [chatbot, chat_state])
187
-
188
  return demo
189
 
190
  if __name__ == "__main__":
191
- print("Building Gradio UI...")
192
  demo = build_ui()
193
- print("Launching Gradio app...")
194
- demo.launch(debug=True)
 
1
  import os
2
  from pathlib import Path
3
  import gradio as gr
4
+ from dotenv import load_dotenv
5
+
6
  from langchain.prompts import PromptTemplate
7
  from langchain_chroma import Chroma
8
  from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
9
  from langchain.schema.runnable import RunnablePassthrough
10
  from langchain.schema.output_parser import StrOutputParser
11
 
12
# --- 1. CONFIGURATION ---
# Pull environment variables (notably HUGGINGFACEHUB_API_TOKEN) from a
# local .env file so the app also works outside the Spaces secret store.
load_dotenv()

# Fail fast with a clear message when the required token is missing.
# Using `raise SystemExit` (instead of print + exit()) keeps this check
# consistent with the vector-DB guard below and avoids the `exit` helper,
# which is only guaranteed in interactive sessions.
if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
    raise SystemExit("❌ HUGGINGFACEHUB_API_TOKEN not found in secrets. Please add it.")
 
 
18
 
19
# --- 2. LOAD VECTOR DATABASE ---
print("📂 Loading vector database...")
# Directory produced by the ingestion step (complete_ingestion.py).
PERSIST_DIR = Path("data/processed/vector_db")

# Abort early when the persisted Chroma store is absent or empty —
# continuing would create a new, empty collection and every query
# would silently return no documents.
if not PERSIST_DIR.exists() or not any(PERSIST_DIR.iterdir()):
    raise SystemExit("⚠️ Vector DB not found. Run complete_ingestion.py first.")
 
25
 
26
  embedding_model = HuggingFaceEmbeddings(
27
  model_name="BAAI/bge-small-en",
 
31
# Reopen the persisted Chroma store with the same embedding model that
# was used at ingestion time (mismatched embeddings would make similarity
# search meaningless).
vectordb = Chroma(
    persist_directory=str(PERSIST_DIR),
    embedding_function=embedding_model,
    collection_name="legal_documents",  # 🔑 must match ingestion step
)

# Retrieve the 4 most similar chunks per query.
retriever = vectordb.as_retriever(search_kwargs={"k": 4})
print("✅ Vector database loaded.")
39
 
40
# --- 3. SETUP LLM ---
print("🚀 Initializing LLM via Hugging Face Endpoint...")
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
    temperature=0.1,      # low temperature: keep legal answers near-deterministic
    max_new_tokens=512,
    huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
)
print("✅ LLM initialized.")
 
 
49
 
50
+ # --- 4. PROMPT TEMPLATE ---
51
  RAG_PROMPT_TEMPLATE = """
52
  You are an expert Nigerian Legal Assistant. Your primary goal is to help users understand Nigerian law by providing clear, concise, and helpful explanations.
53
 
 
57
  {context}
58
 
59
  **RULES:**
60
+ 1. Explain, don't just quote. Summarize and explain the relevant laws in simple language.
61
+ 2. Be conversational and clear.
62
+ 3. Use ONLY the provided context. If it’s missing, say:
63
+ "The provided legal documents do not contain specific information on this topic."
64
+ 4. Language: Respond in the user's chosen language (English or Nigerian Pidgin).
65
+ 5. Always list sources from the context at the end.
66
 
67
  **QUESTION:** {question}
68
 
69
  **ANSWER:**
70
  """
 
71
# Compile the template once at import time; the chain reuses this object.
RAG_PROMPT = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
72
 
73
# --- 5. RAG CHAIN ---
def format_docs(docs):
    """Render retrieved documents as one context string for the prompt.

    Each document contributes a "Source / Section / Content" entry; missing
    metadata falls back to 'Unknown'. Entries are separated by a
    horizontal-rule marker so the LLM can tell the chunks apart.
    """
    entries = []
    for doc in docs:
        src = doc.metadata.get('source', 'Unknown')
        sec = doc.metadata.get('section', 'Unknown')
        entries.append(f"Source: {src}\nSection: {sec}\nContent: {doc.page_content}")
    return "\n\n---\n\n".join(entries)
81
 
82
  rag_chain = (
83
+ {"context": RunnablePassthrough(), "question": RunnablePassthrough()}
84
  | RAG_PROMPT
85
  | llm
86
  | StrOutputParser()
87
  )
88
 
89
+ # --- 6. MAIN LOGIC ---
 
 
90
  def answer_question(user_input, lang_choice, history=[]):
91
  try:
92
  query = (user_input or "").strip()
93
  if not query:
94
+ return history, history
95
 
 
96
  history.append({'role': 'user', 'content': query})
97
 
98
+ # Greetings
99
  if query.lower() in ["hi", "hello", "hey"]:
100
  ans = ("Hello! I'm your Nigerian Legal AI Assistant. How can I help you today?"
101
  if lang_choice == "english" else
 
103
  history.append({'role': 'assistant', 'content': ans})
104
  return history, history
105
 
106
+ print(f"πŸ”Ž Received query: {query}")
107
+
108
+ # Retrieve docs
109
  docs = retriever.invoke(query)
110
  if not docs:
111
  answer = "I could not find any relevant information in the legal documents for your query. Please try rephrasing."
112
  else:
113
+ print("⚑ Running RAG chain...")
114
+ context = format_docs(docs) # use retrieved docs
115
+ answer = rag_chain.invoke({"question": query, "context": context})
116
+ print("βœ… RAG chain finished.")
117
 
118
+ # Add disclaimer
119
+ disclaimer = ("\n\n---\n⚠️ Disclaimer: This is AI-generated information and not legal advice. Please consult a qualified lawyer."
120
  if lang_choice == "english" else
121
+ "\n\n---\n⚠️ No be legal advice o, abeg find lawyer for proper advice.")
122
+ answer += disclaimer
123
+
124
+ # Add references
125
  references = set()
126
  for doc in docs:
127
  source = doc.metadata.get("source", "Unknown Source")
128
  section = doc.metadata.get("section", "Unknown Section")
129
+ references.add(f"- {source} ({section})")
 
 
130
  if references:
131
+ answer += "\n\n**References:**\n" + "\n".join(sorted(references))
132
 
 
 
 
133
  history.append({'role': 'assistant', 'content': answer.strip()})
 
 
134
  return history, history
135
 
136
  except Exception as e:
137
+ print(f"❌ Error: {e}")
138
  error_message = "Sorry, an unexpected error occurred. Please try again or rephrase your question."
139
  history.append({'role': 'assistant', 'content': error_message})
140
  return history, history
 
143
  return [], []
144
 
145
  # --- 7. GRADIO UI ---
 
146
  def build_ui():
147
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="KnowYourRight Bot") as demo:
148
  gr.Markdown("# πŸ“œ KnowYourRight Bot β€” Nigerian Legal Assistant")
149
  gr.Markdown("Ask questions about the Nigerian Constitution, Labour Act, and more. *Powered by AI.*")
150
+
 
151
  chatbot = gr.Chatbot(
152
  label="Chat History",
153
  height=600,
154
+ type='messages',
155
  avatar_images=("user.png", "bot.png")
156
  )
157
+
158
  with gr.Row():
159
  msg = gr.Textbox(
160
  label="Your Question",
 
169
 
170
  chat_state = gr.State([])
171
 
 
172
  submit_btn.click(answer_question, [msg, lang_choice, chat_state], [chatbot, chat_state])
173
  msg.submit(answer_question, [msg, lang_choice, chat_state], [chatbot, chat_state])
174
 
 
 
175
  submit_btn.click(lambda: "", None, msg)
176
  msg.submit(lambda: "", None, msg)
177
 
178
  clear_btn.click(_reset, None, [chatbot, chat_state])
179
+
180
  return demo
181
 
182
  if __name__ == "__main__":
183
+ print("🌍 Building Gradio UI...")
184
  demo = build_ui()
185
+ print("πŸš€ Launching Gradio app...")
186
+ demo.launch(debug=True)