menikev committed on
Commit
ec071b2
·
verified ·
1 Parent(s): ccec758

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -84
app.py CHANGED
@@ -1,170 +1,193 @@
1
  import os
2
  from pathlib import Path
3
  import gradio as gr
 
4
  from dotenv import load_dotenv
 
5
 
6
  from langchain.prompts import PromptTemplate
7
- from langchain_chroma import Chroma
8
- from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
 
 
 
9
  from langchain.schema.runnable import RunnablePassthrough
 
10
 
11
- # --- 1. CONFIGURATION ---
12
- load_dotenv()
13
 
14
  if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
15
- print("HUGGINGFACEHUB_API_TOKEN not found in secrets. Please add it.")
16
- exit(1)
17
 
18
- # --- 2. LOAD VECTOR DATABASE ---
19
- print("📂 Loading vector database...")
20
  PERSIST_DIR = Path("data/processed/vector_db")
 
21
 
22
  if not PERSIST_DIR.exists() or not any(PERSIST_DIR.iterdir()):
23
  print("⚠️ Vector DB not found. Run complete_ingestion.py first.")
24
  raise SystemExit(1)
25
 
 
 
 
26
  embedding_model = HuggingFaceEmbeddings(
27
  model_name="BAAI/bge-small-en",
28
- model_kwargs={'device': 'cpu'}
29
  )
30
 
31
  vectordb = Chroma(
32
  persist_directory=str(PERSIST_DIR),
33
  embedding_function=embedding_model,
 
34
  )
35
 
36
- retriever = vectordb.as_retriever(search_kwargs={"k": 4})
37
- print("✅ Vector database loaded.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
- # --- 3. SETUP LLM ---
40
- print("🤖 Initializing LLM...")
 
41
  llm = HuggingFaceEndpoint(
42
- repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1", # or mistral-7b-instruct
43
- task="conversational", # ✅ must match HF endpoint type
44
- temperature=0.1,
45
  max_new_tokens=512,
46
- huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
47
  )
48
- print("LLM ready.")
49
 
50
- # --- 4. PROMPT TEMPLATE ---
51
- RAG_PROMPT_TEMPLATE = """
52
- You are an expert Nigerian Legal Assistant. Your goal is to help users understand Nigerian law by providing clear, concise explanations.
53
 
54
- **TASK:** Analyze the provided legal context below to answer the user's question.
 
55
 
56
- **CONTEXT:**
57
  {context}
58
 
59
- **RULES:**
60
- 1. Do not just copy the text. Summarize and explain in simple language.
61
- 2. Be conversational and helpful.
62
- 3. Base your answer ONLY on the provided context. If not found, say:
63
- "The provided legal documents do not contain specific information on this topic."
64
- 4. Respond in the user's chosen language (English or Pidgin).
65
- 5. At the end, cite the referenced sources.
66
 
67
- **QUESTION:** {question}
68
 
69
- **ANSWER:**
70
  """
 
71
  RAG_PROMPT = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
72
 
73
- # --- 5. RAG CHAIN ---
74
  def format_docs(docs):
75
- return "\n\n---\n\n".join(
76
- f"Source: {d.metadata.get('source', 'Unknown')}\n"
77
- f"Section: {d.metadata.get('section', 'Unknown')}\n"
78
- f"Content: {d.page_content}"
79
- for d in docs
80
- )
81
-
82
def extract_text_from_conversational(response):
    """Normalize HF conversational outputs to plain text.

    Args:
        response: The raw endpoint output. Handled shapes are a plain string,
            a dict carrying ``"generated_text"``, or a list whose first
            element is such a dict.

    Returns:
        The generated text when one of the handled shapes matches, otherwise
        ``str(response)``.
    """
    if isinstance(response, str):
        return response

    if isinstance(response, dict) and "generated_text" in response:
        return response["generated_text"]

    # Only index into the list when it is non-empty and its first element is
    # a dict; an empty list or a list of non-dicts falls through to the
    # generic string fallback instead of raising IndexError/AttributeError.
    if isinstance(response, list) and response and isinstance(response[0], dict):
        return response[0].get("generated_text", str(response))

    return str(response)
91
 
92
  rag_chain = (
93
  {"context": retriever | format_docs, "question": RunnablePassthrough()}
94
  | RAG_PROMPT
95
  | llm
96
- | extract_text_from_conversational
97
  )
98
 
99
- # --- 6. MAIN LOGIC ---
 
100
  def answer_question(user_input, lang_choice, history=[]):
101
  try:
102
  query = (user_input or "").strip()
103
  if not query:
104
  return history, history
105
 
106
- history.append({'role': 'user', 'content': query})
 
107
 
108
- if query.lower() in ["hi", "hello", "hey"]:
109
- ans = ("Hello! I'm your Nigerian Legal AI Assistant. How can I help you today?"
110
- if lang_choice == "english" else
111
- "Howfa! I be your Nigerian Legal AI Assistant. How I fit help you today? No be legal advice o.")
112
- history.append({'role': 'assistant', 'content': ans})
 
 
113
  return history, history
114
 
115
  print(f"⚡ Running RAG chain for query: {query}")
116
  docs = retriever.invoke(query)
 
 
117
  if not docs:
118
- answer = "I could not find any relevant information in the legal documents for your query."
 
 
119
  else:
120
  answer = rag_chain.invoke(query)
121
- print("✅ RAG chain finished.")
122
-
123
- disclaimer = ("\n\n---\n⚠️ Disclaimer: This is AI-generated information and not legal advice. "
124
- "Please consult a qualified lawyer."
125
- if lang_choice == "english" else
126
- "\n\n---\n⚠️ No be legal advice o, abeg find lawyer for proper advice.")
127
-
128
- references = set()
129
- for doc in docs:
130
- source = doc.metadata.get("source", "Unknown Source")
131
- section = doc.metadata.get("section", "Unknown Section")
132
- if source and section:
133
- references.add(f"- {source} ({section})")
134
 
135
- if references:
136
- answer += "\n\n**References:**\n" + "\n".join(sorted(list(references)))
137
- answer += disclaimer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
- history.append({'role': 'assistant', 'content': answer.strip()})
140
  return history, history
141
 
142
  except Exception as e:
143
  print(f"❌ Error: {e}")
144
- error_message = "Sorry, an unexpected error occurred. Please try again."
145
- history.append({'role': 'assistant', 'content': error_message})
146
  return history, history
147
 
148
  def _reset():
149
  return [], []
150
 
151
- # --- 7. UI ---
 
152
  def build_ui():
153
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="KnowYourRight Bot") as demo:
154
  gr.Markdown("# 📜 KnowYourRight Bot — Nigerian Legal Assistant")
155
- gr.Markdown("Ask questions about the Nigerian Constitution, Labour Act, and more. *Powered by AI.*")
156
 
157
  chatbot = gr.Chatbot(
158
  label="Chat History",
159
  height=600,
160
- type='messages',
161
- avatar_images=("user.png", "bot.png")
162
  )
163
 
164
  with gr.Row():
165
  msg = gr.Textbox(
166
  label="Your Question",
167
- placeholder="e.g., 'What are my rights if I am arrested?'",
168
  lines=2,
169
  scale=4,
170
  )
@@ -186,6 +209,7 @@ def build_ui():
186
  return demo
187
 
188
  if __name__ == "__main__":
189
- print("🚀 Launching Gradio app...")
190
  demo = build_ui()
191
- demo.launch(debug=True)
 
 
1
  import os
2
  from pathlib import Path
3
  import gradio as gr
4
+
5
  from dotenv import load_dotenv
6
+ load_dotenv()
7
 
8
  from langchain.prompts import PromptTemplate
9
+ from langchain_community.vectorstores import Chroma # <-- match ingestion
10
+ from langchain_huggingface import (
11
+ HuggingFaceEmbeddings,
12
+ HuggingFaceEndpoint,
13
+ )
14
  from langchain.schema.runnable import RunnablePassthrough
15
+ from langchain.schema.output_parser import StrOutputParser
16
 
17
+ # --- 1) CONFIG / SAFETY ---
 
18
 
19
  if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
20
+ print("HUGGINGFACEHUB_API_TOKEN not found. Add it to your Space secrets.")
21
+ raise SystemExit(1)
22
 
 
 
23
  PERSIST_DIR = Path("data/processed/vector_db")
24
+ COLLECTION_NAME = "legal_documents" # <-- MUST match complete_ingestion.py
25
 
26
  if not PERSIST_DIR.exists() or not any(PERSIST_DIR.iterdir()):
27
  print("⚠️ Vector DB not found. Run complete_ingestion.py first.")
28
  raise SystemExit(1)
29
 
30
+ # --- 2) LOAD VECTOR DB / RETRIEVER ---
31
+
32
+ print("Loading vector database...")
33
  embedding_model = HuggingFaceEmbeddings(
34
  model_name="BAAI/bge-small-en",
35
+ model_kwargs={"device": "cpu"},
36
  )
37
 
38
  vectordb = Chroma(
39
  persist_directory=str(PERSIST_DIR),
40
  embedding_function=embedding_model,
41
+ collection_name=COLLECTION_NAME, # <-- critical: open the right collection
42
  )
43
 
44
+ # Quick sanity check (helps spot empty/wrong collection immediately)
45
+ try:
46
+ count = vectordb._collection.count()
47
+ print(f"✅ Loaded Chroma collection '{COLLECTION_NAME}' with {count} documents.")
48
+ if count == 0:
49
+ raise RuntimeError(
50
+ "Chroma collection is empty. Confirm collection_name matches the one used in complete_ingestion.py"
51
+ )
52
+ except Exception as e:
53
+ print(f"Chroma sanity check failed: {e}")
54
+ raise
55
+
56
+ # A slightly more forgiving retriever
57
+ retriever = vectordb.as_retriever(
58
+ search_type="mmr",
59
+ search_kwargs={"k": 4, "fetch_k": 20},
60
+ )
61
+ print("Vector database ready.")
62
 
63
+ # --- 3) LLM (Hugging Face Inference Endpoint) ---
64
+
65
+ print("Initializing LLM via Hugging Face Endpoint...")
66
  llm = HuggingFaceEndpoint(
67
+ repo_id=os.getenv("HF_ENDPOINT_MODEL", "mistralai/Mistral-7B-Instruct-v0.2"),
68
+ temperature=0.15,
 
69
  max_new_tokens=512,
70
+ huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN"),
71
  )
72
+ print("LLM initialized.")
73
 
74
+ # --- 4) PROMPT & RAG CHAIN ---
 
 
75
 
76
+ RAG_PROMPT_TEMPLATE = """
77
+ You are an expert Nigerian Legal Assistant. Provide clear, concise explanations.
78
 
79
+ CONTEXT:
80
  {context}
81
 
82
+ RULES:
83
+ 1) Explain and summarize—do not paste raw sections verbatim.
84
+ 2) Use ONLY the context above. If missing, say you don't know.
85
+ 3) Conversational tone. Plain English (or Pidgin if user chose it).
86
+ 4) At the end, list the referenced section(s)/source(s).
 
 
87
 
88
+ QUESTION: {question}
89
 
90
+ ANSWER:
91
  """
92
+
93
  RAG_PROMPT = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)
94
 
 
95
  def format_docs(docs):
96
+ # Keep rich info so the LLM can cite properly
97
+ blocks = []
98
+ for d in docs:
99
+ src = d.metadata.get("source", "Unknown Source")
100
+ sec = d.metadata.get("section", "Unknown Section")
101
+ blocks.append(f"Source: {src}\nSection: {sec}\nContent: {d.page_content}")
102
+ return "\n\n---\n\n".join(blocks)
 
 
 
 
 
 
 
 
 
103
 
104
  rag_chain = (
105
  {"context": retriever | format_docs, "question": RunnablePassthrough()}
106
  | RAG_PROMPT
107
  | llm
108
+ | StrOutputParser()
109
  )
110
 
111
+ # --- 5) APP LOGIC ---
112
+
113
  def answer_question(user_input, lang_choice, history=[]):
114
  try:
115
  query = (user_input or "").strip()
116
  if not query:
117
  return history, history
118
 
119
+ # Chatbot uses type='messages'
120
+ history.append({"role": "user", "content": query})
121
 
122
+ if query.lower() in {"hi", "hello", "hey"}:
123
+ ans = (
124
+ "Hello! I'm your Nigerian Legal AI Assistant. How can I help you today?"
125
+ if lang_choice == "english"
126
+ else "Howfa! I be your Nigerian Legal AI Assistant. How I fit help you today? No be legal advice o."
127
+ )
128
+ history.append({"role": "assistant", "content": ans})
129
  return history, history
130
 
131
  print(f"⚡ Running RAG chain for query: {query}")
132
  docs = retriever.invoke(query)
133
+ print(f"Retrieved {len(docs)} docs")
134
+
135
  if not docs:
136
+ answer = (
137
+ "I could not find any relevant information in the legal documents for your query."
138
+ )
139
  else:
140
  answer = rag_chain.invoke(query)
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
+ # Build references from the retrieved docs
143
+ refs = []
144
+ for d in docs[:5]:
145
+ src = d.metadata.get("source", "Unknown Source")
146
+ sec = d.metadata.get("section", "Unknown Section")
147
+ if src or sec:
148
+ refs.append(f"- {src} — {sec}")
149
+
150
+ if refs:
151
+ answer += "\n\n**References:**\n" + "\n".join(refs)
152
+
153
+ # Disclaimer
154
+ answer += (
155
+ "\n\n--- \n*⚠️ Disclaimer: This is AI-generated information and not legal advice. "
156
+ "Please consult a qualified lawyer for professional guidance.*"
157
+ if lang_choice == "english"
158
+ else "\n\n--- \n*⚠️ No be legal advice o, abeg find lawyer for proper advice.*"
159
+ )
160
 
161
+ history.append({"role": "assistant", "content": answer.strip()})
162
  return history, history
163
 
164
  except Exception as e:
165
  print(f"❌ Error: {e}")
166
+ err = "Sorry, an unexpected error occurred. Please try again."
167
+ history.append({"role": "assistant", "content": err})
168
  return history, history
169
 
170
  def _reset():
171
  return [], []
172
 
173
+ # --- 6) GRADIO UI ---
174
+
175
  def build_ui():
176
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="KnowYourRight Bot") as demo:
177
  gr.Markdown("# 📜 KnowYourRight Bot — Nigerian Legal Assistant")
178
+ gr.Markdown("Ask questions about the Nigerian Constitution, Labour Act, FCCPA, Data Protection, and more.")
179
 
180
  chatbot = gr.Chatbot(
181
  label="Chat History",
182
  height=600,
183
+ type="messages",
184
+ avatar_images=("user.png", "bot.png"),
185
  )
186
 
187
  with gr.Row():
188
  msg = gr.Textbox(
189
  label="Your Question",
190
+ placeholder="e.g., 'What are my rights as a tenant?'",
191
  lines=2,
192
  scale=4,
193
  )
 
209
  return demo
210
 
211
  if __name__ == "__main__":
212
+ print("Building Gradio UI...")
213
  demo = build_ui()
214
+ print("Launching Gradio app...")
215
+ demo.launch()