menikev committed on
Commit
ccec758
Β·
verified Β·
1 Parent(s): 685d9a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -35
app.py CHANGED
@@ -7,7 +7,6 @@ from langchain.prompts import PromptTemplate
7
  from langchain_chroma import Chroma
8
  from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
9
  from langchain.schema.runnable import RunnablePassthrough
10
- from langchain.schema.output_parser import StrOutputParser
11
 
12
  # --- 1. CONFIGURATION ---
13
  load_dotenv()
@@ -21,7 +20,8 @@ print("πŸ“‚ Loading vector database...")
21
  PERSIST_DIR = Path("data/processed/vector_db")
22
 
23
  if not PERSIST_DIR.exists() or not any(PERSIST_DIR.iterdir()):
24
- raise SystemExit("⚠️ Vector DB not found. Run complete_ingestion.py first.")
 
25
 
26
  embedding_model = HuggingFaceEmbeddings(
27
  model_name="BAAI/bge-small-en",
@@ -31,26 +31,25 @@ embedding_model = HuggingFaceEmbeddings(
31
  vectordb = Chroma(
32
  persist_directory=str(PERSIST_DIR),
33
  embedding_function=embedding_model,
34
- collection_name="legal_documents" # πŸ”‘ must match ingestion step
35
  )
36
 
37
  retriever = vectordb.as_retriever(search_kwargs={"k": 4})
38
  print("βœ… Vector database loaded.")
39
 
40
  # --- 3. SETUP LLM ---
41
- print("πŸš€ Initializing LLM via Hugging Face Endpoint...")
42
  llm = HuggingFaceEndpoint(
43
- repo_id="mistralai/Mistral-7B-Instruct-v0.2", # βœ… text-generation supported
44
- task="text-generation", # explicitly set task
45
  temperature=0.1,
46
  max_new_tokens=512,
47
  huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
48
  )
49
- print("βœ… LLM initialized.")
50
 
51
  # --- 4. PROMPT TEMPLATE ---
52
  RAG_PROMPT_TEMPLATE = """
53
- You are an expert Nigerian Legal Assistant. Your primary goal is to help users understand Nigerian law by providing clear, concise, and helpful explanations.
54
 
55
  **TASK:** Analyze the provided legal context below to answer the user's question.
56
 
@@ -58,12 +57,12 @@ You are an expert Nigerian Legal Assistant. Your primary goal is to help users u
58
  {context}
59
 
60
  **RULES:**
61
- 1. Explain, don't just quote. Summarize and explain the relevant laws in simple language.
62
- 2. Be conversational and clear.
63
- 3. Use ONLY the provided context. If it’s missing, say:
64
- "The provided legal documents do not contain specific information on this topic."
65
- 4. Language: Respond in the user's chosen language (English or Nigerian Pidgin).
66
- 5. Always list sources from the context at the end.
67
 
68
  **QUESTION:** {question}
69
 
@@ -80,11 +79,21 @@ def format_docs(docs):
80
  for d in docs
81
  )
82
 
 
 
 
 
 
 
 
 
 
 
83
  rag_chain = (
84
- {"context": RunnablePassthrough(), "question": RunnablePassthrough()}
85
  | RAG_PROMPT
86
  | llm
87
- | StrOutputParser()
88
  )
89
 
90
  # --- 6. MAIN LOGIC ---
@@ -96,7 +105,6 @@ def answer_question(user_input, lang_choice, history=[]):
96
 
97
  history.append({'role': 'user', 'content': query})
98
 
99
- # Greetings
100
  if query.lower() in ["hi", "hello", "hey"]:
101
  ans = ("Hello! I'm your Nigerian Legal AI Assistant. How can I help you today?"
102
  if lang_choice == "english" else
@@ -104,46 +112,43 @@ def answer_question(user_input, lang_choice, history=[]):
104
  history.append({'role': 'assistant', 'content': ans})
105
  return history, history
106
 
107
- print(f"πŸ”Ž Received query: {query}")
108
-
109
- # Retrieve docs
110
  docs = retriever.invoke(query)
111
  if not docs:
112
- answer = "I could not find any relevant information in the legal documents for your query. Please try rephrasing."
113
  else:
114
- print("⚑ Running RAG chain...")
115
- context = format_docs(docs) # use retrieved docs
116
- answer = rag_chain.invoke({"question": query, "context": context})
117
- print("βœ… RAG chain finished.")
118
 
119
- # Add disclaimer
120
- disclaimer = ("\n\n---\n⚠️ Disclaimer: This is AI-generated information and not legal advice. Please consult a qualified lawyer."
121
  if lang_choice == "english" else
122
  "\n\n---\n⚠️ No be legal advice o, abeg find lawyer for proper advice.")
123
- answer += disclaimer
124
 
125
- # Add references
126
  references = set()
127
  for doc in docs:
128
  source = doc.metadata.get("source", "Unknown Source")
129
  section = doc.metadata.get("section", "Unknown Section")
130
- references.add(f"- {source} ({section})")
 
 
131
  if references:
132
- answer += "\n\n**References:**\n" + "\n".join(sorted(references))
 
133
 
134
  history.append({'role': 'assistant', 'content': answer.strip()})
135
  return history, history
136
 
137
  except Exception as e:
138
  print(f"❌ Error: {e}")
139
- error_message = "Sorry, an unexpected error occurred. Please try again or rephrase your question."
140
  history.append({'role': 'assistant', 'content': error_message})
141
  return history, history
142
 
143
  def _reset():
144
  return [], []
145
 
146
- # --- 7. GRADIO UI ---
147
  def build_ui():
148
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="KnowYourRight Bot") as demo:
149
  gr.Markdown("# πŸ“œ KnowYourRight Bot β€” Nigerian Legal Assistant")
@@ -181,7 +186,6 @@ def build_ui():
181
  return demo
182
 
183
  if __name__ == "__main__":
184
- print("🌍 Building Gradio UI...")
185
- demo = build_ui()
186
  print("πŸš€ Launching Gradio app...")
 
187
  demo.launch(debug=True)
 
7
  from langchain_chroma import Chroma
8
  from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
9
  from langchain.schema.runnable import RunnablePassthrough
 
10
 
11
  # --- 1. CONFIGURATION ---
12
  load_dotenv()
 
20
  PERSIST_DIR = Path("data/processed/vector_db")
21
 
22
  if not PERSIST_DIR.exists() or not any(PERSIST_DIR.iterdir()):
23
+ print("⚠️ Vector DB not found. Run complete_ingestion.py first.")
24
+ raise SystemExit(1)
25
 
26
  embedding_model = HuggingFaceEmbeddings(
27
  model_name="BAAI/bge-small-en",
 
31
  vectordb = Chroma(
32
  persist_directory=str(PERSIST_DIR),
33
  embedding_function=embedding_model,
 
34
  )
35
 
36
  retriever = vectordb.as_retriever(search_kwargs={"k": 4})
37
  print("βœ… Vector database loaded.")
38
 
39
  # --- 3. SETUP LLM ---
40
+ print("πŸ€– Initializing LLM...")
41
  llm = HuggingFaceEndpoint(
42
+ repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1", # or mistral-7b-instruct
43
+ task="conversational", # βœ… must match HF endpoint type
44
  temperature=0.1,
45
  max_new_tokens=512,
46
  huggingfacehub_api_token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
47
  )
48
+ print("βœ… LLM ready.")
49
 
50
  # --- 4. PROMPT TEMPLATE ---
51
  RAG_PROMPT_TEMPLATE = """
52
+ You are an expert Nigerian Legal Assistant. Your goal is to help users understand Nigerian law by providing clear, concise explanations.
53
 
54
  **TASK:** Analyze the provided legal context below to answer the user's question.
55
 
 
57
  {context}
58
 
59
  **RULES:**
60
+ 1. Do not just copy the text. Summarize and explain in simple language.
61
+ 2. Be conversational and helpful.
62
+ 3. Base your answer ONLY on the provided context. If not found, say:
63
+ "The provided legal documents do not contain specific information on this topic."
64
+ 4. Respond in the user's chosen language (English or Pidgin).
65
+ 5. At the end, cite the referenced sources.
66
 
67
  **QUESTION:** {question}
68
 
 
79
  for d in docs
80
  )
81
 
82
def extract_text_from_conversational(response):
    """Normalize Hugging Face conversational outputs to plain text.

    HF endpoints may return a plain string, a dict with a
    ``generated_text`` key, or a list of such dicts (one per candidate).

    Args:
        response: Raw output from the endpoint (str, dict, list, or other).

    Returns:
        str: The extracted generated text, or ``str(response)`` as a
        last-resort fallback so callers always receive a string.
    """
    if isinstance(response, str):
        return response
    if isinstance(response, dict) and "generated_text" in response:
        return response["generated_text"]
    if isinstance(response, list) and response:
        first = response[0]
        # Guard: original code called .get() unconditionally, which raised
        # AttributeError for a list of strings and IndexError for [].
        if isinstance(first, dict):
            return first.get("generated_text", str(response))
        return str(response)
    return str(response)
91
+
92
  rag_chain = (
93
+ {"context": retriever | format_docs, "question": RunnablePassthrough()}
94
  | RAG_PROMPT
95
  | llm
96
+ | extract_text_from_conversational
97
  )
98
 
99
  # --- 6. MAIN LOGIC ---
 
105
 
106
  history.append({'role': 'user', 'content': query})
107
 
 
108
  if query.lower() in ["hi", "hello", "hey"]:
109
  ans = ("Hello! I'm your Nigerian Legal AI Assistant. How can I help you today?"
110
  if lang_choice == "english" else
 
112
  history.append({'role': 'assistant', 'content': ans})
113
  return history, history
114
 
115
+ print(f"⚑ Running RAG chain for query: {query}")
 
 
116
  docs = retriever.invoke(query)
117
  if not docs:
118
+ answer = "I could not find any relevant information in the legal documents for your query."
119
  else:
120
+ answer = rag_chain.invoke(query)
121
+ print("βœ… RAG chain finished.")
 
 
122
 
123
+ disclaimer = ("\n\n---\n⚠️ Disclaimer: This is AI-generated information and not legal advice. "
124
+ "Please consult a qualified lawyer."
125
  if lang_choice == "english" else
126
  "\n\n---\n⚠️ No be legal advice o, abeg find lawyer for proper advice.")
 
127
 
 
128
  references = set()
129
  for doc in docs:
130
  source = doc.metadata.get("source", "Unknown Source")
131
  section = doc.metadata.get("section", "Unknown Section")
132
+ if source and section:
133
+ references.add(f"- {source} ({section})")
134
+
135
  if references:
136
+ answer += "\n\n**References:**\n" + "\n".join(sorted(list(references)))
137
+ answer += disclaimer
138
 
139
  history.append({'role': 'assistant', 'content': answer.strip()})
140
  return history, history
141
 
142
  except Exception as e:
143
  print(f"❌ Error: {e}")
144
+ error_message = "Sorry, an unexpected error occurred. Please try again."
145
  history.append({'role': 'assistant', 'content': error_message})
146
  return history, history
147
 
148
  def _reset():
149
  return [], []
150
 
151
+ # --- 7. UI ---
152
  def build_ui():
153
  with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue"), title="KnowYourRight Bot") as demo:
154
  gr.Markdown("# πŸ“œ KnowYourRight Bot β€” Nigerian Legal Assistant")
 
186
  return demo
187
 
188
  if __name__ == "__main__":
 
 
189
  print("πŸš€ Launching Gradio app...")
190
+ demo = build_ui()
191
  demo.launch(debug=True)