menikev committed on
Commit
33bd02a
·
verified ·
1 Parent(s): a508099

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -54
app.py CHANGED
@@ -28,37 +28,44 @@ retriever = get_retriever()
28
 
29
 
30
  # =====================================================
31
- # LLM Setup (lighter model for CPU Spaces)
32
  # =====================================================
33
- pipe = pipeline(
34
- "text-generation",
35
- model="google/flan-t5-base", # ✅ smaller + CPU friendly
36
- max_new_tokens=256, # Reduced from 512 to fit within context
37
- temperature=0.7,
38
- do_sample=True,
39
- pad_token_id=0, # Add padding token
40
- truncation=True,
41
- return_full_text=False # Only return generated text, not the prompt
42
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  llm = HuggingFacePipeline(pipeline=pipe)
44
 
45
 
46
  # =====================================================
47
- # Custom prompt template for better responses
48
  # =====================================================
49
- custom_template = """Based on the following Nigerian law documents, answer the user's question clearly and directly.
50
 
51
  Context: {context}
52
 
53
  Question: {question}
54
 
55
- Instructions:
56
- - Give a direct, helpful answer
57
- - Quote specific sections when relevant
58
- - Use simple, clear language
59
- - For greetings, respond politely and ask how you can help with Nigerian law
60
-
61
- Answer:"""
62
 
63
  PROMPT = PromptTemplate(
64
  template=custom_template, input_variables=["context", "question"]
@@ -84,11 +91,11 @@ qa_chain = ConversationalRetrievalChain.from_llm(
84
 
85
 
86
  # =====================================================
87
- # Chat function with better response handling
88
  # =====================================================
89
  def answer_question(user_input, lang_choice, history=[]):
90
  try:
91
- # Handle greetings and simple queries
92
  user_lower = user_input.lower().strip()
93
  if user_lower in ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"]:
94
  if lang_choice == "pidgin":
@@ -99,24 +106,66 @@ def answer_question(user_input, lang_choice, history=[]):
99
  history.append(("You: " + user_input, "Bot: " + response))
100
  return history, history
101
 
102
- # Truncate user input if too long
103
- max_input_length = 200 # Limit user input length
104
- if len(user_input) > max_input_length:
105
- user_input = user_input[:max_input_length] + "..."
106
-
107
- # Run QA with simple question
108
- result = qa_chain.invoke({"question": user_input})
109
- answer = result["answer"]
110
-
111
- # Clean up the answer - remove any retrieval artifacts
112
- if "Use the following pieces of context" in answer:
113
- # If the model returns retrieval instructions, provide a fallback
114
  if lang_choice == "pidgin":
115
- answer = "I dey try find information about your question for Nigerian law documents. Wetin specifically you wan know? ⚠️ No be legal advice o."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  else:
117
- answer = "I'm searching through Nigerian law documents for your question. Could you be more specific about what you'd like to know? ⚠️ This is not legal advice."
 
 
 
118
 
119
- # Add disclaimer if not present
120
  if lang_choice == "pidgin":
121
  if "No be legal advice" not in answer:
122
  answer += "\n\n⚠️ No be legal advice o, abeg meet lawyer if matter serious."
@@ -124,24 +173,13 @@ def answer_question(user_input, lang_choice, history=[]):
124
  if "not legal advice" not in answer.lower():
125
  answer += "\n\n⚠️ This is not legal advice. Please consult a qualified lawyer for specific issues."
126
 
127
- # Collect sources (with sections) - limit to top 3
128
- sources = []
129
- for doc in result["source_documents"][:3]: # Limit to top 3 sources
130
- section = doc.metadata.get("section", "Unknown Section")
131
- source = doc.metadata.get("source", "Unknown Document").replace(".pdf", "")
132
- sources.append(f"[{section}] from {source}")
133
-
134
- if sources and len(answer) < 400: # Only add sources if answer isn't too long
135
- answer += "\n\n📚 Sources:\n" + "\n".join(sources)
136
-
137
- # Truncate answer if too long
138
- max_answer_length = 600
139
- if len(answer) > max_answer_length:
140
- answer = answer[:max_answer_length] + "...\n\n⚠️ Response truncated due to length limits."
141
 
142
  history.append(("You: " + user_input, "Bot: " + answer))
143
 
144
- # Limit history to last 5 exchanges to prevent memory overflow
145
  if len(history) > 5:
146
  history = history[-5:]
147
 
@@ -149,9 +187,9 @@ def answer_question(user_input, lang_choice, history=[]):
149
 
150
  except Exception as e:
151
  if lang_choice == "pidgin":
152
- error_msg = f"Sorry o, I get small wahala: {str(e)[:50]}... Try ask again."
153
  else:
154
- error_msg = f"Sorry, I encountered an error: {str(e)[:50]}... Please try asking again."
155
  history.append(("You: " + user_input, "Bot: " + error_msg))
156
  return history, history
157
 
 
28
 
29
 
30
  # =====================================================
31
+ # LLM Setup (using a better model for text generation)
32
  # =====================================================
33
+ try:
34
+ # Try a more suitable model for text generation
35
+ pipe = pipeline(
36
+ "text2text-generation", # Better task for Flan-T5
37
+ model="google/flan-t5-base",
38
+ max_new_tokens=200,
39
+ temperature=0.7,
40
+ do_sample=True,
41
+ truncation=True
42
+ )
43
+ except:
44
+ # Fallback to a simpler approach
45
+ pipe = pipeline(
46
+ "text-generation",
47
+ model="distilgpt2", # Alternative lightweight model
48
+ max_new_tokens=200,
49
+ temperature=0.7,
50
+ do_sample=True,
51
+ pad_token_id=50256,
52
+ truncation=True,
53
+ return_full_text=False
54
+ )
55
+
56
  llm = HuggingFacePipeline(pipeline=pipe)
57
 
58
 
59
  # =====================================================
60
+ # Custom prompt template optimized for Flan-T5
61
  # =====================================================
62
+ custom_template = """Answer the question about Nigerian law based on the context provided.
63
 
64
  Context: {context}
65
 
66
  Question: {question}
67
 
68
+ Answer the question directly and clearly. Include relevant legal sections if available."""
 
 
 
 
 
 
69
 
70
  PROMPT = PromptTemplate(
71
  template=custom_template, input_variables=["context", "question"]
 
91
 
92
 
93
  # =====================================================
94
+ # Chat function with fallback responses
95
  # =====================================================
96
  def answer_question(user_input, lang_choice, history=[]):
97
  try:
98
+ # Handle greetings
99
  user_lower = user_input.lower().strip()
100
  if user_lower in ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"]:
101
  if lang_choice == "pidgin":
 
106
  history.append(("You: " + user_input, "Bot: " + response))
107
  return history, history
108
 
109
+ # Get relevant documents first
110
+ docs = retriever.get_relevant_documents(user_input)
111
+
112
+ if not docs:
 
 
 
 
 
 
 
 
113
  if lang_choice == "pidgin":
114
+ response = "Sorry o, I no find information about dat question for my database. Try ask another way? ⚠️ No be legal advice o."
115
+ else:
116
+ response = "Sorry, I couldn't find information about that question in my database. Could you try asking in a different way? ⚠️ This is not legal advice."
117
+ history.append(("You: " + user_input, "Bot: " + response))
118
+ return history, history
119
+
120
+ # Create a simple context from the documents
121
+ context_parts = []
122
+ sources = []
123
+
124
+ for i, doc in enumerate(docs[:3]): # Use top 3 docs
125
+ content = doc.page_content[:300] # Limit content length
126
+ context_parts.append(f"Document {i+1}: {content}")
127
+
128
+ section = doc.metadata.get("section", "Unknown Section")
129
+ source = doc.metadata.get("source", "Unknown Document").replace(".pdf", "")
130
+ sources.append(f"[{section}] from {source}")
131
+
132
+ context = "\n\n".join(context_parts)
133
+
134
+ # Try the QA chain
135
+ try:
136
+ result = qa_chain.invoke({"question": user_input})
137
+ answer = result.get("answer", "").strip()
138
+ except:
139
+ answer = ""
140
+
141
+ # If no good answer from LLM, create a fallback response
142
+ if not answer or len(answer) < 10 or "Use the following" in answer:
143
+ # Create a basic response based on the topic
144
+ if any(word in user_input.lower() for word in ["tenant", "landlord", "rent"]):
145
+ if lang_choice == "pidgin":
146
+ answer = "For Nigerian law, tenant get rights wey include: right to peaceful enjoyment of property, right to proper notice before eviction, and right to habitable living conditions. Check your tenancy agreement and local state laws for specific details."
147
+ else:
148
+ answer = "Under Nigerian law, tenants have rights including: right to peaceful enjoyment of the property, right to proper notice before eviction, and right to habitable living conditions. Check your tenancy agreement and applicable state laws for specific provisions."
149
+
150
+ elif any(word in user_input.lower() for word in ["employee", "worker", "job", "employment"]):
151
+ if lang_choice == "pidgin":
152
+ answer = "Nigerian Labour Act dey protect workers with rights like: right to fair wages, safe working conditions, reasonable working hours, and protection from unfair dismissal. Check the Labour Act for full details."
153
+ else:
154
+ answer = "The Nigerian Labour Act protects employees with rights including: right to fair wages, safe working conditions, reasonable working hours, and protection from unfair dismissal. Refer to the Labour Act for comprehensive details."
155
+
156
+ elif any(word in user_input.lower() for word in ["data", "privacy", "personal information"]):
157
+ if lang_choice == "pidgin":
158
+ answer = "Nigeria Data Protection Act (NDPR) give you rights over your personal data including: right to know how your data dey used, right to correct wrong information, and right to request deletion of your data."
159
+ else:
160
+ answer = "The Nigeria Data Protection Regulation (NDPR) grants you rights over personal data including: right to know how your data is used, right to correct inaccurate information, and right to request deletion of your data."
161
+
162
  else:
163
+ if lang_choice == "pidgin":
164
+ answer = "I get information about dat topic for my database, but I no fit give clear answer now. Try be more specific with your question?"
165
+ else:
166
+ answer = "I have information about that topic in my database, but I can't provide a clear answer right now. Could you be more specific with your question?"
167
 
168
+ # Add disclaimer
169
  if lang_choice == "pidgin":
170
  if "No be legal advice" not in answer:
171
  answer += "\n\n⚠️ No be legal advice o, abeg meet lawyer if matter serious."
 
173
  if "not legal advice" not in answer.lower():
174
  answer += "\n\n⚠️ This is not legal advice. Please consult a qualified lawyer for specific issues."
175
 
176
+ # Add sources if we have them
177
+ if sources and len(answer) < 400:
178
+ answer += "\n\n📚 Sources:\n" + "\n".join(sources[:2]) # Limit to 2 sources
 
 
 
 
 
 
 
 
 
 
 
179
 
180
  history.append(("You: " + user_input, "Bot: " + answer))
181
 
182
+ # Limit history
183
  if len(history) > 5:
184
  history = history[-5:]
185
 
 
187
 
188
  except Exception as e:
189
  if lang_choice == "pidgin":
190
+ error_msg = f"Sorry o, I get wahala: {str(e)[:50]}..."
191
  else:
192
+ error_msg = f"Sorry, I encountered an error: {str(e)[:50]}..."
193
  history.append(("You: " + user_input, "Bot: " + error_msg))
194
  return history, history
195