Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -28,37 +28,44 @@ retriever = get_retriever()
|
|
| 28 |
|
| 29 |
|
| 30 |
# =====================================================
|
| 31 |
-
# LLM Setup (
|
| 32 |
# =====================================================
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
llm = HuggingFacePipeline(pipeline=pipe)
|
| 44 |
|
| 45 |
|
| 46 |
# =====================================================
|
| 47 |
-
# Custom prompt template for
|
| 48 |
# =====================================================
|
| 49 |
-
custom_template = """
|
| 50 |
|
| 51 |
Context: {context}
|
| 52 |
|
| 53 |
Question: {question}
|
| 54 |
|
| 55 |
-
|
| 56 |
-
- Give a direct, helpful answer
|
| 57 |
-
- Quote specific sections when relevant
|
| 58 |
-
- Use simple, clear language
|
| 59 |
-
- For greetings, respond politely and ask how you can help with Nigerian law
|
| 60 |
-
|
| 61 |
-
Answer:"""
|
| 62 |
|
| 63 |
PROMPT = PromptTemplate(
|
| 64 |
template=custom_template, input_variables=["context", "question"]
|
|
@@ -84,11 +91,11 @@ qa_chain = ConversationalRetrievalChain.from_llm(
|
|
| 84 |
|
| 85 |
|
| 86 |
# =====================================================
|
| 87 |
-
# Chat function with
|
| 88 |
# =====================================================
|
| 89 |
def answer_question(user_input, lang_choice, history=[]):
|
| 90 |
try:
|
| 91 |
-
# Handle greetings
|
| 92 |
user_lower = user_input.lower().strip()
|
| 93 |
if user_lower in ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"]:
|
| 94 |
if lang_choice == "pidgin":
|
|
@@ -99,24 +106,66 @@ def answer_question(user_input, lang_choice, history=[]):
|
|
| 99 |
history.append(("You: " + user_input, "Bot: " + response))
|
| 100 |
return history, history
|
| 101 |
|
| 102 |
-
#
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
# Run QA with simple question
|
| 108 |
-
result = qa_chain.invoke({"question": user_input})
|
| 109 |
-
answer = result["answer"]
|
| 110 |
-
|
| 111 |
-
# Clean up the answer - remove any retrieval artifacts
|
| 112 |
-
if "Use the following pieces of context" in answer:
|
| 113 |
-
# If the model returns retrieval instructions, provide a fallback
|
| 114 |
if lang_choice == "pidgin":
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
else:
|
| 117 |
-
|
|
|
|
|
|
|
|
|
|
| 118 |
|
| 119 |
-
# Add disclaimer
|
| 120 |
if lang_choice == "pidgin":
|
| 121 |
if "No be legal advice" not in answer:
|
| 122 |
answer += "\n\n⚠️ No be legal advice o, abeg meet lawyer if matter serious."
|
|
@@ -124,24 +173,13 @@ def answer_question(user_input, lang_choice, history=[]):
|
|
| 124 |
if "not legal advice" not in answer.lower():
|
| 125 |
answer += "\n\n⚠️ This is not legal advice. Please consult a qualified lawyer for specific issues."
|
| 126 |
|
| 127 |
-
#
|
| 128 |
-
sources
|
| 129 |
-
|
| 130 |
-
section = doc.metadata.get("section", "Unknown Section")
|
| 131 |
-
source = doc.metadata.get("source", "Unknown Document").replace(".pdf", "")
|
| 132 |
-
sources.append(f"[{section}] from {source}")
|
| 133 |
-
|
| 134 |
-
if sources and len(answer) < 400: # Only add sources if answer isn't too long
|
| 135 |
-
answer += "\n\n📚 Sources:\n" + "\n".join(sources)
|
| 136 |
-
|
| 137 |
-
# Truncate answer if too long
|
| 138 |
-
max_answer_length = 600
|
| 139 |
-
if len(answer) > max_answer_length:
|
| 140 |
-
answer = answer[:max_answer_length] + "...\n\n⚠️ Response truncated due to length limits."
|
| 141 |
|
| 142 |
history.append(("You: " + user_input, "Bot: " + answer))
|
| 143 |
|
| 144 |
-
# Limit history
|
| 145 |
if len(history) > 5:
|
| 146 |
history = history[-5:]
|
| 147 |
|
|
@@ -149,9 +187,9 @@ def answer_question(user_input, lang_choice, history=[]):
|
|
| 149 |
|
| 150 |
except Exception as e:
|
| 151 |
if lang_choice == "pidgin":
|
| 152 |
-
error_msg = f"Sorry o, I get
|
| 153 |
else:
|
| 154 |
-
error_msg = f"Sorry, I encountered an error: {str(e)[:50]}...
|
| 155 |
history.append(("You: " + user_input, "Bot: " + error_msg))
|
| 156 |
return history, history
|
| 157 |
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
# =====================================================
|
| 31 |
+
# LLM Setup (using a better model for text generation)
|
| 32 |
# =====================================================
|
| 33 |
+
try:
    # Preferred model: Flan-T5, driven through the "text2text-generation"
    # task (the task this file pairs with Flan-T5).
    pipe = pipeline(
        "text2text-generation",
        model="google/flan-t5-base",
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,
        truncation=True,
    )
except Exception as exc:
    # Catch Exception instead of using a bare ``except:`` so that
    # KeyboardInterrupt / SystemExit still stop startup, and report why the
    # primary model could not be loaded instead of hiding the failure.
    print(
        f"Could not load google/flan-t5-base ({exc!r}); "
        "falling back to distilgpt2."
    )
    # Fallback: a lightweight causal LM used through plain text generation.
    pipe = pipeline(
        "text-generation",
        model="distilgpt2",
        max_new_tokens=200,
        temperature=0.7,
        do_sample=True,
        # NOTE(review): 50256 is presumably the GPT-2 end-of-text token id,
        # reused as the pad token - confirm against the tokenizer.
        pad_token_id=50256,
        truncation=True,
        # Return only the generated continuation, not the echoed prompt.
        return_full_text=False,
    )

# Wrap the chosen pipeline so the rest of the app can use it as ``llm``.
llm = HuggingFacePipeline(pipeline=pipe)
|
| 57 |
|
| 58 |
|
| 59 |
# =====================================================
|
| 60 |
+
# Custom prompt template optimized for Flan-T5
|
| 61 |
# =====================================================
|
| 62 |
+
custom_template = """Answer the question about Nigerian law based on the context provided.
|
| 63 |
|
| 64 |
Context: {context}
|
| 65 |
|
| 66 |
Question: {question}
|
| 67 |
|
| 68 |
+
Answer the question directly and clearly. Include relevant legal sections if available."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
PROMPT = PromptTemplate(
|
| 71 |
template=custom_template, input_variables=["context", "question"]
|
|
|
|
| 91 |
|
| 92 |
|
| 93 |
# =====================================================
|
| 94 |
+
# Chat function with fallback responses
|
| 95 |
# =====================================================
|
| 96 |
def answer_question(user_input, lang_choice, history=[]):
|
| 97 |
try:
|
| 98 |
+
# Handle greetings
|
| 99 |
user_lower = user_input.lower().strip()
|
| 100 |
if user_lower in ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"]:
|
| 101 |
if lang_choice == "pidgin":
|
|
|
|
| 106 |
history.append(("You: " + user_input, "Bot: " + response))
|
| 107 |
return history, history
|
| 108 |
|
| 109 |
+
# Get relevant documents first
|
| 110 |
+
docs = retriever.get_relevant_documents(user_input)
|
| 111 |
+
|
| 112 |
+
if not docs:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
if lang_choice == "pidgin":
|
| 114 |
+
response = "Sorry o, I no find information about dat question for my database. Try ask another way? ⚠️ No be legal advice o."
|
| 115 |
+
else:
|
| 116 |
+
response = "Sorry, I couldn't find information about that question in my database. Could you try asking in a different way? ⚠️ This is not legal advice."
|
| 117 |
+
history.append(("You: " + user_input, "Bot: " + response))
|
| 118 |
+
return history, history
|
| 119 |
+
|
| 120 |
+
# Create a simple context from the documents
|
| 121 |
+
context_parts = []
|
| 122 |
+
sources = []
|
| 123 |
+
|
| 124 |
+
for i, doc in enumerate(docs[:3]): # Use top 3 docs
|
| 125 |
+
content = doc.page_content[:300] # Limit content length
|
| 126 |
+
context_parts.append(f"Document {i+1}: {content}")
|
| 127 |
+
|
| 128 |
+
section = doc.metadata.get("section", "Unknown Section")
|
| 129 |
+
source = doc.metadata.get("source", "Unknown Document").replace(".pdf", "")
|
| 130 |
+
sources.append(f"[{section}] from {source}")
|
| 131 |
+
|
| 132 |
+
context = "\n\n".join(context_parts)
|
| 133 |
+
|
| 134 |
+
# Try the QA chain
|
| 135 |
+
try:
|
| 136 |
+
result = qa_chain.invoke({"question": user_input})
|
| 137 |
+
answer = result.get("answer", "").strip()
|
| 138 |
+
except:
|
| 139 |
+
answer = ""
|
| 140 |
+
|
| 141 |
+
# If no good answer from LLM, create a fallback response
|
| 142 |
+
if not answer or len(answer) < 10 or "Use the following" in answer:
|
| 143 |
+
# Create a basic response based on the topic
|
| 144 |
+
if any(word in user_input.lower() for word in ["tenant", "landlord", "rent"]):
|
| 145 |
+
if lang_choice == "pidgin":
|
| 146 |
+
answer = "For Nigerian law, tenant get rights wey include: right to peaceful enjoyment of property, right to proper notice before eviction, and right to habitable living conditions. Check your tenancy agreement and local state laws for specific details."
|
| 147 |
+
else:
|
| 148 |
+
answer = "Under Nigerian law, tenants have rights including: right to peaceful enjoyment of the property, right to proper notice before eviction, and right to habitable living conditions. Check your tenancy agreement and applicable state laws for specific provisions."
|
| 149 |
+
|
| 150 |
+
elif any(word in user_input.lower() for word in ["employee", "worker", "job", "employment"]):
|
| 151 |
+
if lang_choice == "pidgin":
|
| 152 |
+
answer = "Nigerian Labour Act dey protect workers with rights like: right to fair wages, safe working conditions, reasonable working hours, and protection from unfair dismissal. Check the Labour Act for full details."
|
| 153 |
+
else:
|
| 154 |
+
answer = "The Nigerian Labour Act protects employees with rights including: right to fair wages, safe working conditions, reasonable working hours, and protection from unfair dismissal. Refer to the Labour Act for comprehensive details."
|
| 155 |
+
|
| 156 |
+
elif any(word in user_input.lower() for word in ["data", "privacy", "personal information"]):
|
| 157 |
+
if lang_choice == "pidgin":
|
| 158 |
+
answer = "Nigeria Data Protection Act (NDPR) give you rights over your personal data including: right to know how your data dey used, right to correct wrong information, and right to request deletion of your data."
|
| 159 |
+
else:
|
| 160 |
+
answer = "The Nigeria Data Protection Regulation (NDPR) grants you rights over personal data including: right to know how your data is used, right to correct inaccurate information, and right to request deletion of your data."
|
| 161 |
+
|
| 162 |
else:
|
| 163 |
+
if lang_choice == "pidgin":
|
| 164 |
+
answer = "I get information about dat topic for my database, but I no fit give clear answer now. Try be more specific with your question?"
|
| 165 |
+
else:
|
| 166 |
+
answer = "I have information about that topic in my database, but I can't provide a clear answer right now. Could you be more specific with your question?"
|
| 167 |
|
| 168 |
+
# Add disclaimer
|
| 169 |
if lang_choice == "pidgin":
|
| 170 |
if "No be legal advice" not in answer:
|
| 171 |
answer += "\n\n⚠️ No be legal advice o, abeg meet lawyer if matter serious."
|
|
|
|
| 173 |
if "not legal advice" not in answer.lower():
|
| 174 |
answer += "\n\n⚠️ This is not legal advice. Please consult a qualified lawyer for specific issues."
|
| 175 |
|
| 176 |
+
# Add sources if we have them
|
| 177 |
+
if sources and len(answer) < 400:
|
| 178 |
+
answer += "\n\n📚 Sources:\n" + "\n".join(sources[:2]) # Limit to 2 sources
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
history.append(("You: " + user_input, "Bot: " + answer))
|
| 181 |
|
| 182 |
+
# Limit history
|
| 183 |
if len(history) > 5:
|
| 184 |
history = history[-5:]
|
| 185 |
|
|
|
|
| 187 |
|
| 188 |
except Exception as e:
|
| 189 |
if lang_choice == "pidgin":
|
| 190 |
+
error_msg = f"Sorry o, I get wahala: {str(e)[:50]}..."
|
| 191 |
else:
|
| 192 |
+
error_msg = f"Sorry, I encountered an error: {str(e)[:50]}..."
|
| 193 |
history.append(("You: " + user_input, "Bot: " + error_msg))
|
| 194 |
return history, history
|
| 195 |
|