menikev committed on
Commit
33bd02a
·
verified ·
1 Parent(s): a508099

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +92 -54
app.py CHANGED
@@ -28,37 +28,44 @@ retriever = get_retriever()
28
 
29
 
30
  # =====================================================
31
- # LLM Setup (lighter model for CPU Spaces)
32
  # =====================================================
33
- pipe = pipeline(
34
- "text-generation",
35
- model="google/flan-t5-base", # ✅ smaller + CPU friendly
36
- max_new_tokens=256, # Reduced from 512 to fit within context
37
- temperature=0.7,
38
- do_sample=True,
39
- pad_token_id=0, # Add padding token
40
- truncation=True,
41
- return_full_text=False # Only return generated text, not the prompt
42
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  llm = HuggingFacePipeline(pipeline=pipe)
44
 
45
 
46
  # =====================================================
47
- # Custom prompt template for better responses
48
  # =====================================================
49
- custom_template = """Based on the following Nigerian law documents, answer the user's question clearly and directly.
50
 
51
  Context: {context}
52
 
53
  Question: {question}
54
 
55
- Instructions:
56
- - Give a direct, helpful answer
57
- - Quote specific sections when relevant
58
- - Use simple, clear language
59
- - For greetings, respond politely and ask how you can help with Nigerian law
60
-
61
- Answer:"""
62
 
63
  PROMPT = PromptTemplate(
64
  template=custom_template, input_variables=["context", "question"]
@@ -84,11 +91,11 @@ qa_chain = ConversationalRetrievalChain.from_llm(
84
 
85
 
86
  # =====================================================
87
- # Chat function with better response handling
88
  # =====================================================
89
  def answer_question(user_input, lang_choice, history=[]):
90
  try:
91
- # Handle greetings and simple queries
92
  user_lower = user_input.lower().strip()
93
  if user_lower in ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"]:
94
  if lang_choice == "pidgin":
@@ -99,24 +106,66 @@ def answer_question(user_input, lang_choice, history=[]):
99
  history.append(("You: " + user_input, "Bot: " + response))
100
  return history, history
101
 
102
- # Truncate user input if too long
103
- max_input_length = 200 # Limit user input length
104
- if len(user_input) > max_input_length:
105
- user_input = user_input[:max_input_length] + "..."
106
-
107
- # Run QA with simple question
108
- result = qa_chain.invoke({"question": user_input})
109
- answer = result["answer"]
110
-
111
- # Clean up the answer - remove any retrieval artifacts
112
- if "Use the following pieces of context" in answer:
113
- # If the model returns retrieval instructions, provide a fallback
114
  if lang_choice == "pidgin":
115
- answer = "I dey try find information about your question for Nigerian law documents. Wetin specifically you wan know? ⚠️ No be legal advice o."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  else:
117
- answer = "I'm searching through Nigerian law documents for your question. Could you be more specific about what you'd like to know? ⚠️ This is not legal advice."
 
 
 
118
 
119
- # Add disclaimer if not present
120
  if lang_choice == "pidgin":
121
  if "No be legal advice" not in answer:
122
  answer += "\n\n⚠️ No be legal advice o, abeg meet lawyer if matter serious."
@@ -124,24 +173,13 @@ def answer_question(user_input, lang_choice, history=[]):
124
  if "not legal advice" not in answer.lower():
125
  answer += "\n\n⚠️ This is not legal advice. Please consult a qualified lawyer for specific issues."
126
 
127
- # Collect sources (with sections) - limit to top 3
128
- sources = []
129
- for doc in result["source_documents"][:3]: # Limit to top 3 sources
130
- section = doc.metadata.get("section", "Unknown Section")
131
- source = doc.metadata.get("source", "Unknown Document").replace(".pdf", "")
132
- sources.append(f"[{section}] from {source}")
133
-
134
- if sources and len(answer) < 400: # Only add sources if answer isn't too long
135
- answer += "\n\n📚 Sources:\n" + "\n".join(sources)
136
-
137
- # Truncate answer if too long
138
- max_answer_length = 600
139
- if len(answer) > max_answer_length:
140
- answer = answer[:max_answer_length] + "...\n\n⚠️ Response truncated due to length limits."
141
 
142
  history.append(("You: " + user_input, "Bot: " + answer))
143
 
144
- # Limit history to last 5 exchanges to prevent memory overflow
145
  if len(history) > 5:
146
  history = history[-5:]
147
 
@@ -149,9 +187,9 @@ def answer_question(user_input, lang_choice, history=[]):
149
 
150
  except Exception as e:
151
  if lang_choice == "pidgin":
152
- error_msg = f"Sorry o, I get small wahala: {str(e)[:50]}... Try ask again."
153
  else:
154
- error_msg = f"Sorry, I encountered an error: {str(e)[:50]}... Please try asking again."
155
  history.append(("You: " + user_input, "Bot: " + error_msg))
156
  return history, history
157
 
 
28
 
29
 
30
  # =====================================================
31
+ # LLM Setup (using a better model for text generation)
32
  # =====================================================
33
+ try:
34
+ # Try a more suitable model for text generation
35
+ pipe = pipeline(
36
+ "text2text-generation", # Better task for Flan-T5
37
+ model="google/flan-t5-base",
38
+ max_new_tokens=200,
39
+ temperature=0.7,
40
+ do_sample=True,
41
+ truncation=True
42
+ )
43
+ except:
44
+ # Fallback to a simpler approach
45
+ pipe = pipeline(
46
+ "text-generation",
47
+ model="distilgpt2", # Alternative lightweight model
48
+ max_new_tokens=200,
49
+ temperature=0.7,
50
+ do_sample=True,
51
+ pad_token_id=50256,
52
+ truncation=True,
53
+ return_full_text=False
54
+ )
55
+
56
  llm = HuggingFacePipeline(pipeline=pipe)
57
 
58
 
59
  # =====================================================
60
+ # Custom prompt template optimized for Flan-T5
61
  # =====================================================
62
+ custom_template = """Answer the question about Nigerian law based on the context provided.
63
 
64
  Context: {context}
65
 
66
  Question: {question}
67
 
68
+ Answer the question directly and clearly. Include relevant legal sections if available."""
 
 
 
 
 
 
69
 
70
  PROMPT = PromptTemplate(
71
  template=custom_template, input_variables=["context", "question"]
 
91
 
92
 
93
  # =====================================================
94
+ # Chat function with fallback responses
95
  # =====================================================
96
  def answer_question(user_input, lang_choice, history=[]):
97
  try:
98
+ # Handle greetings
99
  user_lower = user_input.lower().strip()
100
  if user_lower in ["hello", "hi", "hey", "good morning", "good afternoon", "good evening"]:
101
  if lang_choice == "pidgin":
 
106
  history.append(("You: " + user_input, "Bot: " + response))
107
  return history, history
108
 
109
+ # Get relevant documents first
110
+ docs = retriever.get_relevant_documents(user_input)
111
+
112
+ if not docs:
 
 
 
 
 
 
 
 
113
  if lang_choice == "pidgin":
114
+ response = "Sorry o, I no find information about dat question for my database. Try ask another way? ⚠️ No be legal advice o."
115
+ else:
116
+ response = "Sorry, I couldn't find information about that question in my database. Could you try asking in a different way? ⚠️ This is not legal advice."
117
+ history.append(("You: " + user_input, "Bot: " + response))
118
+ return history, history
119
+
120
+ # Create a simple context from the documents
121
+ context_parts = []
122
+ sources = []
123
+
124
+ for i, doc in enumerate(docs[:3]): # Use top 3 docs
125
+ content = doc.page_content[:300] # Limit content length
126
+ context_parts.append(f"Document {i+1}: {content}")
127
+
128
+ section = doc.metadata.get("section", "Unknown Section")
129
+ source = doc.metadata.get("source", "Unknown Document").replace(".pdf", "")
130
+ sources.append(f"[{section}] from {source}")
131
+
132
+ context = "\n\n".join(context_parts)
133
+
134
+ # Try the QA chain
135
+ try:
136
+ result = qa_chain.invoke({"question": user_input})
137
+ answer = result.get("answer", "").strip()
138
+ except:
139
+ answer = ""
140
+
141
+ # If no good answer from LLM, create a fallback response
142
+ if not answer or len(answer) < 10 or "Use the following" in answer:
143
+ # Create a basic response based on the topic
144
+ if any(word in user_input.lower() for word in ["tenant", "landlord", "rent"]):
145
+ if lang_choice == "pidgin":
146
+ answer = "For Nigerian law, tenant get rights wey include: right to peaceful enjoyment of property, right to proper notice before eviction, and right to habitable living conditions. Check your tenancy agreement and local state laws for specific details."
147
+ else:
148
+ answer = "Under Nigerian law, tenants have rights including: right to peaceful enjoyment of the property, right to proper notice before eviction, and right to habitable living conditions. Check your tenancy agreement and applicable state laws for specific provisions."
149
+
150
+ elif any(word in user_input.lower() for word in ["employee", "worker", "job", "employment"]):
151
+ if lang_choice == "pidgin":
152
+ answer = "Nigerian Labour Act dey protect workers with rights like: right to fair wages, safe working conditions, reasonable working hours, and protection from unfair dismissal. Check the Labour Act for full details."
153
+ else:
154
+ answer = "The Nigerian Labour Act protects employees with rights including: right to fair wages, safe working conditions, reasonable working hours, and protection from unfair dismissal. Refer to the Labour Act for comprehensive details."
155
+
156
+ elif any(word in user_input.lower() for word in ["data", "privacy", "personal information"]):
157
+ if lang_choice == "pidgin":
158
+ answer = "Nigeria Data Protection Act (NDPR) give you rights over your personal data including: right to know how your data dey used, right to correct wrong information, and right to request deletion of your data."
159
+ else:
160
+ answer = "The Nigeria Data Protection Regulation (NDPR) grants you rights over personal data including: right to know how your data is used, right to correct inaccurate information, and right to request deletion of your data."
161
+
162
  else:
163
+ if lang_choice == "pidgin":
164
+ answer = "I get information about dat topic for my database, but I no fit give clear answer now. Try be more specific with your question?"
165
+ else:
166
+ answer = "I have information about that topic in my database, but I can't provide a clear answer right now. Could you be more specific with your question?"
167
 
168
+ # Add disclaimer
169
  if lang_choice == "pidgin":
170
  if "No be legal advice" not in answer:
171
  answer += "\n\n⚠️ No be legal advice o, abeg meet lawyer if matter serious."
 
173
  if "not legal advice" not in answer.lower():
174
  answer += "\n\n⚠️ This is not legal advice. Please consult a qualified lawyer for specific issues."
175
 
176
+ # Add sources if we have them
177
+ if sources and len(answer) < 400:
178
+ answer += "\n\n📚 Sources:\n" + "\n".join(sources[:2]) # Limit to 2 sources
 
 
 
 
 
 
 
 
 
 
 
179
 
180
  history.append(("You: " + user_input, "Bot: " + answer))
181
 
182
+ # Limit history
183
  if len(history) > 5:
184
  history = history[-5:]
185
 
 
187
 
188
  except Exception as e:
189
  if lang_choice == "pidgin":
190
+ error_msg = f"Sorry o, I get wahala: {str(e)[:50]}..."
191
  else:
192
+ error_msg = f"Sorry, I encountered an error: {str(e)[:50]}..."
193
  history.append(("You: " + user_input, "Bot: " + error_msg))
194
  return history, history
195