Spaces:

menikev
/

KnowYourRIght-Bot

Sleeping

App Files Files Community

menikev committed on Aug 11, 2025

Commit

5b69f3e

verified ·

1 Parent(s): 369c8e0

Update app.py

Browse files

Files changed (1) hide show

app.py +194 -56

app.py CHANGED Viewed

@@ -1,85 +1,223 @@
-import gradio as gr
 import os
-import time
-from langchain.chains import RetrievalQA
-from langchain.prompts import PromptTemplate
-from langchain_community.llms import HuggingFacePipeline
 from transformers import pipeline
-# Import your retriever (update import path as needed)
-try:
-    from retriever import get_retriever
-except ImportError:
-    from src.retriever import get_retriever
-# Use a lightweight model for fast inference
 pipe = pipeline(
     "text-generation",
-    model="microsoft/DialoGPT-medium",
     device_map="auto",
-    max_new_tokens=150,
     do_sample=False,
-    pad_token_id=tokenizer.eos_token_id
 )
-llm = HuggingFacePipeline(pipeline=pipe)
-retriever = get_retriever()
-# Simple prompt template
-template = """Answer this legal question about Nigeria using the context provided.
-If asked to respond in Nigerian Pidgin, use Nigerian Pidgin.
 Context: {context}
 Question: {question}
-Answer:"""
-prompt = PromptTemplate(input_variables=["question", "context"], template=template)
-qa_chain = RetrievalQA.from_chain_type(
-    llm=llm,
-    retriever=retriever,
-    chain_type="stuff",
-    return_source_documents=True,
-    chain_type_kwargs={"prompt": prompt}
-)
-def answer_question(user_input, lang_choice):
-    if not user_input.strip():
-        return "Please enter a question."
-    start_time = time.time()
     try:
-        if lang_choice == "pidgin":
-            user_input = f"Respond in Nigerian Pidgin: {user_input}"
-        result = qa_chain.invoke({"query": user_input})
-        processing_time = time.time() - start_time
-        answer_text = result["result"]
-        # Collect sources
-        sources = list({doc.metadata.get("source", "Unknown")
-                       for doc in result["source_documents"][:3]})
-        sources_text = "\n".join(f"📄 {os.path.basename(src)}" for src in sources)
-        return f"{answer_text}\n\n**References:**\n{sources_text}\n\n*Response time: {processing_time:.1f}s*"
     except Exception as e:
-        return f"Error: {str(e)}"
-# Gradio interface
-with gr.Blocks(title="KnowYourRight Bot") as demo:
-    gr.Markdown("# 🇳🇬 KnowYourRight Bot\nAsk legal questions in English or Nigerian Pidgin")
-    with gr.Row():
-        question = gr.Textbox(label="Your question", lines=2)
-        language = gr.Radio(["english", "pidgin"], label="Language", value="english")
-    submit = gr.Button("Ask Question", variant="primary")
-    output = gr.Textbox(label="Answer", lines=8)
-    submit.click(answer_question, inputs=[question, language], outputs=output)
-    question.submit(answer_question, inputs=[question, language], outputs=output)
-demo.launch()

+# src/knowyourright_bot.py
 import os
+from sentence_transformers import SentenceTransformer
+import chromadb
+from chromadb.config import Settings
 from transformers import pipeline
+import gradio as gr
+# Configuration
+VECTOR_DIR = "vector_db"
+MODEL_NAME = "microsoft/DialoGPT-medium"  # Free, fast model
+# Initialize embedding model and vector database
+embed_model = SentenceTransformer("all-MiniLM-L6-v2")
+client = chromadb.Client(Settings(chroma_db_impl="duckdb+parquet", persist_directory=VECTOR_DIR))
+collection = client.get_collection("laws")
+# Initialize language model
 pipe = pipeline(
     "text-generation",
+    model=MODEL_NAME,
     device_map="auto",
+    max_new_tokens=300,
+    temperature=0.1,
     do_sample=False,
+    pad_token_id=50256
 )
+# English prompt template.
+# Filled with str.format(); the {context} placeholder receives the retrieved
+# passages and {question} the user's question. The trailing "Answer ..." line
+# is the cue after which the model is expected to continue.
+ENGLISH_TEMPLATE = """
+You are a knowledgeable legal assistant for Nigerian law. Answer the question using only the provided context.
+Be concise, accurate, and cite specific sections when possible.
+Context: {context}
+Question: {question}
+Answer (in clear English):
+"""
+# Nigerian Pidgin prompt template.
+# Same {context} / {question} placeholders as the English template, with the
+# instructions and the answer cue written in Pidgin.
+PIDGIN_TEMPLATE = """
+You be legal assistant wey sabi Nigerian law well well. Use only the context wey dem give you answer the question.
+Make your answer short, correct, and talk the specific law section if e dey.
 Context: {context}
 Question: {question}
+Answer for Nigerian Pidgin:
+"""
+def get_relevant_context(question, k=4):
+    """Retrieve relevant legal context from vector database"""
+    try:
+        q_emb = embed_model.encode([question], convert_to_numpy=True)
+        results = collection.query(
+            query_embeddings=q_emb,
+            n_results=k,
+            include=["documents", "metadatas"]
+        )
+        # Format context with sources
+        context_chunks = []
+        sources = []
+        for i, doc in enumerate(results['documents'][0]):
+            source = results['metadatas'][0][i].get("source", "Unknown")
+            context_chunks.append(doc)
+            sources.append(source)
+        context = "\n\n".join(context_chunks)
+        return context, sources
+    except Exception as e:
+        print(f"Error retrieving context: {e}")
+        return "", []
+def generate_response(question, language="english"):
+    """Generate response using appropriate prompt template"""
     try:
+        # Get relevant context
+        context, sources = get_relevant_context(question)
+        if not context:
+            return "Sorry, I couldn't find relevant information to answer your question.", []
+        # Choose prompt template based on language
+        if language.lower() == "pidgin":
+            prompt = PIDGIN_TEMPLATE.format(context=context, question=question)
+        else:
+            prompt = ENGLISH_TEMPLATE.format(context=context, question=question)
+        # Generate response
+        response = pipe(prompt, max_new_tokens=256, do_sample=False, pad_token_id=50256)
+        answer = response[0]['generated_text']
+        # Extract only the generated part (remove the prompt)
+        if "Answer" in answer:
+            answer = answer.split("Answer")[-1].strip()
+            if answer.startswith("(in clear English):") or answer.startswith("for Nigerian Pidgin:"):
+                answer = answer.split(":", 1)[-1].strip()
+        return answer, sources
     except Exception as e:
+        error_msg = f"Sorry, I encountered an error: {str(e)}"
+        if language.lower() == "pidgin":
+            error_msg = "Sorry o, something happen when I dey answer your question. Try ask again."
+        return error_msg, []
+def answer_question(user_input, lang_choice):
+    """Main function for processing questions"""
+    if not user_input or len(user_input.strip()) < 3:
+        return "Please ask a more specific question about your legal rights."
+    if len(user_input) > 1000:
+        return "Please ask a shorter question (maximum 1000 characters)."
+    # Generate response
+    answer, sources = generate_response(user_input.strip(), lang_choice)
+    # Format sources
+    if sources:
+        unique_sources = list(set([os.path.basename(src) for src in sources[:3]]))
+        sources_text = "\n".join(f"📄 {src}" for src in unique_sources)
+        formatted_response = f"{answer}\n\n**References:**\n{sources_text}"
+    else:
+        formatted_response = f"{answer}\n\n**References:**\n📄 No sources found"
+    return formatted_response
+def create_gradio_interface():
+    """Create Gradio interface for testing"""
+    with gr.Blocks(
+        title="KnowYourRight Bot - Nigerian Legal Assistant",
+        theme=gr.themes.Soft()
+    ) as demo:
+        gr.Markdown(
+            """
+            # 🇳🇬 KnowYourRight Bot
+            ## Your AI Legal Assistant for Nigerian Law
+            Ask questions about your rights under:
+            - Nigerian Constitution
+            - Labor Laws
+            - Data Protection Regulation (NDPR)
+            - Consumer Protection Act (FCCPA)
+            **Available in English and Nigerian Pidgin**
+            """
+        )
+        with gr.Row():
+            with gr.Column(scale=3):
+                question_input = gr.Textbox(
+                    label="Ask about your legal rights",
+                    placeholder="e.g., Can my landlord evict me without notice?",
+                    lines=3,
+                    max_lines=5
+                )
+            with gr.Column(scale=1):
+                language_choice = gr.Radio(
+                    choices=["english", "pidgin"],
+                    label="Language / Language wey you wan use",
+                    value="english"
+                )
+        submit_btn = gr.Button("Ask Question / Ask Question", variant="primary", size="lg")
+        answer_output = gr.Textbox(
+            label="Answer / Answer",
+            lines=10,
+            max_lines=15
+        )
+        # Example questions
+        gr.Markdown("### Example Questions / Example Questions")
+        examples = [
+            ["Can my employer sack me without notice?", "english"],
+            ["Wetin be my right as tenant?", "pidgin"],
+            ["What does NDPR say about data privacy?", "english"],
+            ["How can I report consumer fraud?", "english"],
+            ["Wetin happen if person collect my data without permission?", "pidgin"]
+        ]
+        gr.Examples(
+            examples=examples,
+            inputs=[question_input, language_choice],
+            outputs=answer_output,
+            fn=answer_question
+        )
+        # Event handlers
+        submit_btn.click(
+            fn=answer_question,
+            inputs=[question_input, language_choice],
+            outputs=answer_output
+        )
+        question_input.submit(
+            fn=answer_question,
+            inputs=[question_input, language_choice],
+            outputs=answer_output
+        )
+        # Footer
+        gr.Markdown(
+            """
+            ---
+            **Disclaimer:** This is an AI assistant for informational purposes only.
+            For legal advice, consult a qualified lawyer.
+            Built by **AI Club Lagos** | Open Source Project
+            """
+        )
+    return demo