File size: 5,190 Bytes
54d04d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cfa59a
 
 
 
 
 
54d04d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cfa59a
 
 
 
54d04d4
 
 
 
 
 
 
 
 
 
 
7cfa59a
 
 
 
54d04d4
 
 
 
7cfa59a
 
54d04d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cfa59a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
""" Interactive RAG chatbot using Gradio.

Name: Constitution Of India RAG Chatbot

Phi3-mini-4k + MiniLM + chromaDB

 """

import os
from pathlib import Path
# NOTE(review): `undo` is never used in this file — looks like an accidental IDE
# auto-import (turtle is a GUI toolkit module); consider removing.
from turtle import undo
import gradio as gr
# NOTE(review): VectorStoreIndex appears unused here — presumably needed by the
# companion index_builder.py, not this module; confirm before removing.
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, PromptTemplate,Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.memory import ChatMemoryBuffer
# TextBlob is used for spell-correcting user queries before retrieval.
from textblob import TextBlob
from typing import List, Tuple
# NOTE(review): AutoTokenizer and Path are also unused in this file — TODO confirm.
from transformers import AutoTokenizer

# Sentence-embedding model used to embed user queries (must match the model
# used when the index was built).
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Directory holding the persisted ChromaDB index (built by index_builder.py).
CHROMA_DB_PATH = "./chroma_db"
# Generation LLM. The Phi-3 choice below was immediately overridden by a second
# assignment (a dead store); keep it as a documented alternative instead.
# MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"  # previous choice, kept for reference
MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"  # ✅ No tokenizer bugs
# Token budget for the conversational memory buffer.
MAX_HISTORY_TOKENS = 8000
# Number of context chunks retrieved per query.
TOP_K = 4

# Embedding model applied to user queries at retrieval time; CPU-only.
Settings.embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL, device="cpu")

# Generation LLM (weights download on first use, ~2GB). Model name and tokenizer
# name are usually identical; check HF for the tokenizer name if loading fails.
llm = HuggingFaceLLM(
    model_name=MODEL_NAME,
    tokenizer_name=MODEL_NAME,
    context_window=32768,
    max_new_tokens=512,
    device_map="cpu",
)

# Strict QA prompt in the Qwen2 chat format (<|im_start|>/<|im_end|> markers).
# Rule 2's exact refusal phrase is what ConstitutionRAGChatBot.chat() matches
# against, so keep it in sync with that check.
qa_prompt = PromptTemplate(
    """<|im_start|>system

        You are an expert on the Constitution of India. Your job is to answer questions using ONLY the provided Constitution text excerpts.



        RULES (MANDATORY):

        1. Answer using ONLY the context provided below

        2. If the answer is NOT in the context, respond EXACTLY: "❌ Not found in Constitution of India"

        3. Cite specific Article/Section numbers when possible

        4. Never use external knowledge or general facts

        5. Be precise, legal, and constitutional in tone

        6. If asked about creator - Respond with Abhijeet M



        CONTEXT FROM CONSTITUTION:

        {context_str}



        QUESTION: {query_str}<|im_end|>

        <|im_start|>assistant"""
)

class ConstitutionRAGChatBot:
    """RAG chatbot over a pre-built ChromaDB index of the Constitution of India."""

    def __init__(self):
        """Load the persisted vector index and build the query engine.

        Raises:
            FileNotFoundError: if the ChromaDB index directory is missing
                (run index_builder.py first).
        """
        if not os.path.exists(CHROMA_DB_PATH):
            raise FileNotFoundError(f"ChromaDB index not found at {CHROMA_DB_PATH}. Run indexing first.")

        # Load index from storage -- already computed by index_builder.py.
        storage_context = StorageContext.from_defaults(persist_dir=CHROMA_DB_PATH)
        self.index = load_index_from_storage(storage_context)

        self.query_engine = self.index.as_query_engine(
            llm=llm,
            chat_mode=True,
            similarity_top_k=TOP_K,
            response_mode="compact",
            text_qa_template=qa_prompt,
            memory=ChatMemoryBuffer.from_defaults(token_limit=MAX_HISTORY_TOKENS),
        )

    def preprocess_query(self, query: str) -> str:
        """Spell-correct and trim the user query to improve retrieval accuracy."""
        corrected_query = TextBlob(query.strip()).correct()
        return str(corrected_query)

    def chat(self, message: str, history: List[Tuple[str, str]]) -> str:
        """Gradio ChatInterface callback: answer `message` via the RAG engine.

        `history` is required by the ChatInterface signature but unused here;
        conversation memory is handled by the engine's ChatMemoryBuffer.
        Returns the answer text, or a friendly message on empty input / error.
        """
        if not message.strip():
            return "Please, Stick to the questions regarding the Constitutions. Thanks!"
        try:
            clean_query = self.preprocess_query(message)
            # Query RAG (auto embed, retrieve, generate).
            response = self.query_engine.query(clean_query)

            # BUG FIX: the original tested `"Not Found" in ...lower()`, which can
            # never match a lowercased string — compare lowercase to lowercase so
            # the prompt's "❌ Not found..." refusal is actually detected.
            if "not found" in response.response.lower():
                return "Its my Bad. Might be there is no information on this topic into the constitution of India or Legal language is too hard for me too.. ;)"
            return response.response
        except Exception as e:
            # Broad catch is deliberate: surface any engine/LLM failure in the UI
            # rather than crashing the Gradio app. (Typo "workds" fixed.)
            return f"Error: {str(e)}.\n Try rephrasing your question in less words"

def create_demo():
    """Construct the Gradio ChatInterface backed by a ConstitutionRAGChatBot."""
    bot = ConstitutionRAGChatBot()
    sample_questions = [
        "What does Article 14 say?",
        "Fundamental Rights list?",
        "President election process?",
        "Emergency provisions?",
    ]
    # NOTE(review): retry_btn/undo_btn were removed from ChatInterface in newer
    # Gradio releases — confirm the pinned gradio version accepts them.
    return gr.ChatInterface(
        fn=bot.chat,
        title='YourHonor',
        description="Ask precise questions about Articles, Rights, Duties, Amendments. ",
        theme="soft",
        examples=sample_questions,
        cache_examples=False,
        retry_btn="Ask Again",
        undo_btn="Undo",
        submit_btn="Order!Order!",
    )


if __name__ == "__main__":
    # Local test
    demo = create_demo()
    # demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
    # demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
    demo.launch()