""" Interactive RAG chatbot using Gradio. Name: Constitution Of India RAG Chatbot Phi3-mini-4k + MiniLM + chromaDB """ import os from pathlib import Path from turtle import undo import gradio as gr from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, PromptTemplate,Settings from llama_index.embeddings.huggingface import HuggingFaceEmbedding from llama_index.llms.huggingface import HuggingFaceLLM from llama_index.core.memory import ChatMemoryBuffer from textblob import TextBlob from typing import List, Tuple from transformers import AutoTokenizer EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2" CHROMA_DB_PATH = "./chroma_db" MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct" MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct" # ✅ No tokenizer bugs MAX_HISTORY_TOKENS = 8000 TOP_K = 4 # user query embeded with this model Settings.embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL, device="cpu") # phi3 LLm (downloads ~2GB on first use) # Model name and its tokenizer name are the same most of the times. check HF for tokenizer name if not found. llm = HuggingFaceLLM( model_name=MODEL_NAME, tokenizer_name=MODEL_NAME, context_window=32768, max_new_tokens=512, device_map="cpu") qa_prompt = PromptTemplate( """<|im_start|>system You are an expert on the Constitution of India. Your job is to answer questions using ONLY the provided Constitution text excerpts. RULES (MANDATORY): 1. Answer using ONLY the context provided below 2. If the answer is NOT in the context, respond EXACTLY: "❌ Not found in Constitution of India" 3. Cite specific Article/Section numbers when possible 4. Never use external knowledge or general facts 5. Be precise, legal, and constitutional in tone 6. If asked about creator - Respond with Abhijeet M CONTEXT FROM CONSTITUTION: {context_str} QUESTION: {query_str}<|im_end|> <|im_start|>assistant""" ) class ConstitutionRAGChatBot: def __init__(self): if not os.path.exists(CHROMA_DB_PATH): raise FileNotFoundError(f"ChromaDB index not found at {CHROMA_DB_PATH}. Run indexing first.") # load index from storage -- already computed by index_builder.py storage_context = StorageContext.from_defaults(persist_dir=CHROMA_DB_PATH) self.index = load_index_from_storage(storage_context) self.query_engine = self.index.as_query_engine(llm=llm, chat_mode=True, similarity_top_k=TOP_K, response_mode="compact", text_qa_template=qa_prompt, memory=ChatMemoryBuffer.from_defaults(token_limit=MAX_HISTORY_TOKENS)) # self.chat_engine = self.index.as_chat_engine( # chat_mode="context", # query_engine=self.query_engine, # Injects your custom prompt + settings # memory=ChatMemoryBuffer.from_defaults(token_limit=MAX_HISTORY_TOKENS)) def preprocess_query(self, query: str) -> str: """ Preprocess user query to improve accuracy. """ # correct spelling corrected_query = TextBlob(query.strip()).correct() return str(corrected_query) def chat(self, message: str, history: List[Tuple[str, str]]) -> str: """ Callback """ if not message.strip(): return "Please, Stick to the questions regarding the Constitutions. Thanks!" # for user_msg, bot_msg in history[-3:]: # Last 3 exchanges # print ('History:\n') # print(user_msg,"\n", bot_msg) # print ('Ends..:\n') try: clean_query = self.preprocess_query(message) # query RAG (auto embed, retrives, generate) response = self.query_engine.query(clean_query) # response = self.chat_engine.chat(clean_query) # print(f"📜 Retrieved context: {response.get_formatted_sources()}") if "Not Found" in response.response.lower(): return "Its my Bad. Might be there is no information on this topic into the constitution of India or Legal language is too hard for me too.. ;)" return response.response except Exception as e: return f"Error: {str(e)}.\n Try rephrasing your question in less workds" def create_demo(): rag = ConstitutionRAGChatBot() demo = gr.ChatInterface( fn=rag.chat, title = 'YourHonor', description="Ask precise questions about Articles, Rights, Duties, Amendments. ", theme="soft", examples=[ "What does Article 14 say?", "Fundamental Rights list?", "President election process?", "Emergency provisions?", ], cache_examples=False, retry_btn="Ask Again", undo_btn="Undo", submit_btn="Order!Order!" ) return demo if __name__ == "__main__": # Local test demo = create_demo() # demo.launch(server_name="0.0.0.0", server_port=7860, debug=True) # demo.launch(server_name="0.0.0.0", server_port=7860, share=True) demo.launch()