|
|
"""Interactive RAG chatbot using Gradio.

Name: Constitution Of India RAG Chatbot

Stack: Qwen2-1.5B-Instruct LLM (the MODEL_NAME in effect; Phi-3-mini-4k was
the earlier choice) + MiniLM embeddings + ChromaDB
"""
|
|
|
|
|
|
import os
|
|
|
from pathlib import Path
|
|
|
from turtle import undo
|
|
|
import gradio as gr
|
|
|
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, PromptTemplate,Settings
|
|
|
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
|
|
from llama_index.llms.huggingface import HuggingFaceLLM
|
|
|
from llama_index.core.memory import ChatMemoryBuffer
|
|
|
from textblob import TextBlob
|
|
|
from typing import List, Tuple
|
|
|
from transformers import AutoTokenizer
|
|
|
|
|
|
# --- Configuration -----------------------------------------------------------

# Sentence-embedding model used to embed queries for retrieval.
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Directory holding the persisted index; it must exist before the bot starts.
CHROMA_DB_PATH = "./chroma_db"

# Generation model. This name used to be assigned twice in a row
# ("microsoft/Phi-3-mini-4k-instruct" first, then this value), so the first
# assignment was dead code; only the effective value is kept.
MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"

# Token budget passed to the chat memory buffer.
MAX_HISTORY_TOKENS = 8000

# Number of most-similar chunks retrieved per question.
TOP_K = 4
|
|
|
|
|
|
|
|
|
# Register the embedding model on llama_index's `Settings` object, pinned to CPU.
Settings.embed_model = HuggingFaceEmbedding(
    device="cpu",
    model_name=EMBED_MODEL,
)

# Generation model and its tokenizer are both loaded from MODEL_NAME and
# mapped to CPU; each answer is capped at 512 newly generated tokens.
llm = HuggingFaceLLM(
    model_name=MODEL_NAME,
    tokenizer_name=MODEL_NAME,
    device_map="cpu",
    context_window=32768,
    max_new_tokens=512,
)
|
|
|
|
|
|
# Prompt text for the question-answering step. It uses `<|im_start|>` /
# `<|im_end|>` turn markers and exposes two placeholders: `{context_str}` and
# `{query_str}`.
# NOTE(review): the leading "β" in rule 2 looks like a mis-encoded symbol
# (possibly an emoji) - confirm the intended character.
_QA_PROMPT_TEXT = """<|im_start|>system
You are an expert on the Constitution of India. Your job is to answer questions using ONLY the provided Constitution text excerpts.

RULES (MANDATORY):
1. Answer using ONLY the context provided below
2. If the answer is NOT in the context, respond EXACTLY: "β Not found in Constitution of India"
3. Cite specific Article/Section numbers when possible
4. Never use external knowledge or general facts
5. Be precise, legal, and constitutional in tone
6. If asked about creator - Respond with Abhijeet M

CONTEXT FROM CONSTITUTION:
{context_str}

QUESTION: {query_str}<|im_end|>
<|im_start|>assistant"""

qa_prompt = PromptTemplate(_QA_PROMPT_TEXT)
|
|
|
|
|
|
class ConstitutionRAGChatBot:
    """Question-answering bot over a persisted index of the Constitution of India.

    The constructor loads the index stored under ``CHROMA_DB_PATH`` and builds
    a query engine on top of it. ``chat`` is the callback used by the UI.
    """

    def __init__(self):
        """Load the persisted index and create the query engine.

        Raises:
            FileNotFoundError: If ``CHROMA_DB_PATH`` does not exist.
        """
        if not os.path.exists(CHROMA_DB_PATH):
            raise FileNotFoundError(f"ChromaDB index not found at {CHROMA_DB_PATH}. Run indexing first.")

        storage_context = StorageContext.from_defaults(persist_dir=CHROMA_DB_PATH)
        self.index = load_index_from_storage(storage_context)

        # NOTE(review): `chat_mode` and `memory` look like chat-engine options;
        # confirm that `as_query_engine` actually honours them, otherwise no
        # conversation history is kept between questions.
        self.query_engine = self.index.as_query_engine(
            llm=llm,
            chat_mode=True,
            similarity_top_k=TOP_K,
            response_mode="compact",
            text_qa_template=qa_prompt,
            memory=ChatMemoryBuffer.from_defaults(token_limit=MAX_HISTORY_TOKENS),
        )

    def preprocess_query(self, query: str) -> str:
        """Return *query* stripped of surrounding whitespace and spell-corrected.

        Spelling correction is delegated to ``TextBlob.correct()``.
        """
        corrected_query = TextBlob(query.strip()).correct()
        return str(corrected_query)

    def chat(self, message: str, history: List[Tuple[str, str]]) -> str:
        """Answer one user message; used as the chat UI callback.

        Args:
            message: The user's question.
            history: Previous (user, bot) turns supplied by the UI. It is
                accepted for interface compatibility and not read here.

        Returns:
            The model's answer, a friendly fallback when the model reports
            that nothing was found (or returns no text), a reminder when the
            message is blank, or an error description if answering failed.
        """
        if not message.strip():
            return "Please, Stick to the questions regarding the Constitutions. Thanks!"

        # UI boundary: any failure is reported to the user as text instead of
        # propagating into the web framework.
        try:
            clean_query = self.preprocess_query(message)
            response = self.query_engine.query(clean_query)

            # The response text may be missing; treat that like "not found".
            answer = response.response or ""

            # The needle must be lowercase because the haystack is lowercased;
            # a mixed-case "Not Found" could never match.
            if not answer.strip() or "not found" in answer.lower():
                return "Its my Bad. Might be there is no information on this topic into the constitution of India or Legal language is too hard for me too.. ;)"
            return answer
        except Exception as e:
            return f"Error: {str(e)}.\n Try rephrasing your question in less workds"
|
|
|
|
|
|
def create_demo():
    """Build the Gradio chat interface backed by a new ``ConstitutionRAGChatBot``.

    Returns:
        The configured ``gr.ChatInterface``. It is not launched here.
    """
    # Starter questions shown in the UI.
    sample_questions = [
        "What does Article 14 say?",
        "Fundamental Rights list?",
        "President election process?",
        "Emergency provisions?",
    ]

    bot = ConstitutionRAGChatBot()

    # NOTE(review): `retry_btn` / `undo_btn` may not be accepted by every
    # gradio release - confirm against the installed version.
    return gr.ChatInterface(
        fn=bot.chat,
        title='YourHonor',
        description="Ask precise questions about Articles, Rights, Duties, Amendments. ",
        theme="soft",
        examples=sample_questions,
        cache_examples=False,
        retry_btn="Ask Again",
        undo_btn="Undo",
        submit_btn="Order!Order!",
    )
|
|
|
|
|
|
|
|
|
# Script entry point: build the interface and start serving it.
if __name__ == "__main__":
    demo = create_demo()
    demo.launch()