# Pdf-RAG-Chatbot / app.py
# Author: Bijay13 — last change: made the status box bigger (commit 8707156)
import os
import gradio as gr
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_community.document_loaders import PyPDFLoader
import tempfile
import shutil
MODEL_NAME = "llama-3.3-70b-versatile"  # Groq-hosted chat model used for answering questions
DEFAULT_API_KEY = os.getenv("GROQ_API_KEY", "")  # optional key from the environment; "" if unset
# Global variables — module-level session state shared by the handlers below.
# NOTE(review): globals mean all users of one server process share a single
# PDF/conversation — fine for a single-user Space, not multi-tenant.
vectorstore = None  # FAISS index built from the current PDF's chunks
conversation_chain = None  # ConversationalRetrievalChain; stays None until a PDF is processed
chat_history = []  # reset per PDF; declared global in handlers but not otherwise read
def process_pdf(pdf_file, api_key):
    """Process an uploaded PDF and build the RAG pipeline for it.

    Loads the PDF, splits it into overlapping chunks, embeds the chunks
    into a FAISS vector store, and wires up a ConversationalRetrievalChain
    backed by the Groq chat model.  Results are stored in the module-level
    globals so the chat handlers can use them.

    Args:
        pdf_file: Upload from ``gr.File`` (a path string, or an object
            exposing ``.name``), or None when nothing was uploaded.
        api_key: Groq API key; falsy values abort early.

    Returns:
        ``(status_message, chatbot_state)`` — chatbot_state is ``[]`` on
        success (clears the chat) and ``None`` on failure (leaves it as-is).
    """
    global vectorstore, conversation_chain, chat_history

    if not api_key:
        return "Please provide a Groq API key first.", None
    if pdf_file is None:
        return "Please upload a PDF file.", None

    temp_dir = None
    try:
        # Copy the upload into a private temp dir so PyPDFLoader gets a
        # stable filesystem path.
        temp_dir = tempfile.mkdtemp()
        temp_pdf_path = os.path.join(temp_dir, "uploaded.pdf")
        # gr.File(type="filepath") may hand us a plain path string or an
        # object exposing .name — accept both.
        src_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
        shutil.copy(src_path, temp_pdf_path)

        # Load the PDF into per-page documents.
        documents = PyPDFLoader(temp_pdf_path).load()

        # Overlapping chunks preserve context across chunk boundaries.
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            length_function=len,
        )
        chunks = text_splitter.split_documents(documents)

        # Embed chunks locally and index them in FAISS.
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )
        vectorstore = FAISS.from_documents(chunks, embeddings)

        # Groq-hosted LLM that will answer questions.
        llm = ChatGroq(
            groq_api_key=api_key,
            model_name=MODEL_NAME,
            temperature=0.7,
            max_tokens=1024,
        )

        # The chain returns both "answer" and sources; output_key tells the
        # memory which of the two to record.
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
            output_key="answer",
        )
        conversation_chain = ConversationalRetrievalChain.from_llm(
            llm=llm,
            retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
            memory=memory,
            return_source_documents=True,
        )

        # Fresh conversation for the new document.
        chat_history = []
        return f"✅ PDF processed successfully! Found {len(chunks)} text chunks. You can now ask questions about the document.", []
    except Exception as e:
        return f"Error processing PDF: {str(e)}", None
    finally:
        # Fix: the original removed the temp dir only on success, leaking a
        # directory (and the copied PDF) every time processing raised.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
def chat_with_pdf(message, chat_history_ui, api_key):
    """Answer one user question against the processed PDF.

    Appends the user's message and the assistant's reply (the RAG answer,
    a "please upload" hint, or an error string) to the UI transcript.

    Args:
        message: Raw text from the message textbox.
        chat_history_ui: List of {"role", "content"} dicts shown in the UI;
            mutated in place.
        api_key: Accepted for signature compatibility with the UI wiring;
            the chain already holds the key.

    Returns:
        ``(updated_transcript, "")`` — the empty string clears the textbox.
    """
    global conversation_chain, chat_history

    # Ignore blank / whitespace-only submissions outright.
    if not message.strip():
        return chat_history_ui, ""

    # Echo the user's message into the transcript first.
    chat_history_ui.append({"role": "user", "content": message})

    # No chain yet means no PDF has been processed.
    if conversation_chain is None:
        reply = "Please upload a PDF file first before asking questions."
    else:
        try:
            # The chain handles retrieval + generation + its own memory.
            result = conversation_chain({"question": message})
            reply = result["answer"]
        except Exception as e:
            reply = f"Error: {str(e)}"

    chat_history_ui.append({"role": "assistant", "content": reply})
    return chat_history_ui, ""
def reset_chat():
    """Drop all session state so a new PDF can be processed from scratch.

    Returns:
        ``([], status)`` — an empty transcript for the chatbot plus a
        fresh status line for the status textbox.
    """
    global conversation_chain, vectorstore, chat_history
    # Forget the chain, the index, and the transcript in one go.
    conversation_chain, vectorstore, chat_history = None, None, []
    return [], "Ready to upload a new PDF."
# Build Gradio Interface
with gr.Blocks(title="PDF RAG Chatbot") as demo:
    gr.Markdown("# 📄 PDF RAG Chatbot")
    gr.Markdown("Upload a PDF and chat with its content using AI")
    gr.Markdown(f"**Model:** `{MODEL_NAME}`")

    with gr.Row():
        # Left column: API key, upload, processing controls.
        with gr.Column(scale=1):
            if not DEFAULT_API_KEY:
                # No key configured in the environment: ask the user.
                api_key_input = gr.Textbox(
                    label="Groq API Key",
                    placeholder="Enter your Groq API key here...",
                    type="password"
                )
            else:
                # Key already configured: keep the textbox hidden but still
                # wired into the handlers so they receive its value.
                api_key_input = gr.Textbox(
                    type="password",
                    value=DEFAULT_API_KEY,
                    visible=False
                )
            pdf_upload = gr.File(
                label="Upload PDF",
                file_types=[".pdf"],
                type="filepath"
            )
            process_btn = gr.Button("Process PDF", variant="primary")
            status_text = gr.Textbox(
                label="Status",
                value="Upload a PDF to get started.",
                interactive=False,
                lines=3,
                max_lines=5
            )
            clear_btn = gr.Button("Reset Chat", variant="stop")

        # Right column: the chat itself.
        with gr.Column(scale=2):
            # Fix: the handlers append openai-style {"role", "content"}
            # dicts, which requires the "messages" format; the legacy
            # default (tuple pairs) renders those dicts incorrectly.
            chatbot = gr.Chatbot(type="messages", height=500)
            with gr.Row():
                msg = gr.Textbox(
                    label="Message",
                    placeholder="Ask a question about the PDF...",
                    scale=4
                )
                submit_btn = gr.Button("Send", scale=1)

    # Show setup instructions only when the user must supply a key.
    if not DEFAULT_API_KEY:
        gr.Markdown("### Instructions:")
        gr.Markdown("1. Get a free API key from [Groq Console](https://console.groq.com)")
        gr.Markdown("2. Enter your API key above")
        gr.Markdown("3. Upload a PDF file")
        gr.Markdown("4. Ask questions about the content!")

    # Event handlers
    process_btn.click(
        process_pdf,
        inputs=[pdf_upload, api_key_input],
        outputs=[status_text, chatbot]
    )
    # Enter in the textbox and the Send button trigger the same handler.
    msg.submit(
        chat_with_pdf,
        inputs=[msg, chatbot, api_key_input],
        outputs=[chatbot, msg]
    )
    submit_btn.click(
        chat_with_pdf,
        inputs=[msg, chatbot, api_key_input],
        outputs=[chatbot, msg]
    )
    clear_btn.click(
        reset_chat,
        outputs=[chatbot, status_text]
    )

if __name__ == "__main__":
    demo.launch()