Spaces:

Bijay13
/

Pdf-RAG-Chatbot

Sleeping

App Files Files Community

Pdf-RAG-Chatbot / app.py

Bijay13

made the status box bigger

8707156 about 1 month ago

raw

history blame contribute delete

6.72 kB

	import os
	import gradio as gr
	from langchain_groq import ChatGroq
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.vectorstores import FAISS
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain.chains import ConversationalRetrievalChain
	from langchain.memory import ConversationBufferMemory
	from langchain_community.document_loaders import PyPDFLoader
	import tempfile
	import shutil

	# Identifier of the Groq-hosted chat model used to answer questions.
	MODEL_NAME = "llama-3.3-70b-versatile"
	# API key read from the environment; empty string when the variable is unset.
	DEFAULT_API_KEY = os.environ.get("GROQ_API_KEY", "")

	# Module-level state shared by the handler functions in this file.
	vectorstore = None  # FAISS index built by process_pdf
	conversation_chain = None  # retrieval chain; None until a PDF is processed
	chat_history = []  # emptied whenever a PDF is processed or the chat is reset

	def process_pdf(pdf_file, api_key):
	    """Index an uploaded PDF and build the retrieval chat chain.

	    The PDF is copied to a private temporary directory, loaded, split into
	    overlapping chunks, embedded into a FAISS index and wired to a Groq chat
	    model through a ``ConversationalRetrievalChain`` with buffer memory.

	    Args:
	        pdf_file: The uploaded file; either a filesystem path (``str`` or
	            ``os.PathLike``) or an object exposing the path as ``.name``.
	        api_key: Groq API key passed to ``ChatGroq``.

	    Returns:
	        A ``(status_message, chat_value)`` tuple. ``chat_value`` is ``[]``
	        on success and ``None`` whenever processing did not complete.
	        Exceptions are reported in ``status_message`` rather than raised.
	    """
	    global vectorstore, conversation_chain, chat_history

	    if not api_key:
	        return "Please provide a Groq API key first.", None

	    if pdf_file is None:
	        return "Please upload a PDF file.", None

	    try:
	        # Accept plain paths as well as file-like objects carrying ``.name``.
	        if isinstance(pdf_file, (str, os.PathLike)):
	            source_path = os.fspath(pdf_file)
	        else:
	            source_path = pdf_file.name

	        # The context manager removes the directory even when copying or
	        # parsing fails, so failed uploads do not leave temp dirs behind.
	        with tempfile.TemporaryDirectory() as temp_dir:
	            temp_pdf_path = os.path.join(temp_dir, "uploaded.pdf")
	            shutil.copy(source_path, temp_pdf_path)
	            documents = PyPDFLoader(temp_pdf_path).load()

	        # Split documents into overlapping chunks for retrieval.
	        text_splitter = RecursiveCharacterTextSplitter(
	            chunk_size=1000,
	            chunk_overlap=200,
	            length_function=len,
	        )
	        chunks = text_splitter.split_documents(documents)
	        if not chunks:
	            # Report this directly instead of passing an empty list on to
	            # the vector store.
	            return (
	                "Error processing PDF: no extractable text was found "
	                "in the document.",
	                None,
	            )

	        # Create embeddings and the vector store.
	        embeddings = HuggingFaceEmbeddings(
	            model_name="sentence-transformers/all-MiniLM-L6-v2"
	        )
	        new_vectorstore = FAISS.from_documents(chunks, embeddings)

	        # Initialize the LLM.
	        llm = ChatGroq(
	            groq_api_key=api_key,
	            model_name=MODEL_NAME,
	            temperature=0.7,
	            max_tokens=1024,
	        )

	        # output_key tells the memory which chain output to store, since the
	        # chain also returns source documents.
	        memory = ConversationBufferMemory(
	            memory_key="chat_history",
	            return_messages=True,
	            output_key="answer",
	        )

	        new_chain = ConversationalRetrievalChain.from_llm(
	            llm=llm,
	            retriever=new_vectorstore.as_retriever(search_kwargs={"k": 3}),
	            memory=memory,
	            return_source_documents=True,
	        )

	        # Publish the new state only after every step succeeded, so a failure
	        # never leaves the index and the chain pointing at different PDFs.
	        vectorstore = new_vectorstore
	        conversation_chain = new_chain
	        chat_history = []

	        return f"✅ PDF processed successfully! Found {len(chunks)} text chunks. You can now ask questions about the document.", []

	    except Exception as e:
	        # UI boundary: surface any failure as a status message.
	        return f"Error processing PDF: {str(e)}", None

	def chat_with_pdf(message, chat_history_ui, api_key):
	    """Answer a user message with the chain built from the uploaded PDF.

	    Args:
	        message: Text typed by the user; ``None`` is treated as empty.
	        chat_history_ui: List of ``{"role": ..., "content": ...}`` dicts
	            shown in the chatbot; ``None`` is treated as an empty history.
	        api_key: Not used by this function; kept so the handler signature
	            stays unchanged.

	    Returns:
	        ``(updated_history, "")``. Blank messages leave the history
	        unchanged. Errors raised by the chain are appended to the history as
	        an assistant message instead of propagating.
	    """
	    # Work on a copy so a None history cannot break .append below.
	    history = list(chat_history_ui or [])

	    if not (message or "").strip():
	        return history, ""

	    history.append({"role": "user", "content": message})

	    if conversation_chain is None:
	        history.append({
	            "role": "assistant",
	            "content": "Please upload a PDF file first before asking questions.",
	        })
	        return history, ""

	    try:
	        # .invoke replaces the deprecated Chain.__call__ entry point.
	        response = conversation_chain.invoke({"question": message})
	        history.append({"role": "assistant", "content": response["answer"]})
	    except Exception as e:
	        # UI boundary: show the failure in the chat rather than crash.
	        history.append({"role": "assistant", "content": f"Error: {str(e)}"})

	    return history, ""

	def reset_chat():
	    """Forget the processed PDF and clear the conversation.

	    Returns:
	        A pair of an empty chat list and the status text
	        ``"Ready to upload a new PDF."``.
	    """
	    global conversation_chain, vectorstore, chat_history
	    # Drop the index and the chain together, then empty the history.
	    vectorstore = conversation_chain = None
	    chat_history = []
	    cleared_chat, status = [], "Ready to upload a new PDF."
	    return cleared_chat, status

	def _process_pdf_with_server_key(pdf_file, api_key):
	    """Call process_pdf, using the server-side key when none was typed."""
	    return process_pdf(pdf_file, api_key or DEFAULT_API_KEY)


	# Build Gradio Interface
	with gr.Blocks(title="PDF RAG Chatbot") as demo:
	    gr.Markdown("# 📄 PDF RAG Chatbot")
	    gr.Markdown("Upload a PDF and chat with its content using AI")
	    gr.Markdown(f"Model: `{MODEL_NAME}`")

	    with gr.Row():
	        with gr.Column(scale=1):
	            # The key box is shown only when no key is configured on the
	            # server. The environment key is deliberately NOT used as the
	            # component value: component values are part of what the app
	            # delivers to the browser, so a hidden box would still expose
	            # the secret. The wrapper above supplies it server-side instead.
	            api_key_input = gr.Textbox(
	                label="Groq API Key",
	                placeholder="Enter your Groq API key here...",
	                type="password",
	                visible=not DEFAULT_API_KEY,
	            )

	            pdf_upload = gr.File(
	                label="Upload PDF",
	                file_types=[".pdf"],
	                type="filepath",
	            )

	            process_btn = gr.Button("Process PDF", variant="primary")
	            status_text = gr.Textbox(
	                label="Status",
	                value="Upload a PDF to get started.",
	                interactive=False,
	                lines=3,
	                max_lines=5,
	            )

	            clear_btn = gr.Button("Reset Chat", variant="stop")

	        with gr.Column(scale=2):
	            chatbot = gr.Chatbot(height=500)

	            with gr.Row():
	                msg = gr.Textbox(
	                    label="Message",
	                    placeholder="Ask a question about the PDF...",
	                    scale=4,
	                )
	                submit_btn = gr.Button("Send", scale=1)

	    # Setup instructions are only relevant when users bring their own key.
	    if not DEFAULT_API_KEY:
	        gr.Markdown("### Instructions:")
	        gr.Markdown("1. Get a free API key from [Groq Console](https://console.groq.com)")
	        gr.Markdown("2. Enter your API key above")
	        gr.Markdown("3. Upload a PDF file")
	        gr.Markdown("4. Ask questions about the content!")

	    # Event handlers
	    process_btn.click(
	        _process_pdf_with_server_key,
	        inputs=[pdf_upload, api_key_input],
	        outputs=[status_text, chatbot],
	    )

	    # Pressing Enter in the message box and clicking Send do the same thing.
	    msg.submit(
	        chat_with_pdf,
	        inputs=[msg, chatbot, api_key_input],
	        outputs=[chatbot, msg],
	    )

	    submit_btn.click(
	        chat_with_pdf,
	        inputs=[msg, chatbot, api_key_input],
	        outputs=[chatbot, msg],
	    )

	    clear_btn.click(
	        reset_chat,
	        outputs=[chatbot, status_text],
	    )

	if __name__ == "__main__":
	    demo.launch()