# app.py — RAG chatbot (Groq Llama3 + FAISS retrieval) for a Hugging Face Space.
import os
import gradio as gr
from langchain_groq import ChatGroq
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
# --- Configuration ---
# You will set this as a Secret in Hugging Face Spaces, but for local testing:
# os.environ["GROQ_API_KEY"] = "YOUR_ACTUAL_API_KEY_HERE"
def initialize_llm():
    """Build and return the Groq-hosted Llama3 chat model.

    Authentication comes from the GROQ_API_KEY environment variable
    (set as a Space secret). Temperature 0 keeps answers deterministic
    for retrieval-grounded QA.
    """
    llm = ChatGroq(temperature=0, model_name="llama3-8b-8192")
    return llm
def process_pdfs(files):
    """Load uploaded PDFs, chunk them, and build a FAISS vector store.

    Args:
        files: list of Gradio file objects (each exposes a `.name` path),
            or a falsy value when nothing was uploaded.

    Returns:
        (vectorstore, status_message) tuple; `vectorstore` is None when
        no files were provided.
    """
    if not files:
        return None, "No files uploaded."

    # Collect every page of every uploaded PDF into one document list.
    pages = []
    for uploaded in files:
        pages.extend(PyPDFLoader(uploaded.name).load())

    # Enhancement #6: recursive splitter — overlap preserves context across
    # chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(pages)

    # Enhancement #1: sentence-transformer embeddings for the vector index.
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    index = FAISS.from_documents(chunks, embedder)
    return index, f"Processed {len(files)} PDF(s) successfully!"
def create_conversation_chain(vectorstore):
    """Wire the LLM, a retriever over `vectorstore`, and chat memory
    into a conversational RAG chain.
    """
    # Buffer memory stores the full chat history so follow-up questions
    # can be reformulated with context.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=initialize_llm(),
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )
# --- Global State for the Chain ---
# Holds the active ConversationalRetrievalChain; stays None until PDFs have
# been uploaded and processed via handle_file_upload.
conversation = None
def handle_file_upload(files):
    """Gradio callback: build the vector store and (re)create the chat chain.

    Returns the status string shown in the UI textbox.
    """
    global conversation

    vectorstore, status = process_pdfs(files)
    # Only replace the active chain when processing actually produced an index.
    if vectorstore:
        conversation = create_conversation_chain(vectorstore)
    return status
def handle_user_query(message, history):
    """Gradio ChatInterface callback: answer `message` via the RAG chain.

    `history` is supplied by Gradio but unused — the chain keeps its own
    memory. Returns the answer text, or a prompt to upload documents first.
    """
    global conversation

    if conversation is None:
        return "Please upload and process PDF documents first."

    result = conversation({"question": message})
    return result["answer"]
# --- Gradio Interface ---
# Layout order matters in gr.Blocks: components render in creation order.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📄 RAG Chatbot with Groq & Llama3")
    gr.Markdown("Upload PDFs, process them, and then ask questions based on their content.")

    # Upload controls side by side.
    with gr.Row():
        file_input = gr.File(
            label="Upload PDFs",
            file_count="multiple",
            file_types=[".pdf"],
        )
        upload_btn = gr.Button("Process Documents")

    # Read-only status line fed by the upload handler.
    status_text = gr.Textbox(label="Status", interactive=False)

    # Chat pane backed by the RAG query handler.
    chatbot = gr.ChatInterface(
        fn=handle_user_query,
        title="Chat with your PDFs",
        description="Ask questions about the uploaded documents.",
    )

    upload_btn.click(
        fn=handle_file_upload,
        inputs=[file_input],
        outputs=[status_text],
    )

if __name__ == "__main__":
    demo.launch()