# app.py — RAG PDF Chatbot (HuggingFace Space by Wosqa, commit 34551e8)
import os
import gradio as gr
import PyPDF2
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from groq import Groq
# ------------------ CONFIG ------------------
# Groq-hosted LLM used for answer generation.
LLM_MODEL = "llama-3.1-8b-instant"
# Requires the GROQ_API_KEY environment variable; .get() means a missing key
# fails at first API call rather than at import time.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# Initialize sentence transformer model
# Small, fast embedding model used for both document and query embeddings.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
# Global storage for documents and embeddings
# These three lists are kept index-aligned: documents[i] is the raw page text,
# embeddings[i] its vector, metadata[i] its "file - Page N" label.
# NOTE(review): module-level globals mean state is shared across all users of
# the app — acceptable for a single-user Space demo, not for multi-user use.
documents = []
embeddings = []
metadata = []
# ------------------ PDF Processing ------------------
def process_pdf(pdf_file):
    """Extract text from each page of *pdf_file* and embed it.

    Repopulates the module-level ``documents`` / ``embeddings`` / ``metadata``
    stores (index-aligned, one entry per page with extractable text).

    Parameters:
        pdf_file: Gradio ``File`` object; ``pdf_file.name`` is the temp path.

    Returns:
        A human-readable status string for display in the UI.
    """
    global documents, embeddings, metadata
    # Reset ALL stores up front — including embeddings. Previously embeddings
    # was not cleared, so a failed/empty upload left stale vectors behind.
    documents = []
    embeddings = []
    metadata = []
    # Guard: the button can be clicked before any file is chosen.
    if pdf_file is None:
        return "Please upload a PDF first."
    try:
        reader = PyPDF2.PdfReader(pdf_file.name)
    except Exception as e:
        # Corrupt or non-PDF uploads should surface as a message, not a crash.
        return f"Could not read PDF: {e}"
    for i, page in enumerate(reader.pages):
        text = page.extract_text()
        # Skip pages with no extractable text (e.g. scanned images).
        if text and text.strip():
            documents.append(text)
            metadata.append(f"{pdf_file.name} - Page {i+1}")
    if not documents:
        return "No text extracted from PDF. Are you sure it contains text?"
    embeddings = embedder.encode(documents)
    return f"βœ… Processed {len(documents)} text chunks from PDF: {pdf_file.name}"
# ------------------ Retrieve Context ------------------
def retrieve_context(question, top_k=5):
    """Return ``(context, sources)`` for the chunks most similar to *question*.

    Parameters:
        question: user query string, embedded with the same model as the docs.
        top_k: maximum number of chunks to retrieve (clamped to corpus size).

    Returns:
        context: retrieved chunk texts, each followed by a newline.
        sources: metadata labels aligned with the retrieved chunks, best first.
    """
    # Guard: cosine_similarity raises on an empty corpus; callers that forget
    # to check get an empty result instead of a traceback.
    if len(documents) == 0:
        return "", []
    q_emb = embedder.encode([question])
    scores = cosine_similarity(q_emb, embeddings)[0]
    # argsort ascending, take the last k, reverse -> indices by descending score.
    k = min(top_k, len(documents))
    top_indices = np.argsort(scores)[-k:][::-1]
    sources = [metadata[idx] for idx in top_indices]
    # join() is linear (vs quadratic +=) and keeps the original trailing "\n".
    context = "".join(documents[idx] + "\n" for idx in top_indices)
    return context, sources
# ------------------ Chat with Groq ------------------
def chat(question):
    """Answer *question* with RAG: retrieve PDF context, ask the Groq LLM.

    Parameters:
        question: user question string from the UI textbox.

    Returns:
        The model's answer followed by a deduplicated source list, or a
        human-readable error/status message (never raises to the UI).
    """
    if not documents:
        return "Please upload and process a PDF first."
    # Avoid a pointless API round-trip on an empty/whitespace question.
    if not question or not question.strip():
        return "Please enter a question."
    context, sources = retrieve_context(question)
    try:
        response = client.chat.completions.create(
            model=LLM_MODEL,
            messages=[
                {"role": "system", "content": "You are a helpful assistant answering questions based on the provided PDF context."},
                {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}"}
            ],
            # Low temperature: answers should stick closely to the context.
            temperature=0.2
        )
        answer = response.choices[0].message.content
        # Deduplicate sources while preserving rank order (dict keeps
        # insertion order); the same page shouldn't be listed twice.
        unique_sources = list(dict.fromkeys(sources))
        answer += "\n\nSources:\n" + "\n".join(unique_sources)
        return answer
    except Exception as e:
        # Broad catch is deliberate at this UI boundary: any API/network
        # failure becomes a message instead of a traceback in the browser.
        return f"Error communicating with Groq: {e}"
# ------------------ GRADIO UI ------------------
# Layout: upload + process row on top, question/answer below. All components
# are wired to the module-level functions above via button click events.
with gr.Blocks(title="RAG PDF Chatbot") as demo:
    gr.Markdown("# πŸ“„ RAG PDF Chatbot")
    gr.Markdown("Upload a PDF, process it, and ask questions based on its content.")
    pdf_input = gr.File(label="Upload PDF", file_types=['.pdf'])  # βœ… single PDF only
    # Status line updated by process_pdf's return value.
    process_status = gr.Markdown()
    process_btn = gr.Button("Process PDF")
    question = gr.Textbox(label="Ask a question")
    ask_btn = gr.Button("Ask")
    answer = gr.Markdown(label="Answer")
    # click(fn, inputs, outputs): function return value populates the output.
    process_btn.click(process_pdf, pdf_input, process_status)
    ask_btn.click(chat, question, answer)
# NOTE(review): launch() at module level — runs on import; standard for Spaces.
demo.launch()