# app.py — RAG PDF Q&A Space (upload metadata: author tayy786, commit f8ad5a3)
import os
import faiss
import numpy as np
import gradio as gr
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq
# -----------------------------
# Initialize Models
# -----------------------------
# Sentence-embedding model shared by indexing (create_index) and querying (retrieve).
embedder = SentenceTransformer("all-MiniLM-L6-v2")
# Groq API client; the key comes from the "Tgb" environment variable.
# NOTE(review): "Tgb" looks like a Space secret name — confirm it is the
# intended variable; if unset, api_key is None and API calls will fail.
client = Groq(
    api_key=os.environ.get("Tgb"),
)
# -----------------------------
# Global Variables
# -----------------------------
# FAISS index over chunk embeddings; stays None until a PDF is processed.
index = None
# Text chunks, aligned row-for-row with the FAISS index.
documents = []
# -----------------------------
# PDF Processing
# -----------------------------
def read_pdf(file):
    """Extract and concatenate the text of every page of a PDF.

    Args:
        file: A path or file-like object accepted by pypdf.PdfReader.

    Returns:
        str: The text of all pages concatenated in order. Pages with no
        extractable text (e.g. scanned images) are skipped.
    """
    reader = PdfReader(file)
    parts = []
    for page in reader.pages:
        # extract_text() is expensive — call it once per page instead of
        # twice (the original called it for the check and again to append).
        page_text = page.extract_text()
        if page_text:
            parts.append(page_text)
    # join avoids quadratic string concatenation on large documents
    return "".join(parts)
def chunk_text(text, chunk_size=500, overlap=100):
    """Split text into overlapping fixed-size chunks.

    Args:
        text: The input string to split.
        chunk_size: Maximum number of characters per chunk.
        overlap: Number of characters shared between consecutive chunks.

    Returns:
        list[str]: Chunks covering the whole text in order ([] for "").

    Raises:
        ValueError: If chunk_size is not positive or overlap >= chunk_size.
            (In the original, overlap >= chunk_size made the loop step
            non-positive and looped forever.)
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]
# -----------------------------
# Create FAISS Index
# -----------------------------
def create_index(chunks):
    """Embed text chunks and build a FAISS L2 index over them.

    Stores the chunks in the module-level `documents` list and the index
    in the module-level `index` so that `retrieve` can search them.

    Args:
        chunks: list[str] of text chunks to embed and index.
    """
    global index, documents
    documents = chunks
    embeddings = embedder.encode(chunks)
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    # FAISS requires contiguous float32 input; encode() usually returns
    # float32 already, but coerce explicitly so a float64 array cannot
    # break index.add().
    index.add(np.asarray(embeddings, dtype="float32"))
# -----------------------------
# Retrieval with Relevance Check
# -----------------------------
def retrieve(query, k=3, threshold=1.2):
    """Return document chunks relevant to `query` plus a confidence label.

    Args:
        query: The user's question.
        k: Number of nearest neighbours requested from FAISS.
        threshold: Maximum L2 distance for a chunk to count as relevant.

    Returns:
        tuple[list[str], str | None]: The relevant chunks and a
        "High"/"Medium"/"Low" confidence label (None when no index has
        been built or nothing passed the threshold).
    """
    if index is None:
        return [], None
    query_embedding = embedder.encode([query])
    distances, indices = index.search(
        np.asarray(query_embedding, dtype="float32"), k
    )
    relevant_chunks = []
    valid_distances = []
    for i, dist in zip(indices[0], distances[0]):
        # FAISS pads `indices` with -1 when fewer than k vectors exist;
        # without this guard, documents[-1] silently returns the last chunk.
        if i != -1 and dist < threshold:
            relevant_chunks.append(documents[i])
            valid_distances.append(dist)
    # Confidence score (lower average distance = better match)
    confidence = None
    if valid_distances:
        avg_dist = np.mean(valid_distances)
        if avg_dist < 0.5:
            confidence = "High"
        elif avg_dist < 1.0:
            confidence = "Medium"
        else:
            confidence = "Low"
    return relevant_chunks, confidence
# -----------------------------
# Ask Groq LLM
# -----------------------------
def ask_groq(context_chunks, question):
    """Ask the Groq LLM to answer `question` using the retrieved chunks.

    Args:
        context_chunks: list[str] of document chunks used as context.
        question: The user's question.

    Returns:
        str: The model's answer text.
    """
    joined_context = "\n".join(context_chunks)
    prompt = f"""
You are an intelligent assistant.
Rules:
1. If the answer is clearly present in the context, answer normally.
2. If the answer is NOT directly present but somewhat related, say:
"This is not explicitly mentioned in the document, but based on related context..."
then give a helpful answer.
3. If the context is completely irrelevant, say:
"The document does not contain information related to this question."
Context:
{joined_context}
Question:
{question}
"""
    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content
# -----------------------------
# Main Pipeline
# -----------------------------
def process_pdf(file):
    """Read an uploaded PDF, chunk its text, and build the FAISS index.

    Args:
        file: The uploaded file object from Gradio (None if nothing uploaded).

    Returns:
        str: A human-readable status message for the UI.
    """
    # Guard clauses: no file, then no extractable text.
    if file is None:
        return "Please upload a PDF first."
    extracted = read_pdf(file)
    if not extracted.strip():
        return "Could not extract text from PDF."
    pieces = chunk_text(extracted)
    create_index(pieces)
    return f"PDF processed successfully! Total chunks: {len(pieces)}"
def answer_question(question):
    """Answer a question against the indexed PDF.

    Prefixes the LLM's reply with a confidence label when retrieval
    produced one.

    Args:
        question: The user's question string.

    Returns:
        str: The answer (or a status message when no index/context exists).
    """
    if index is None:
        return "Please upload and process a PDF first."
    chunks, confidence = retrieve(question)
    if not chunks:
        return "The document does not contain information related to this question."
    reply = ask_groq(chunks, question)
    if confidence:
        reply = f"(Confidence: {confidence})\n\n" + reply
    return reply
# -----------------------------
# Gradio UI
# -----------------------------
# Two-step UI: (1) upload + process a PDF, (2) ask questions against it.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 RAG PDF Q&A App (Groq + FAISS)")
    file_input = gr.File(label="Upload PDF")
    upload_btn = gr.Button("Process PDF")
    status = gr.Textbox(label="Status")
    question = gr.Textbox(label="Ask a question")
    answer = gr.Textbox(label="Answer")
    # Button click builds the index; pressing Enter in the question
    # textbox runs retrieval + LLM answering.
    upload_btn.click(process_pdf, inputs=file_input, outputs=status)
    question.submit(answer_question, inputs=question, outputs=answer)
# -----------------------------
# Run App
# -----------------------------
# Start the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()