Spaces:
Sleeping
Sleeping
import os

import faiss
import numpy as np
import streamlit as st
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
from groq import Groq

# Fail fast on startup: os.environ[...] raises KeyError naming the missing
# variable, which is clearer than a later authentication failure.
GROQ_API_KEY = os.environ['GROQ_API_KEY']

# Groq client used for chat completions in the Q&A flow below.
client = Groq(api_key=GROQ_API_KEY)

# Sentence embedder shared by document chunks and user queries.
# Fully-qualified model id; the short alias 'all-MiniLM-L6-v2' resolves to
# the same model on the Hugging Face hub.
embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# App UI. The original titles contained mojibake (garbled emoji bytes such
# as "π§"); replaced with plain text so they render correctly everywhere.
st.set_page_config(page_title="RAG Chat with PDF", layout="wide")
st.title("Chat with your PDF")
# Function to read PDF
def read_pdf(file):
    """Extract plain text from every page of an uploaded PDF.

    Args:
        file: A file-like object (e.g. Streamlit's UploadedFile) accepted
            by PyPDF2's PdfReader.

    Returns:
        All page texts joined with newlines.

    Notes:
        - ``page.extract_text()`` can return ``None`` for pages without
          extractable text (scanned images); the original ``text += ...``
          would raise ``TypeError`` there, so such pages are treated as "".
        - Pages are joined with a newline so the last word of one page is
          not fused with the first word of the next before chunking.
    """
    reader = PdfReader(file)
    return "\n".join((page.extract_text() or "") for page in reader.pages)
# Function to chunk text
def chunk_text(text, max_length=500, overlap=0):
    """Split text into word-count-bounded chunks.

    Args:
        text: The raw document text.
        max_length: Maximum number of words per chunk.
        overlap: Number of words shared between consecutive chunks
            (default 0 reproduces the original non-overlapping behavior).
            Overlap helps retrieval when an answer straddles a boundary.

    Returns:
        A list of chunk strings; empty list for empty/whitespace-only text.
    """
    words = text.split()
    if not words:
        return []
    # Step is clamped to >= 1 so a pathological overlap cannot loop forever.
    step = max(1, max_length - overlap)
    return [' '.join(words[i:i + max_length]) for i in range(0, len(words), step)]
# Function to embed and create FAISS index
def create_faiss_index(chunks):
    """Embed each chunk and build an exact L2 FAISS index over the vectors.

    Returns a (index, embeddings) pair so callers can reuse the vectors.
    """
    vectors = embedder.encode(chunks)
    dimension = vectors.shape[1]
    flat_index = faiss.IndexFlatL2(dimension)
    flat_index.add(np.array(vectors))
    return flat_index, vectors
# Function to search from index
def search_index(index, query, chunks, top_k=3):
    """Return the top_k chunks nearest to the query in embedding space."""
    query_vec = np.array(embedder.encode([query]))
    _, neighbor_ids = index.search(query_vec, top_k)
    return [chunks[idx] for idx in neighbor_ids[0]]
# ---- File uploader and question-answering flow ----
# (Spinner/success messages previously contained mojibake emoji bytes such
# as "π" and "β"; replaced with plain text.)
uploaded_file = st.file_uploader("Upload a PDF document", type="pdf")

if uploaded_file:
    with st.spinner("Reading and processing PDF..."):
        text = read_pdf(uploaded_file)
        chunks = chunk_text(text)
        # Guard: a scanned/image-only PDF yields no extractable text, and
        # building a FAISS index over an empty embedding batch would fail.
        if not chunks:
            st.error("No extractable text found in this PDF.")
            st.stop()
        index, embeddings = create_faiss_index(chunks)

    st.success("PDF processed. You can now ask questions!")

    query = st.text_input("Ask a question from the PDF:")
    if query:
        with st.spinner("Retrieving context..."):
            context_chunks = search_index(index, query, chunks, top_k=3)
            # Retrieved context followed by the question, sent as one turn.
            prompt = "\n".join(context_chunks) + f"\n\nQuestion: {query}"

        with st.spinner("Getting answer from Groq..."):
            response = client.chat.completions.create(
                messages=[
                    {"role": "user", "content": prompt}
                ],
                model="llama-3.3-70b-versatile",
            )
            answer = response.choices[0].message.content
        st.markdown(f"**Answer:** {answer}")