# Zeeshan24's picture
# Create app.py
# 15da411 verified
import os
import streamlit as st
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from groq import Groq
import requests
from io import BytesIO
# API key is read from the environment instead of being hard-coded, so the
# secret is not committed to source control. (The previously committed key
# should be treated as leaked and revoked.)
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
# Function to download PDF from a URL
def download_pdf_from_url(url):
    """Download the PDF at *url* and return it as an in-memory file object.

    Returns a BytesIO wrapping the response body on success, or None after
    displaying a Streamlit error on any network/HTTP failure.
    """
    try:
        # requests has no default timeout; without one a stalled connection
        # would hang the Streamlit script forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        st.error(f"Failed to download PDF: {e}")
        return None
    return BytesIO(response.content)
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of *pdf_file*.

    *pdf_file* is anything PdfReader accepts (path or binary file object).
    extract_text() returns None for pages with no extractable text (e.g.
    scanned/image-only pages); those are skipped instead of raising
    TypeError on string concatenation. str.join avoids the quadratic
    `text += ...` pattern.
    """
    reader = PdfReader(pdf_file)
    return "".join(page.extract_text() or "" for page in reader.pages)
# Function to split text into chunks
def create_chunks(text, chunk_size=500):
    """Split *text* into chunks of at most *chunk_size* whitespace-separated words."""
    tokens = text.split()
    pieces = []
    start = 0
    while start < len(tokens):
        pieces.append(" ".join(tokens[start:start + chunk_size]))
        start += chunk_size
    return pieces
# Function to create embeddings
def create_embeddings(chunks, model_name='all-MiniLM-L6-v2'):
    """Encode *chunks* (a list of strings) into a 2-D array of embeddings.

    The SentenceTransformer model is loaded once per model name and cached
    on the function object: this function is called again for every user
    query, and reloading the model on each call is very slow.
    """
    if not hasattr(create_embeddings, "_models"):
        create_embeddings._models = {}  # model_name -> loaded model
    models = create_embeddings._models
    if model_name not in models:
        models[model_name] = SentenceTransformer(model_name)
    return models[model_name].encode(chunks)
# Function to store embeddings in FAISS
def store_embeddings_in_faiss(embeddings):
    """Build and return a flat L2 (Euclidean) FAISS index holding *embeddings*."""
    vectors = np.asarray(embeddings)
    # Index dimensionality comes from the embedding width.
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index
# Function to query FAISS index
def query_faiss(index, query_embedding, k=5):
    """Return the ids of the *k* nearest stored vectors to *query_embedding*.

    The distances FAISS returns are discarded; only the id array
    (shape: n_queries x k) is passed back to the caller.
    """
    _, neighbor_ids = index.search(query_embedding, k)
    return neighbor_ids
# Function to interact with Groq API
def send_query_to_groq(query):
    """Send *query* as a single user message to Groq's llama3-8b-8192 model
    and return the assistant's reply text."""
    client = Groq(api_key=GROQ_API_KEY)
    completion = client.chat.completions.create(
        model="llama3-8b-8192",
        messages=[{"role": "user", "content": query}],
    )
    return completion.choices[0].message.content
# Preload and process PDF links
def preload_pdfs(pdf_links):
    """Download each PDF in *pdf_links*, extract its text and return the
    combined list of word chunks. PDFs that fail to download are skipped
    (download_pdf_from_url already reported the error)."""
    st.write("Downloading and processing PDFs...")
    collected = []
    for link in pdf_links:
        pdf_stream = download_pdf_from_url(link)
        if not pdf_stream:
            continue
        collected.extend(create_chunks(extract_text_from_pdf(pdf_stream)))
    return collected
# Streamlit UI
def main():
    """Streamlit entry point: build the RAG index once, then answer queries.

    Fixes two defects of the original flow:
    1. The prompt sent to Groq previously contained only the retrieved
       context — the user's question was never included. The prompt now
       carries both.
    2. Streamlit reruns the whole script on every interaction, so the
       PDF download / embedding / index build is cached in
       st.session_state instead of being repeated on each rerun.
    """
    st.title("RAG-based Application")
    # Predefined PDF links
    pdf_links = [
        "https://drive.google.com/uc?id=1hF6exN7tYScy-mxQAP5X9R_200X-ukMB",  # Add your links here
        # Add more links as needed
    ]
    # Build the index only once per session.
    if "rag_index" not in st.session_state:
        chunks = preload_pdfs(pdf_links)
        if not chunks:
            # All downloads failed or the PDFs held no text; embedding an
            # empty list would crash downstream.
            st.error("No text could be extracted from the configured PDFs.")
            return
        embeddings = create_embeddings(chunks)
        st.session_state["rag_chunks"] = chunks
        st.session_state["rag_index"] = store_embeddings_in_faiss(embeddings)
        st.success("All PDFs processed successfully! You can now ask questions.")
    chunks = st.session_state["rag_chunks"]
    index = st.session_state["rag_index"]
    # Input for user query
    query = st.text_input("Ask your question:")
    if query:
        st.write("Fetching relevant chunks...")
        query_embedding = create_embeddings([query])
        relevant_indices = query_faiss(index, query_embedding)
        context = " ".join(chunks[i] for i in relevant_indices[0])
        st.write("Sending query to Groq API...")
        # Include BOTH the retrieved context and the actual question.
        prompt = (
            "Answer the question using only the context below.\n\n"
            f"Context:\n{context}\n\n"
            f"Question: {query}"
        )
        response = send_query_to_groq(prompt)
        st.write("Response:", response)
# Run the app when this file is executed directly (Streamlit executes the
# script top-to-bottom on each rerun).
if __name__ == "__main__":
    main()