# RagBaseApp / app.py
# Author: Shamil Shahbaz Awan
# (header recovered from repository page metadata: "Update app.py", commit 7a29a17)
import os
import streamlit as st
import pdfplumber
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from groq import Groq
# Set background image and customize colors
# Inject global CSS into the Streamlit page: full-page background image,
# black title, white body/footer text, a fixed footer bar, green buttons,
# and a white query-input box.  Raw <style> requires unsafe_allow_html=True.
background_image_url = "https://cdn.pixabay.com/photo/2016/06/02/02/33/triangles-1430105_1280.png"
st.markdown(
    f"""
<style>
.stApp {{
background-image: url("{background_image_url}");
background-size: cover;
background-position: center center;
background-repeat: no-repeat;
}}
/* Ensure title is black */
h1 {{
color: black !important; /* Force title color to black */
}}
/* Set footer text color to white */
h2, h3, h4, h5, h6, p {{
color: white; /* Set all text color to white */
}}
/* Set footer styling */
.footer {{
position: fixed;
bottom: 0;
left: 0;
right: 0;
background-color: rgba(0, 0, 0, 0.6);
color: white;
text-align: center;
padding: 10px 0;
font-size: 14px;
}}
/* Set processing button color to green */
.stButton button {{
background-color: green;
color: white;
}}
/* Set query input block background color to white */
.stTextInput input {{
background-color: white;
color: black;
border-radius: 5px;
padding: 10px;
}}
/* Set all output text (retrieved chunks and responses) to white */
.stMarkdown, .stTextInput, .stText, .stCode, .stJson, .stFileUploader, .stError, .stSuccess {{
color: white !important;
}}
</style>
""",
    unsafe_allow_html=True
)
# --- API key, models, and vector-store setup --------------------------------
# The Groq API key is read from the environment (set via Hugging Face Secrets
# when deployed as a Space).
HUGGINGFACE_KEY = os.getenv("HUGGINGFACE_KEY")
if not HUGGINGFACE_KEY:
    st.error("Groq API key not found. Please set it in Hugging Face Secrets.")
    # Stop the script run here: the original code fell through and built a
    # client with api_key=None, which only failed later with a confusing
    # error at query time.
    st.stop()

# Client used for Groq chat-completion calls in the query section below.
groq_client = Groq(api_key=HUGGINGFACE_KEY)

# Sentence-embedding model used for both document chunks and user queries.
embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Source document (shipped in the app's root directory) and the on-disk
# location where the FAISS index is persisted between runs.
file_path = "The Rise of Agentic AI.pdf"
VECTORSTORE_FOLDER = "vectorstore"
os.makedirs(VECTORSTORE_FOLDER, exist_ok=True)
vectorstore_path = os.path.join(VECTORSTORE_FOLDER, "index.faiss")

# Load a previously persisted index if present; on a missing or unreadable
# file, fall back to an empty L2 flat index sized to the embedder's output.
if os.path.exists(vectorstore_path):
    try:
        index = faiss.read_index(vectorstore_path)
    except Exception as e:
        st.error(f"Error reading the FAISS index: {e}")
        index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())
else:
    index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())

# Text chunks backing the vectors in `index`.
# NOTE(review): this list is rebuilt empty on every Streamlit rerun, so a
# persisted index from an earlier session has no matching chunks until
# "Process PDF" is clicked again — the query section guards against this.
chunks = []
# Function to load text from PDF
def load_pdf_text(file_path):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        file_path: Path to the PDF file to read.

    Returns:
        The extracted text of all pages joined together.  Pages for which
        pdfplumber finds no text contribute an empty string — the original
        code crashed with a TypeError here, because ``extract_text()``
        returns ``None`` for image-only or empty pages.
    """
    with pdfplumber.open(file_path) as pdf:
        # `or ""` guards against extract_text() returning None; join avoids
        # the quadratic cost of repeated `text +=` concatenation.
        return "".join((page.extract_text() or "") for page in pdf.pages)
# Function to chunk text into smaller pieces
def chunk_text(text, chunk_size=500, overlap=100):
    """Split *text* into overlapping fixed-size chunks.

    Args:
        text: The string to split.
        chunk_size: Maximum length of each chunk.
        overlap: Number of characters shared between consecutive chunks.

    Returns:
        A list of substrings, each at most ``chunk_size`` characters, with
        consecutive chunks overlapping by ``overlap`` characters.  Empty
        input yields an empty list.

    Raises:
        ValueError: If ``overlap >= chunk_size`` — the stride would be <= 0,
            which previously surfaced as a cryptic ``range()`` error.
    """
    step = chunk_size - overlap
    if step <= 0:
        raise ValueError("overlap must be smaller than chunk_size")
    return [text[i:i + chunk_size] for i in range(0, len(text), step)]
# Process the document and update vector store
def process_and_store_document(file_path):
    """Process the PDF: extract text, chunk it, embed the chunks, persist.

    Side effects:
        - Replaces the module-level ``chunks`` list with the new chunks.
        - Resets and refills the module-level FAISS ``index``.  The original
          code only appended, so clicking "Process PDF" repeatedly added
          duplicate vectors whose positions no longer lined up with
          ``chunks``; resetting keeps the two in exact correspondence.
        - Writes the updated index to ``vectorstore_path``.

    Args:
        file_path: Path to the PDF document to ingest.
    """
    global chunks  # queried by the retrieval code at module level
    st.info("Processing PDF document...")
    text = load_pdf_text(file_path)
    chunks = chunk_text(text)
    if not chunks:
        st.error("No text could be extracted from the PDF.")
        return
    embeddings = embedder.encode(chunks, show_progress_bar=True)
    # Reset first so vector i in the index always corresponds to chunks[i].
    index.reset()
    index.add(np.array(embeddings))
    try:
        faiss.write_index(index, vectorstore_path)
        st.success("Document processed and vector store updated!")
    except Exception as e:
        st.error(f"Error saving the FAISS index: {e}")
# --- Streamlit UI -----------------------------------------------------------
st.title("The Rise of Agentic AI RAG Application")

# Button to (re)build the vector store from the bundled PDF.
if st.button("Process PDF"):
    process_and_store_document(file_path)

# Free-text query against the processed document.
user_query = st.text_input("Enter your query:", key="query_input")
if user_query:
    if not chunks:
        # `chunks` lives only for the current script run, so the document
        # must be (re)processed before querying.
        st.error("Please process the document first by clicking 'Process PDF'.")
    else:
        # Embed the query and retrieve the 5 nearest chunks from FAISS.
        query_embedding = embedder.encode([user_query])
        distances, indices = index.search(np.array(query_embedding), k=5)
        if indices.size == 0 or np.any(indices[0] == -1):
            st.error("No relevant results found in the index.")
        else:
            # Keep only indices that map to an existing chunk; `0 <= idx`
            # added as a belt-and-braces guard alongside the bound check
            # (the index can hold more vectors than `chunks` after a
            # restart with a persisted index).
            valid_indices = [idx for idx in indices[0] if 0 <= idx < len(chunks)]
            if not valid_indices:
                st.error("No valid indices found for the retrieved chunks.")
            else:
                retrieved_chunks = [chunks[idx] for idx in valid_indices]
                # Build the prompt with the context clearly separated from
                # the question.  The original concatenated the query directly
                # onto the last chunk with no separator, which blurred the
                # boundary between context and question for the model.
                combined_input = (
                    "Context:\n" + "\n\n".join(retrieved_chunks)
                    + "\n\nQuestion: " + user_query
                )
                try:
                    chat_completion = groq_client.chat.completions.create(
                        messages=[{"role": "user", "content": combined_input}],
                        model="llama3-8b-8192",
                    )
                    # Display only the generated answer, not the raw chunks.
                    st.subheader("Generated Response")
                    st.write(chat_completion.choices[0].message.content)
                except Exception as e:
                    st.error(f"Error generating response: {e}")

# Footer rendered via the `.footer` CSS class injected at the top of the app.
st.markdown("<div class='footer'>Created by Shamil Shahbaz</div>", unsafe_allow_html=True)