# RagBaseApp / app.py
# Author: Shamil Shahbaz Awan
# (header recovered from repository page metadata: "Update app.py", commit 7a29a17)
import os
import streamlit as st
import pdfplumber
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from groq import Groq
# Set background image and customize colors
# Inject global CSS into the Streamlit page: full-page background image,
# black title, white body/footer text, a fixed footer bar, green buttons,
# and a white query-input box.  Raw <style> requires unsafe_allow_html=True.
background_image_url = "https://cdn.pixabay.com/photo/2016/06/02/02/33/triangles-1430105_1280.png"
st.markdown(
    f"""
<style>
.stApp {{
background-image: url("{background_image_url}");
background-size: cover;
background-position: center center;
background-repeat: no-repeat;
}}
/* Ensure title is black */
h1 {{
color: black !important; /* Force title color to black */
}}
/* Set footer text color to white */
h2, h3, h4, h5, h6, p {{
color: white; /* Set all text color to white */
}}
/* Set footer styling */
.footer {{
position: fixed;
bottom: 0;
left: 0;
right: 0;
background-color: rgba(0, 0, 0, 0.6);
color: white;
text-align: center;
padding: 10px 0;
font-size: 14px;
}}
/* Set processing button color to green */
.stButton button {{
background-color: green;
color: white;
}}
/* Set query input block background color to white */
.stTextInput input {{
background-color: white;
color: black;
border-radius: 5px;
padding: 10px;
}}
/* Set all output text (retrieved chunks and responses) to white */
.stMarkdown, .stTextInput, .stText, .stCode, .stJson, .stFileUploader, .stError, .stSuccess {{
color: white !important;
}}
</style>
""",
    unsafe_allow_html=True
)
# --- API key, models, and vector-store setup --------------------------------
# The Groq API key is read from the environment (set via Hugging Face Secrets
# when deployed as a Space).
HUGGINGFACE_KEY = os.getenv("HUGGINGFACE_KEY")
if not HUGGINGFACE_KEY:
    st.error("Groq API key not found. Please set it in Hugging Face Secrets.")
    # Stop the script run here: the original code fell through and built a
    # client with api_key=None, which only failed later with a confusing
    # error at query time.
    st.stop()

# Client used for Groq chat-completion calls in the query section below.
groq_client = Groq(api_key=HUGGINGFACE_KEY)

# Sentence-embedding model used for both document chunks and user queries.
embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Source document (shipped in the app's root directory) and the on-disk
# location where the FAISS index is persisted between runs.
file_path = "The Rise of Agentic AI.pdf"
VECTORSTORE_FOLDER = "vectorstore"
os.makedirs(VECTORSTORE_FOLDER, exist_ok=True)
vectorstore_path = os.path.join(VECTORSTORE_FOLDER, "index.faiss")

# Load a previously persisted index if present; on a missing or unreadable
# file, fall back to an empty L2 flat index sized to the embedder's output.
if os.path.exists(vectorstore_path):
    try:
        index = faiss.read_index(vectorstore_path)
    except Exception as e:
        st.error(f"Error reading the FAISS index: {e}")
        index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())
else:
    index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())

# Text chunks backing the vectors in `index`.
# NOTE(review): this list is rebuilt empty on every Streamlit rerun, so a
# persisted index from an earlier session has no matching chunks until
# "Process PDF" is clicked again — the query section guards against this.
chunks = []
# Function to load text from PDF
def load_pdf_text(file_path):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        file_path: Path to the PDF file to read.

    Returns:
        The extracted text of all pages joined together.  Pages for which
        pdfplumber finds no text contribute an empty string — the original
        code crashed with a TypeError here, because ``extract_text()``
        returns ``None`` for image-only or empty pages.
    """
    with pdfplumber.open(file_path) as pdf:
        # `or ""` guards against extract_text() returning None; join avoids
        # the quadratic cost of repeated `text +=` concatenation.
        return "".join((page.extract_text() or "") for page in pdf.pages)
# Function to chunk text into smaller pieces
def chunk_text(text, chunk_size=500, overlap=100):
    """Split *text* into overlapping fixed-size chunks.

    Args:
        text: The string to split.
        chunk_size: Maximum length of each chunk.
        overlap: Number of characters shared between consecutive chunks.

    Returns:
        A list of substrings, each at most ``chunk_size`` characters, with
        consecutive chunks overlapping by ``overlap`` characters.  Empty
        input yields an empty list.

    Raises:
        ValueError: If ``overlap >= chunk_size`` — the stride would be <= 0,
            which previously surfaced as a cryptic ``range()`` error.
    """
    step = chunk_size - overlap
    if step <= 0:
        raise ValueError("overlap must be smaller than chunk_size")
    return [text[i:i + chunk_size] for i in range(0, len(text), step)]
# Process the document and update vector store
def process_and_store_document(file_path):
    """Process the PDF: extract text, chunk it, embed the chunks, persist.

    Side effects:
        - Replaces the module-level ``chunks`` list with the new chunks.
        - Resets and refills the module-level FAISS ``index``.  The original
          code only appended, so clicking "Process PDF" repeatedly added
          duplicate vectors whose positions no longer lined up with
          ``chunks``; resetting keeps the two in exact correspondence.
        - Writes the updated index to ``vectorstore_path``.

    Args:
        file_path: Path to the PDF document to ingest.
    """
    global chunks  # queried by the retrieval code at module level
    st.info("Processing PDF document...")
    text = load_pdf_text(file_path)
    chunks = chunk_text(text)
    if not chunks:
        st.error("No text could be extracted from the PDF.")
        return
    embeddings = embedder.encode(chunks, show_progress_bar=True)
    # Reset first so vector i in the index always corresponds to chunks[i].
    index.reset()
    index.add(np.array(embeddings))
    try:
        faiss.write_index(index, vectorstore_path)
        st.success("Document processed and vector store updated!")
    except Exception as e:
        st.error(f"Error saving the FAISS index: {e}")
# --- Streamlit UI -----------------------------------------------------------
st.title("The Rise of Agentic AI RAG Application")

# Button to (re)build the vector store from the bundled PDF.
if st.button("Process PDF"):
    process_and_store_document(file_path)

# Free-text query against the processed document.
user_query = st.text_input("Enter your query:", key="query_input")
if user_query:
    if not chunks:
        # `chunks` lives only for the current script run, so the document
        # must be (re)processed before querying.
        st.error("Please process the document first by clicking 'Process PDF'.")
    else:
        # Embed the query and retrieve the 5 nearest chunks from FAISS.
        query_embedding = embedder.encode([user_query])
        distances, indices = index.search(np.array(query_embedding), k=5)
        if indices.size == 0 or np.any(indices[0] == -1):
            st.error("No relevant results found in the index.")
        else:
            # Keep only indices that map to an existing chunk; `0 <= idx`
            # added as a belt-and-braces guard alongside the bound check
            # (the index can hold more vectors than `chunks` after a
            # restart with a persisted index).
            valid_indices = [idx for idx in indices[0] if 0 <= idx < len(chunks)]
            if not valid_indices:
                st.error("No valid indices found for the retrieved chunks.")
            else:
                retrieved_chunks = [chunks[idx] for idx in valid_indices]
                # Build the prompt with the context clearly separated from
                # the question.  The original concatenated the query directly
                # onto the last chunk with no separator, which blurred the
                # boundary between context and question for the model.
                combined_input = (
                    "Context:\n" + "\n\n".join(retrieved_chunks)
                    + "\n\nQuestion: " + user_query
                )
                try:
                    chat_completion = groq_client.chat.completions.create(
                        messages=[{"role": "user", "content": combined_input}],
                        model="llama3-8b-8192",
                    )
                    # Display only the generated answer, not the raw chunks.
                    st.subheader("Generated Response")
                    st.write(chat_completion.choices[0].message.content)
                except Exception as e:
                    st.error(f"Error generating response: {e}")

# Footer rendered via the `.footer` CSS class injected at the top of the app.
st.markdown("<div class='footer'>Created by Shamil Shahbaz</div>", unsafe_allow_html=True)