Spaces:

shamilcoded
/

RagBaseApp

Sleeping

App Files Files Community

SHAMIL SHAHBAZ AWAN committed on Dec 25, 2024

Commit

e6aac09

verified ·

1 Parent(s): e44a38f

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -44

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import pdfplumber
 from sentence_transformers import SentenceTransformer
 import faiss
 import numpy as np
-from groq import Client  # Ensure you're importing the correct Groq client
 # Set background image and customize colors
 background_image_url = "https://www.shutterstock.com/image-vector/artificial-intelligence-circuit-electric-line-600nw-2465096659.jpg"
@@ -18,17 +18,14 @@ st.markdown(
         background-repeat: no-repeat;
     }}
-    /* Ensure title is black */
     h1 {{
-        color: black !important;  /* Force title color to black */
     }}
-    /* Set all text in the app to white */
-    h2, h3, h4, h5, h6, p, div {{
-        color: white !important;  /* Set all text color to white */
     }}
-    /* Set footer styling */
     .footer {{
         position: fixed;
         bottom: 0;
@@ -41,13 +38,11 @@ st.markdown(
         font-size: 14px;
     }}
-    /* Set processing button color to green */
     .stButton button {{
         background-color: green;
         color: white;
     }}
-    /* Set query input block background color to white */
     .stTextInput input {{
         background-color: white;
         color: black;
@@ -63,38 +58,35 @@ if not HUGGINGFACE_KEY:
     st.error("Hugging Face API token not found. Please set it in the Hugging Face Secrets.")
 # Initialize Groq client
-groq_client = Client(api_key=HUGGINGFACE_KEY)
 # Load the SentenceTransformer model for embedding generation
 embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
 # Define file path and vector store folder
-file_path = "The Rise of Agentic AI.pdf"  # File directly in the root directory of the app
-VECTORSTORE_FOLDER = "vectorstore"  # Folder where the FAISS index will be stored
 # Ensure the vector store folder exists
 if not os.path.exists(VECTORSTORE_FOLDER):
     os.makedirs(VECTORSTORE_FOLDER)
 # Define the vector store path
-vectorstore_path = os.path.join(VECTORSTORE_FOLDER, "index.faiss")  # Correct path to the index file
 # Load or create FAISS index
 if os.path.exists(vectorstore_path):
-    # If the index file exists, read it
     try:
         index = faiss.read_index(vectorstore_path)
     except Exception as e:
         st.error(f"Error reading the FAISS index: {e}")
         index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())
 else:
-    # If the index file doesn't exist, create a new one
     index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())
 # Variable to hold chunks globally
 chunks = []
-# Function to load text from PDF
 def load_pdf_text(file_path):
     """Extract text from the given PDF file."""
     text = ""
@@ -103,7 +95,6 @@ def load_pdf_text(file_path):
             text += page.extract_text()
     return text
-# Function to chunk text into smaller pieces
 def chunk_text(text, chunk_size=500, overlap=100):
     """Chunk the text into overlapping chunks."""
     chunks = []
@@ -111,83 +102,65 @@ def chunk_text(text, chunk_size=500, overlap=100):
         chunks.append(text[i:i + chunk_size])
     return chunks
-# Process the document and update vector store
 def process_and_store_document(file_path):
     """Process the PDF document, chunk text, generate embeddings, and store them in FAISS."""
-    global chunks  # Make chunks global to access in the query part
     st.info("Processing PDF document...")
-    # Extract text from the PDF file
     text = load_pdf_text(file_path)
-    # Chunk the text into smaller pieces
     chunks = chunk_text(text)
-    # Generate embeddings for each chunk
     embeddings = embedder.encode(chunks, show_progress_bar=True)
-    # Add the embeddings to the FAISS index
     index.add(np.array(embeddings))
-    # Save the updated FAISS index
     try:
         faiss.write_index(index, vectorstore_path)
         st.success("Document processed and vector store updated!")
     except Exception as e:
         st.error(f"Error saving the FAISS index: {e}")
-# User interface for Streamlit
 st.title("The Rise of Agentic AI RAG Application")
-# Button to trigger document processing
 if st.button("Process PDF"):
     process_and_store_document(file_path)
-# Query input for the user
 user_query = st.text_input("Enter your query:")
 if user_query:
-    # Check if there are any chunks in the index
     if not chunks:
         st.error("Please process the document first by clicking 'Process PDF'.")
     else:
-        # Generate embedding for the user query
         query_embedding = embedder.encode([user_query])
-        # Perform the search on the FAISS index
         distances, indices = index.search(np.array(query_embedding), k=5)
-        # Check if the indices returned are valid
         if indices.size == 0 or np.any(indices[0] == -1):
             st.error("No relevant results found in the index.")
         else:
-            # Ensure indices are within the bounds of the chunks list
             valid_indices = [idx for idx in indices[0] if idx < len(chunks)]
             if not valid_indices:
                 st.error("No valid indices found for the retrieved chunks.")
             else:
-                # Retrieve the most relevant chunks based on the valid indices
                 retrieved_chunks = [chunks[idx] for idx in valid_indices]
-                # Display the retrieved chunks in white text
                 st.subheader("Retrieved Chunks")
                 for chunk in retrieved_chunks:
-                    st.markdown(f"<p style='color:white;'>{chunk}</p>", unsafe_allow_html=True)
-                # Combine the retrieved chunks with the query and generate a response using Groq
                 combined_input = " ".join(retrieved_chunks) + user_query
                 try:
-                    # Assuming the correct Groq method is `predict` or another name; this is a placeholder
-                    response = groq_client.predict(model="llama3-8b-8192", prompt=combined_input, max_tokens=200)
-                    # Display the generated response in white text
                     st.subheader("Generated Response")
-                    st.markdown(f"<p style='color:white;'>{response['text']}</p>", unsafe_allow_html=True)
                 except Exception as e:
                     st.error(f"Error generating response: {e}")
-# Footer
 st.markdown("<div class='footer'>Created by Shamil Shahbaz</div>", unsafe_allow_html=True)

 from sentence_transformers import SentenceTransformer
 import faiss
 import numpy as np
+from groq import Groq
 # Set background image and customize colors
 background_image_url = "https://www.shutterstock.com/image-vector/artificial-intelligence-circuit-electric-line-600nw-2465096659.jpg"
         background-repeat: no-repeat;
     }}
     h1 {{
+        color: black !important;
     }}
+    h2, h3, h4, h5, h6, p {{
+        color: black;
     }}
     .footer {{
         position: fixed;
         bottom: 0;
         font-size: 14px;
     }}
     .stButton button {{
         background-color: green;
         color: white;
     }}
     .stTextInput input {{
         background-color: white;
         color: black;
     st.error("Hugging Face API token not found. Please set it in the Hugging Face Secrets.")
 # Initialize Groq client
+groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
 # Load the SentenceTransformer model for embedding generation
 embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
 # Define file path and vector store folder
+file_path = "The Rise of Agentic AI.pdf"
+VECTORSTORE_FOLDER = "vectorstore"
 # Ensure the vector store folder exists
 if not os.path.exists(VECTORSTORE_FOLDER):
     os.makedirs(VECTORSTORE_FOLDER)
 # Define the vector store path
+vectorstore_path = os.path.join(VECTORSTORE_FOLDER, "index.faiss")
 # Load or create FAISS index
 if os.path.exists(vectorstore_path):
     try:
         index = faiss.read_index(vectorstore_path)
     except Exception as e:
         st.error(f"Error reading the FAISS index: {e}")
         index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())
 else:
     index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())
 # Variable to hold chunks globally
 chunks = []
 def load_pdf_text(file_path):
     """Extract text from the given PDF file."""
     text = ""
             text += page.extract_text()
     return text
 def chunk_text(text, chunk_size=500, overlap=100):
     """Chunk the text into overlapping chunks."""
     chunks = []
         chunks.append(text[i:i + chunk_size])
     return chunks
 def process_and_store_document(file_path):
     """Process the PDF document, chunk text, generate embeddings, and store them in FAISS."""
+    global chunks
     st.info("Processing PDF document...")
     text = load_pdf_text(file_path)
     chunks = chunk_text(text)
     embeddings = embedder.encode(chunks, show_progress_bar=True)
     index.add(np.array(embeddings))
     try:
         faiss.write_index(index, vectorstore_path)
         st.success("Document processed and vector store updated!")
     except Exception as e:
         st.error(f"Error saving the FAISS index: {e}")
 st.title("The Rise of Agentic AI RAG Application")
 if st.button("Process PDF"):
     process_and_store_document(file_path)
 user_query = st.text_input("Enter your query:")
 if user_query:
     if not chunks:
         st.error("Please process the document first by clicking 'Process PDF'.")
     else:
         query_embedding = embedder.encode([user_query])
         distances, indices = index.search(np.array(query_embedding), k=5)
         if indices.size == 0 or np.any(indices[0] == -1):
             st.error("No relevant results found in the index.")
         else:
             valid_indices = [idx for idx in indices[0] if idx < len(chunks)]
             if not valid_indices:
                 st.error("No valid indices found for the retrieved chunks.")
             else:
                 retrieved_chunks = [chunks[idx] for idx in valid_indices]
                 st.subheader("Retrieved Chunks")
                 for chunk in retrieved_chunks:
+                    st.write(chunk)
                 combined_input = " ".join(retrieved_chunks) + user_query
                 try:
+                    # Using the Groq client for generating a response
+                    chat_completion = groq_client.chat.completions.create(
+                        messages=[{"role": "user", "content": combined_input}],
+                        model="llama3-8b-8192"
+                    )
+                    response = chat_completion.choices[0].message.content
                     st.subheader("Generated Response")
+                    st.write(response)
                 except Exception as e:
                     st.error(f"Error generating response: {e}")
 st.markdown("<div class='footer'>Created by Shamil Shahbaz</div>", unsafe_allow_html=True)