SHAMIL SHAHBAZ AWAN committed on
Commit
e6aac09
·
verified ·
1 Parent(s): e44a38f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -44
app.py CHANGED
@@ -4,7 +4,7 @@ import pdfplumber
4
  from sentence_transformers import SentenceTransformer
5
  import faiss
6
  import numpy as np
7
- from groq import Client # Ensure you're importing the correct Groq client
8
 
9
  # Set background image and customize colors
10
  background_image_url = "https://www.shutterstock.com/image-vector/artificial-intelligence-circuit-electric-line-600nw-2465096659.jpg"
@@ -18,17 +18,14 @@ st.markdown(
18
  background-repeat: no-repeat;
19
  }}
20
 
21
- /* Ensure title is black */
22
  h1 {{
23
- color: black !important; /* Force title color to black */
24
  }}
25
 
26
- /* Set all text in the app to white */
27
- h2, h3, h4, h5, h6, p, div {{
28
- color: white !important; /* Set all text color to white */
29
  }}
30
 
31
- /* Set footer styling */
32
  .footer {{
33
  position: fixed;
34
  bottom: 0;
@@ -41,13 +38,11 @@ st.markdown(
41
  font-size: 14px;
42
  }}
43
 
44
- /* Set processing button color to green */
45
  .stButton button {{
46
  background-color: green;
47
  color: white;
48
  }}
49
 
50
- /* Set query input block background color to white */
51
  .stTextInput input {{
52
  background-color: white;
53
  color: black;
@@ -63,38 +58,35 @@ if not HUGGINGFACE_KEY:
63
  st.error("Hugging Face API token not found. Please set it in the Hugging Face Secrets.")
64
 
65
  # Initialize Groq client
66
- groq_client = Client(api_key=HUGGINGFACE_KEY)
67
 
68
  # Load the SentenceTransformer model for embedding generation
69
  embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
70
 
71
  # Define file path and vector store folder
72
- file_path = "The Rise of Agentic AI.pdf" # File directly in the root directory of the app
73
- VECTORSTORE_FOLDER = "vectorstore" # Folder where the FAISS index will be stored
74
 
75
  # Ensure the vector store folder exists
76
  if not os.path.exists(VECTORSTORE_FOLDER):
77
  os.makedirs(VECTORSTORE_FOLDER)
78
 
79
  # Define the vector store path
80
- vectorstore_path = os.path.join(VECTORSTORE_FOLDER, "index.faiss") # Correct path to the index file
81
 
82
  # Load or create FAISS index
83
  if os.path.exists(vectorstore_path):
84
- # If the index file exists, read it
85
  try:
86
  index = faiss.read_index(vectorstore_path)
87
  except Exception as e:
88
  st.error(f"Error reading the FAISS index: {e}")
89
  index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())
90
  else:
91
- # If the index file doesn't exist, create a new one
92
  index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())
93
 
94
  # Variable to hold chunks globally
95
  chunks = []
96
 
97
- # Function to load text from PDF
98
  def load_pdf_text(file_path):
99
  """Extract text from the given PDF file."""
100
  text = ""
@@ -103,7 +95,6 @@ def load_pdf_text(file_path):
103
  text += page.extract_text()
104
  return text
105
 
106
- # Function to chunk text into smaller pieces
107
  def chunk_text(text, chunk_size=500, overlap=100):
108
  """Chunk the text into overlapping chunks."""
109
  chunks = []
@@ -111,83 +102,65 @@ def chunk_text(text, chunk_size=500, overlap=100):
111
  chunks.append(text[i:i + chunk_size])
112
  return chunks
113
 
114
- # Process the document and update vector store
115
  def process_and_store_document(file_path):
116
  """Process the PDF document, chunk text, generate embeddings, and store them in FAISS."""
117
- global chunks # Make chunks global to access in the query part
118
 
119
  st.info("Processing PDF document...")
120
 
121
- # Extract text from the PDF file
122
  text = load_pdf_text(file_path)
123
-
124
- # Chunk the text into smaller pieces
125
  chunks = chunk_text(text)
126
 
127
- # Generate embeddings for each chunk
128
  embeddings = embedder.encode(chunks, show_progress_bar=True)
129
-
130
- # Add the embeddings to the FAISS index
131
  index.add(np.array(embeddings))
132
 
133
- # Save the updated FAISS index
134
  try:
135
  faiss.write_index(index, vectorstore_path)
136
  st.success("Document processed and vector store updated!")
137
  except Exception as e:
138
  st.error(f"Error saving the FAISS index: {e}")
139
 
140
- # User interface for Streamlit
141
  st.title("The Rise of Agentic AI RAG Application")
142
 
143
- # Button to trigger document processing
144
  if st.button("Process PDF"):
145
  process_and_store_document(file_path)
146
 
147
- # Query input for the user
148
  user_query = st.text_input("Enter your query:")
149
 
150
  if user_query:
151
- # Check if there are any chunks in the index
152
  if not chunks:
153
  st.error("Please process the document first by clicking 'Process PDF'.")
154
  else:
155
- # Generate embedding for the user query
156
  query_embedding = embedder.encode([user_query])
157
-
158
- # Perform the search on the FAISS index
159
  distances, indices = index.search(np.array(query_embedding), k=5)
160
 
161
- # Check if the indices returned are valid
162
  if indices.size == 0 or np.any(indices[0] == -1):
163
  st.error("No relevant results found in the index.")
164
  else:
165
- # Ensure indices are within the bounds of the chunks list
166
  valid_indices = [idx for idx in indices[0] if idx < len(chunks)]
167
 
168
  if not valid_indices:
169
  st.error("No valid indices found for the retrieved chunks.")
170
  else:
171
- # Retrieve the most relevant chunks based on the valid indices
172
  retrieved_chunks = [chunks[idx] for idx in valid_indices]
173
 
174
- # Display the retrieved chunks in white text
175
  st.subheader("Retrieved Chunks")
176
  for chunk in retrieved_chunks:
177
- st.markdown(f"<p style='color:white;'>{chunk}</p>", unsafe_allow_html=True)
178
 
179
- # Combine the retrieved chunks with the query and generate a response using Groq
180
  combined_input = " ".join(retrieved_chunks) + user_query
181
 
182
  try:
183
- # Assuming the correct Groq method is `predict` or another name; this is a placeholder
184
- response = groq_client.predict(model="llama3-8b-8192", prompt=combined_input, max_tokens=200)
 
 
 
185
 
186
- # Display the generated response in white text
187
  st.subheader("Generated Response")
188
- st.markdown(f"<p style='color:white;'>{response['text']}</p>", unsafe_allow_html=True)
189
  except Exception as e:
190
  st.error(f"Error generating response: {e}")
191
 
192
- # Footer
193
  st.markdown("<div class='footer'>Created by Shamil Shahbaz</div>", unsafe_allow_html=True)
 
4
  from sentence_transformers import SentenceTransformer
5
  import faiss
6
  import numpy as np
7
+ from groq import Groq
8
 
9
  # Set background image and customize colors
10
  background_image_url = "https://www.shutterstock.com/image-vector/artificial-intelligence-circuit-electric-line-600nw-2465096659.jpg"
 
18
  background-repeat: no-repeat;
19
  }}
20
 
 
21
  h1 {{
22
+ color: black !important;
23
  }}
24
 
25
+ h2, h3, h4, h5, h6, p {{
26
+ color: black;
 
27
  }}
28
 
 
29
  .footer {{
30
  position: fixed;
31
  bottom: 0;
 
38
  font-size: 14px;
39
  }}
40
 
 
41
  .stButton button {{
42
  background-color: green;
43
  color: white;
44
  }}
45
 
 
46
  .stTextInput input {{
47
  background-color: white;
48
  color: black;
 
58
  st.error("Hugging Face API token not found. Please set it in the Hugging Face Secrets.")
59
 
60
# Initialize the Groq client from the GROQ_API_KEY environment variable.
# NOTE(review): only HUGGINGFACE_KEY is validated earlier in the script;
# validate this key too so a missing secret fails loudly here instead of
# at the first chat-completion call.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    st.error("Groq API key not found. Please set GROQ_API_KEY in the secrets.")
groq_client = Groq(api_key=groq_api_key)

# SentenceTransformer model used to embed both document chunks and queries.
embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Source document and the folder where the FAISS index is persisted.
file_path = "The Rise of Agentic AI.pdf"
VECTORSTORE_FOLDER = "vectorstore"

# Ensure the vector store folder exists (idempotent).
os.makedirs(VECTORSTORE_FOLDER, exist_ok=True)

# Path of the serialized FAISS index inside the vector store folder.
vectorstore_path = os.path.join(VECTORSTORE_FOLDER, "index.faiss")

# Load the persisted FAISS index if present; otherwise start an empty
# L2 (Euclidean) flat index sized to the embedder's output dimension.
if os.path.exists(vectorstore_path):
    try:
        index = faiss.read_index(vectorstore_path)
    except Exception as e:
        st.error(f"Error reading the FAISS index: {e}")
        index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())
else:
    index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())

# Chunks of the processed document; filled by process_and_store_document.
chunks = []
89
 
 
90
  def load_pdf_text(file_path):
91
  """Extract text from the given PDF file."""
92
  text = ""
 
95
  text += page.extract_text()
96
  return text
97
 
 
98
  def chunk_text(text, chunk_size=500, overlap=100):
99
  """Chunk the text into overlapping chunks."""
100
  chunks = []
 
102
  chunks.append(text[i:i + chunk_size])
103
  return chunks
104
 
 
105
def process_and_store_document(file_path):
    """Process the PDF: extract its text, chunk it, embed the chunks, and
    persist the embeddings in the FAISS index.

    Side effects: rebinds the module-global ``chunks`` list, repopulates the
    module-global ``index``, and writes the index to ``vectorstore_path``.
    """
    global chunks

    st.info("Processing PDF document...")

    text = load_pdf_text(file_path)
    chunks = chunk_text(text)

    # Guard the empty case: embedder.encode([]) / index.add of an empty
    # array would fail, and an empty store is useless anyway.
    if not chunks:
        st.error("No text could be extracted from the PDF.")
        return

    embeddings = embedder.encode(chunks, show_progress_bar=True)

    # Reset before adding: a previously persisted index may already hold
    # vectors, and stale entries would make FAISS ids point past the end of
    # the freshly built ``chunks`` list at query time.
    index.reset()
    index.add(np.array(embeddings))

    try:
        faiss.write_index(index, vectorstore_path)
        st.success("Document processed and vector store updated!")
    except Exception as e:
        st.error(f"Error saving the FAISS index: {e}")
122
 
 
123
# ---- User interface -------------------------------------------------------
st.title("The Rise of Agentic AI RAG Application")

# Build (or rebuild) the vector store from the bundled PDF.
if st.button("Process PDF"):
    process_and_store_document(file_path)

user_query = st.text_input("Enter your query:")

if user_query:
    if not chunks:
        st.error("Please process the document first by clicking 'Process PDF'.")
    else:
        # Embed the query and retrieve the 5 nearest chunks.
        query_embedding = embedder.encode([user_query])
        distances, indices = index.search(np.array(query_embedding), k=5)

        if indices.size == 0 or np.any(indices[0] == -1):
            st.error("No relevant results found in the index.")
        else:
            # FAISS pads missing results with -1, and a stale persisted
            # index can return ids past the end of the current chunk list,
            # so keep only ids that actually address ``chunks``.
            valid_indices = [idx for idx in indices[0] if 0 <= idx < len(chunks)]

            if not valid_indices:
                st.error("No valid indices found for the retrieved chunks.")
            else:
                retrieved_chunks = [chunks[idx] for idx in valid_indices]

                st.subheader("Retrieved Chunks")
                for chunk in retrieved_chunks:
                    st.write(chunk)

                # Separate context from question: the original glued the last
                # chunk directly onto the query with no delimiter, producing
                # a garbled prompt.
                combined_input = " ".join(retrieved_chunks) + " " + user_query

                try:
                    # Ask the Groq-hosted Llama 3 model to answer using the
                    # retrieved context.
                    chat_completion = groq_client.chat.completions.create(
                        messages=[{"role": "user", "content": combined_input}],
                        model="llama3-8b-8192"
                    )

                    response = chat_completion.choices[0].message.content
                    st.subheader("Generated Response")
                    st.write(response)
                except Exception as e:
                    st.error(f"Error generating response: {e}")

st.markdown("<div class='footer'>Created by Shamil Shahbaz</div>", unsafe_allow_html=True)