SHAMIL SHAHBAZ AWAN committed on
Commit
a23ed3c
·
verified ·
1 Parent(s): 182a751

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -20
app.py CHANGED
@@ -18,14 +18,17 @@ st.markdown(
18
  background-repeat: no-repeat;
19
  }}
20
 
 
21
  h1 {{
22
- color: black !important;
23
  }}
24
 
 
25
  h2, h3, h4, h5, h6, p {{
26
- color: black;
27
  }}
28
 
 
29
  .footer {{
30
  position: fixed;
31
  bottom: 0;
@@ -38,11 +41,12 @@ st.markdown(
38
  font-size: 14px;
39
  }}
40
 
 
41
  .stButton button {{
42
  background-color: green;
43
  color: white;
44
  }}
45
-
46
  .stTextInput input {{
47
  background-color: white;
48
  color: black;
@@ -52,41 +56,45 @@ st.markdown(
52
  unsafe_allow_html=True
53
  )
54
 
55
- # Load Hugging Face Secrets
56
  HUGGINGFACE_KEY = os.getenv("HUGGINGFACE_KEY")
 
57
  if not HUGGINGFACE_KEY:
58
- st.error("Hugging Face API token not found. Please set it in the Hugging Face Secrets.")
59
 
60
- # Initialize Groq client
61
- groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
62
 
63
  # Load the SentenceTransformer model for embedding generation
64
  embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
65
 
66
  # Define file path and vector store folder
67
- file_path = "The Rise of Agentic AI.pdf"
68
- VECTORSTORE_FOLDER = "vectorstore"
69
 
70
  # Ensure the vector store folder exists
71
  if not os.path.exists(VECTORSTORE_FOLDER):
72
  os.makedirs(VECTORSTORE_FOLDER)
73
 
74
  # Define the vector store path
75
- vectorstore_path = os.path.join(VECTORSTORE_FOLDER, "index.faiss")
76
 
77
  # Load or create FAISS index
78
  if os.path.exists(vectorstore_path):
 
79
  try:
80
  index = faiss.read_index(vectorstore_path)
81
  except Exception as e:
82
  st.error(f"Error reading the FAISS index: {e}")
83
  index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())
84
  else:
 
85
  index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())
86
 
87
  # Variable to hold chunks globally
88
  chunks = []
89
 
 
90
  def load_pdf_text(file_path):
91
  """Extract text from the given PDF file."""
92
  text = ""
@@ -95,6 +103,7 @@ def load_pdf_text(file_path):
95
  text += page.extract_text()
96
  return text
97
 
 
98
  def chunk_text(text, chunk_size=500, overlap=100):
99
  """Chunk the text into overlapping chunks."""
100
  chunks = []
@@ -102,65 +111,89 @@ def chunk_text(text, chunk_size=500, overlap=100):
102
  chunks.append(text[i:i + chunk_size])
103
  return chunks
104
 
 
105
  def process_and_store_document(file_path):
106
  """Process the PDF document, chunk text, generate embeddings, and store them in FAISS."""
107
- global chunks
108
 
109
  st.info("Processing PDF document...")
110
 
 
111
  text = load_pdf_text(file_path)
 
 
112
  chunks = chunk_text(text)
113
 
 
114
  embeddings = embedder.encode(chunks, show_progress_bar=True)
 
 
115
  index.add(np.array(embeddings))
116
 
 
117
  try:
118
  faiss.write_index(index, vectorstore_path)
119
  st.success("Document processed and vector store updated!")
120
  except Exception as e:
121
  st.error(f"Error saving the FAISS index: {e}")
122
 
 
123
  st.title("The Rise of Agentic AI RAG Application")
124
 
125
- if st.button("Process PDF"):
126
- process_and_store_document(file_path)
127
-
128
  user_query = st.text_input("Enter your query:")
129
 
130
  if user_query:
 
131
  if not chunks:
132
  st.error("Please process the document first by clicking 'Process PDF'.")
133
  else:
 
134
  query_embedding = embedder.encode([user_query])
 
 
135
  distances, indices = index.search(np.array(query_embedding), k=5)
136
 
 
137
  if indices.size == 0 or np.any(indices[0] == -1):
138
  st.error("No relevant results found in the index.")
139
  else:
 
140
  valid_indices = [idx for idx in indices[0] if idx < len(chunks)]
141
 
142
  if not valid_indices:
143
  st.error("No valid indices found for the retrieved chunks.")
144
  else:
 
145
  retrieved_chunks = [chunks[idx] for idx in valid_indices]
146
 
 
147
  st.subheader("Retrieved Chunks")
148
  for chunk in retrieved_chunks:
149
  st.write(chunk)
150
 
 
151
  combined_input = " ".join(retrieved_chunks) + user_query
152
 
 
153
  try:
154
- # Using the Groq client for generating a response
155
  chat_completion = groq_client.chat.completions.create(
156
- messages=[{"role": "user", "content": combined_input}],
157
- model="llama3-8b-8192"
 
 
 
158
  )
159
-
160
- response = chat_completion.choices[0].message.content
161
  st.subheader("Generated Response")
162
- st.write(response)
163
  except Exception as e:
164
  st.error(f"Error generating response: {e}")
165
 
 
 
 
 
 
166
  st.markdown("<div class='footer'>Created by Shamil Shahbaz</div>", unsafe_allow_html=True)
 
18
  background-repeat: no-repeat;
19
  }}
20
 
21
+ /* Ensure title is black */
22
  h1 {{
23
+ color: black !important; /* Force title color to black */
24
  }}
25
 
26
+ /* Set heading and paragraph text color to black */
27
  h2, h3, h4, h5, h6, p {{
28
+ color: black; /* Set text color to black */
29
  }}
30
 
31
+ /* Set footer styling */
32
  .footer {{
33
  position: fixed;
34
  bottom: 0;
 
41
  font-size: 14px;
42
  }}
43
 
44
+ /* Set processing button color to green */
45
  .stButton button {{
46
  background-color: green;
47
  color: white;
48
  }}
49
+ /* Set query input block background color to white */
50
  .stTextInput input {{
51
  background-color: white;
52
  color: black;
 
56
  unsafe_allow_html=True
57
  )
58
 
59
+ # Use your Groq API key from Hugging Face Secrets
60
  HUGGINGFACE_KEY = os.getenv("HUGGINGFACE_KEY")
61
+
62
  if not HUGGINGFACE_KEY:
63
+ st.error("Groq API key not found. Please set it in Hugging Face Secrets.")
64
 
65
+ # Initialize Groq client with the correct API key
66
+ groq_client = Groq(api_key=HUGGINGFACE_KEY)
67
 
68
  # Load the SentenceTransformer model for embedding generation
69
  embedder = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
70
 
71
  # Define file path and vector store folder
72
+ file_path = "The Rise of Agentic AI.pdf" # File directly in the root directory of the app
73
+ VECTORSTORE_FOLDER = "vectorstore" # Folder where the FAISS index will be stored
74
 
75
  # Ensure the vector store folder exists
76
  if not os.path.exists(VECTORSTORE_FOLDER):
77
  os.makedirs(VECTORSTORE_FOLDER)
78
 
79
  # Define the vector store path
80
+ vectorstore_path = os.path.join(VECTORSTORE_FOLDER, "index.faiss") # Correct path to the index file
81
 
82
  # Load or create FAISS index
83
  if os.path.exists(vectorstore_path):
84
+ # If the index file exists, read it
85
  try:
86
  index = faiss.read_index(vectorstore_path)
87
  except Exception as e:
88
  st.error(f"Error reading the FAISS index: {e}")
89
  index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())
90
  else:
91
+ # If the index file doesn't exist, create a new one
92
  index = faiss.IndexFlatL2(embedder.get_sentence_embedding_dimension())
93
 
94
  # Variable to hold chunks globally
95
  chunks = []
96
 
97
+ # Function to load text from PDF
98
  def load_pdf_text(file_path):
99
  """Extract text from the given PDF file."""
100
  text = ""
 
103
  text += page.extract_text()
104
  return text
105
 
106
+ # Function to chunk text into smaller pieces
107
  def chunk_text(text, chunk_size=500, overlap=100):
108
  """Chunk the text into overlapping chunks."""
109
  chunks = []
 
111
  chunks.append(text[i:i + chunk_size])
112
  return chunks
113
 
114
+ # Process the document and update vector store
115
  def process_and_store_document(file_path):
116
  """Process the PDF document, chunk text, generate embeddings, and store them in FAISS."""
117
+ global chunks # Make chunks global to access in the query part
118
 
119
  st.info("Processing PDF document...")
120
 
121
+ # Extract text from the PDF file
122
  text = load_pdf_text(file_path)
123
+
124
+ # Chunk the text into smaller pieces
125
  chunks = chunk_text(text)
126
 
127
+ # Generate embeddings for each chunk
128
  embeddings = embedder.encode(chunks, show_progress_bar=True)
129
+
130
+ # Add the embeddings to the FAISS index
131
  index.add(np.array(embeddings))
132
 
133
+ # Save the updated FAISS index
134
  try:
135
  faiss.write_index(index, vectorstore_path)
136
  st.success("Document processed and vector store updated!")
137
  except Exception as e:
138
  st.error(f"Error saving the FAISS index: {e}")
139
 
140
+ # User interface for Streamlit
141
  st.title("The Rise of Agentic AI RAG Application")
142
 
143
+ # Query input for the user
 
 
144
  user_query = st.text_input("Enter your query:")
145
 
146
  if user_query:
147
+ # Check if there are any chunks in the index
148
  if not chunks:
149
  st.error("Please process the document first by clicking 'Process PDF'.")
150
  else:
151
+ # Generate embedding for the user query
152
  query_embedding = embedder.encode([user_query])
153
+
154
+ # Perform the search on the FAISS index
155
  distances, indices = index.search(np.array(query_embedding), k=5)
156
 
157
+ # Check if the indices returned are valid
158
  if indices.size == 0 or np.any(indices[0] == -1):
159
  st.error("No relevant results found in the index.")
160
  else:
161
+ # Ensure indices are within the bounds of the chunks list
162
  valid_indices = [idx for idx in indices[0] if idx < len(chunks)]
163
 
164
  if not valid_indices:
165
  st.error("No valid indices found for the retrieved chunks.")
166
  else:
167
+ # Retrieve the most relevant chunks based on the valid indices
168
  retrieved_chunks = [chunks[idx] for idx in valid_indices]
169
 
170
+ # Display the retrieved chunks
171
  st.subheader("Retrieved Chunks")
172
  for chunk in retrieved_chunks:
173
  st.write(chunk)
174
 
175
+ # Combine the retrieved chunks with the query and generate a response using Groq
176
  combined_input = " ".join(retrieved_chunks) + user_query
177
 
178
+ # Generate a response with Groq
179
  try:
 
180
  chat_completion = groq_client.chat.completions.create(
181
+ messages=[{
182
+ "role": "user",
183
+ "content": combined_input,
184
+ }],
185
+ model="llama3-8b-8192", # Specify the model you want to use
186
  )
187
+
188
+ # Display the generated response
189
  st.subheader("Generated Response")
190
+ st.write(chat_completion.choices[0].message.content)
191
  except Exception as e:
192
  st.error(f"Error generating response: {e}")
193
 
194
+ # Button to trigger document processing
195
+ if st.button("Process PDF"):
196
+ process_and_store_document(file_path)
197
+
198
+ # Footer
199
  st.markdown("<div class='footer'>Created by Shamil Shahbaz</div>", unsafe_allow_html=True)