ash2203 committed on
Commit
65e37fd
·
verified ·
1 Parent(s): 061c1bf

Update app.py

Browse files

Handling deletion of indexes

Files changed (1) hide show
  1. app.py +59 -37
app.py CHANGED
@@ -58,6 +58,16 @@ def get_session_index_name():
58
  # Combine base name with unique ID, ensuring total length is under 45 chars
59
  return f"{base_name}-{unique_id}" # This will be like "docdb-12345678"
60
 
 
 
 
 
 
 
 
 
 
 
61
  if not st.session_state.initialized:
62
  # Clear everything only on first run or page refresh
63
  if os.path.exists("data"):
@@ -69,14 +79,8 @@ if not st.session_state.initialized:
69
  st.session_state.retriever = None
70
  st.session_state.initialized = True
71
 
72
- # Delete any existing index for this session (in case of page refresh)
73
- try:
74
- pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
75
- index_name = get_session_index_name()
76
- if index_name in pc.list_indexes().names():
77
- pc.delete_index(index_name)
78
- except Exception as e:
79
- st.error(f"Error cleaning up old index: {str(e)}")
80
 
81
  def save_uploaded_file(uploaded_file):
82
  """Save uploaded file to the data directory"""
@@ -93,11 +97,11 @@ def save_uploaded_file(uploaded_file):
93
  if os.path.exists(file_path):
94
  return file_path
95
  else:
96
- st.error(f"File not saved: {file_path}")
97
  return None
98
 
99
  except Exception as e:
100
- st.error(f"Error saving file: {str(e)}")
101
  return None
102
 
103
  def process_documents(uploaded_files_dict):
@@ -108,13 +112,16 @@ def process_documents(uploaded_files_dict):
108
 
109
  try:
110
  with st.spinner('Processing documents...'):
 
 
 
111
  docs = []
112
  # Process each file
113
  for filename, file_info in uploaded_files_dict.items():
114
  file_path = file_info["path"]
115
 
116
  if not os.path.exists(file_path):
117
- st.error(f"File not found: {file_path}")
118
  continue
119
 
120
  if filename.endswith(".pdf"):
@@ -131,7 +138,7 @@ def process_documents(uploaded_files_dict):
131
  docs.extend(file_doc)
132
 
133
  if not docs:
134
- st.error("No documents were successfully processed")
135
  return False
136
 
137
  # Split documents
@@ -150,10 +157,6 @@ def process_documents(uploaded_files_dict):
150
  index_name = get_session_index_name()
151
 
152
  try:
153
- # Recreate the index
154
- if index_name in pc.list_indexes().names():
155
- pc.delete_index(index_name)
156
-
157
  pc.create_index(
158
  name=index_name,
159
  dimension=512,
@@ -178,12 +181,14 @@ def process_documents(uploaded_files_dict):
178
  return True
179
 
180
  except PineconeApiException as e:
181
- st.error("File upload failed! Avoid interrupting document processing by uploading or removing files. Kindly refresh the app to continue.")
 
182
  st.session_state.chat_enabled = False
183
  return False
184
 
185
  except Exception as e:
186
- st.error(f"An error occurred during processing: {str(e)}")
 
187
  st.session_state.chat_enabled = False
188
  return False
189
  finally:
@@ -194,24 +199,35 @@ def doc2str(docs):
194
 
195
  def format_reranked_docs(pc, retriever, question):
196
  """Rerank documents using Pinecone's reranking model"""
197
- relevant_docs = [doc.page_content for doc in retriever.invoke(question) if len(doc.page_content)>5]
198
-
199
- reranked_docs = pc.inference.rerank(
200
- model="pinecone-rerank-v0",
201
- query=question,
202
- documents=relevant_docs,
203
- top_n=3,
204
- return_documents=True
205
- )
 
 
 
 
 
 
 
 
206
 
207
- final_docs = [d.document.text for d in reranked_docs.data]
208
- context = doc2str(final_docs)
209
- return context
 
 
 
 
 
210
 
211
  def run_chatbot(retriever, pc, llm):
212
  """Run the chatbot with the given components"""
213
- # st.markdown("<h4>💬 Chat with your Documents</h4>", unsafe_allow_html=True)
214
-
215
  # Initialize chat prompt
216
  prompt = ChatPromptTemplate.from_template("""
217
  You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
@@ -220,8 +236,11 @@ def run_chatbot(retriever, pc, llm):
220
  {context}
221
  </context>
222
 
223
- Important: Don't start revealing context in your responses until its asked. First look at the question and then think if the context is needed to answer this or its a normal question, once you have judged then only answer the question.
224
-
 
 
 
225
  Answer the following question:
226
 
227
  {question}""")
@@ -262,10 +281,11 @@ def run_chatbot(retriever, pc, llm):
262
  # Add assistant response to chat history
263
  st.session_state.messages.append({"role": "assistant", "content": response})
264
  except Exception as e:
265
- error_msg = f"An error occurred while processing your question: {str(e)}"
266
  with st.chat_message("assistant"):
267
- st.error(error_msg)
268
- st.session_state.messages.append({"role": "assistant", "content": f"❌ {error_msg}"})
 
269
 
270
  def process_and_chat():
271
  """Process documents and handle chat interface"""
@@ -285,6 +305,8 @@ def process_and_chat():
285
  # Check for removed files
286
  files_to_remove = set(st.session_state.uploaded_files.keys()) - current_uploaded_filenames
287
  if files_to_remove:
 
 
288
  for file_name in files_to_remove:
289
  # Remove file from session state
290
  if file_name in st.session_state.uploaded_files:
 
58
  # Combine base name with unique ID, ensuring total length is under 45 chars
59
  return f"{base_name}-{unique_id}" # This will be like "docdb-12345678"
60
 
61
def cleanup_pinecone_index():
    """Delete this session's Pinecone index if one exists.

    Best-effort cleanup: any failure is logged to stdout and swallowed so
    that app startup or document reprocessing is never blocked by a
    cleanup problem.
    """
    try:
        client = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
        session_index = get_session_index_name()
        # Only attempt deletion when the index actually exists.
        if session_index in client.list_indexes().names():
            client.delete_index(session_index)
    except Exception as e:
        # Log internally rather than surfacing the error in the UI.
        print(f"Error cleaning up index: {str(e)}")
70
+
71
  if not st.session_state.initialized:
72
  # Clear everything only on first run or page refresh
73
  if os.path.exists("data"):
 
79
  st.session_state.retriever = None
80
  st.session_state.initialized = True
81
 
82
+ # Clean up any existing index
83
+ cleanup_pinecone_index()
 
 
 
 
 
 
84
 
85
  def save_uploaded_file(uploaded_file):
86
  """Save uploaded file to the data directory"""
 
97
  if os.path.exists(file_path):
98
  return file_path
99
  else:
100
+ print(f"File not saved: {file_path}") # Log error internally
101
  return None
102
 
103
  except Exception as e:
104
+ print(f"Error saving file: {str(e)}") # Log error internally
105
  return None
106
 
107
  def process_documents(uploaded_files_dict):
 
112
 
113
  try:
114
  with st.spinner('Processing documents...'):
115
+ # Clean up existing index before processing
116
+ cleanup_pinecone_index()
117
+
118
  docs = []
119
  # Process each file
120
  for filename, file_info in uploaded_files_dict.items():
121
  file_path = file_info["path"]
122
 
123
  if not os.path.exists(file_path):
124
+ print(f"File not found: {file_path}") # Log error internally
125
  continue
126
 
127
  if filename.endswith(".pdf"):
 
138
  docs.extend(file_doc)
139
 
140
  if not docs:
141
+ st.warning("Unable to process the documents. Please try again.")
142
  return False
143
 
144
  # Split documents
 
157
  index_name = get_session_index_name()
158
 
159
  try:
 
 
 
 
160
  pc.create_index(
161
  name=index_name,
162
  dimension=512,
 
181
  return True
182
 
183
  except PineconeApiException as e:
184
+ print(f"Pinecone API error: {str(e)}") # Log error internally
185
+ st.warning("Unable to process documents at the moment. Please try again.")
186
  st.session_state.chat_enabled = False
187
  return False
188
 
189
  except Exception as e:
190
+ print(f"Processing error: {str(e)}") # Log error internally
191
+ st.warning("Unable to process documents at the moment. Please try again.")
192
  st.session_state.chat_enabled = False
193
  return False
194
  finally:
 
199
 
200
def format_reranked_docs(pc, retriever, question):
    """Build an LLM context string by reranking retrieved documents.

    Retrieves candidate passages for ``question``, reranks them with
    Pinecone's hosted reranking model, and joins the top 3 into a single
    string. Falls back to the first 3 retrieved passages (retrieval
    order) if reranking fails, or returns a no-context message when
    nothing non-empty was retrieved.
    """
    candidates = []
    for doc in retriever.invoke(question):
        if doc.page_content.strip():
            candidates.append(doc.page_content)

    # Nothing usable retrieved -> signal the lack of context to the caller.
    if not candidates:
        return "I don't have enough context to answer this question."

    try:
        # The rerank endpoint expects a list of {"text": ...} mappings.
        payload = [{"text": text} for text in candidates]

        response = pc.inference.rerank(
            model="pinecone-rerank-v0",
            query=question,
            documents=payload,
            top_n=3,
            return_documents=True
        )

        # Pull the raw text back out of the reranked results.
        top_texts = [item.document["text"] for item in response.data]
        return "\n\n".join(top_texts)
    except Exception as e:
        print(f"Error during reranking: {str(e)}")  # Log error internally
        # Degrade gracefully: use the raw retrieval order instead.
        return "\n\n".join(candidates[:3])
228
 
229
  def run_chatbot(retriever, pc, llm):
230
  """Run the chatbot with the given components"""
 
 
231
  # Initialize chat prompt
232
  prompt = ChatPromptTemplate.from_template("""
233
  You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.
 
236
  {context}
237
  </context>
238
 
239
+ <important>
240
+ Don't start revealing context in your responses until its asked. First look at the question and then think if the context is needed to answer this or its a normal question, once you have judged then only answer the question.
241
+ When there is no context, just respond on your own knowledge as a normal assistant.
242
+ </important>
243
+
244
  Answer the following question:
245
 
246
  {question}""")
 
281
  # Add assistant response to chat history
282
  st.session_state.messages.append({"role": "assistant", "content": response})
283
  except Exception as e:
284
+ print(f"Chat error: {str(e)}") # Log error internally
285
  with st.chat_message("assistant"):
286
+ error_msg = "I'm having trouble processing your question. Please try asking something else."
287
+ st.markdown(error_msg)
288
+ st.session_state.messages.append({"role": "assistant", "content": error_msg})
289
 
290
  def process_and_chat():
291
  """Process documents and handle chat interface"""
 
305
  # Check for removed files
306
  files_to_remove = set(st.session_state.uploaded_files.keys()) - current_uploaded_filenames
307
  if files_to_remove:
308
+ # Clean up index when files are removed
309
+ cleanup_pinecone_index()
310
  for file_name in files_to_remove:
311
  # Remove file from session state
312
  if file_name in st.session_state.uploaded_files: