cryogenic22 committed on
Commit
786a572
·
verified ·
1 Parent(s): 499ee3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -15
app.py CHANGED
@@ -293,26 +293,37 @@ def initialize_chat_from_existing():
293
  return False
294
 
295
  def initialize_chat_from_collection():
296
- """Initialize chat system from existing collection or documents."""
297
  try:
298
- if not st.session_state.chat_ready:
299
- documents = None
300
- if st.session_state.get('current_collection'):
301
- documents = get_collection_documents(st.session_state.db_conn,
302
- st.session_state.current_collection['id'])
303
- else:
304
- documents = get_all_documents(st.session_state.db_conn)
305
 
306
- if documents:
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  embeddings = get_embeddings_model()
308
  text_splitter = RecursiveCharacterTextSplitter(
309
- chunk_size=700,
310
- chunk_overlap=100,
311
  length_function=len,
312
  separators=["\n\n", "\n", " ", ""]
313
  )
314
 
315
- # Process documents into chunks
316
  chunks = []
317
  for doc in documents:
318
  doc_chunks = text_splitter.split_text(doc['content'])
@@ -322,7 +333,6 @@ def initialize_chat_from_collection():
322
  'metadata': {'source': doc['name'], 'document_id': doc['id']}
323
  })
324
 
325
- # Initialize vector store
326
  vector_store = FAISS.from_texts(
327
  [chunk['content'] for chunk in chunks],
328
  embeddings,
@@ -331,9 +341,10 @@ def initialize_chat_from_collection():
331
 
332
  st.session_state.vector_store = vector_store
333
  st.session_state.qa_system = initialize_qa_system(vector_store)
334
- st.session_state.chat_ready = True
335
  return True
336
- return st.session_state.chat_ready
 
 
337
  except Exception as e:
338
  st.error(f"Error initializing chat: {e}")
339
  return False
 
293
  return False
294
 
295
  def initialize_chat_from_collection():
296
+ """Initialize chat system with vector store reuse."""
297
  try:
298
+ documents = None
299
+ if st.session_state.get('current_collection'):
300
+ documents = get_collection_documents(st.session_state.db_conn,
301
+ st.session_state.current_collection['id'])
302
+ else:
303
+ documents = get_all_documents(st.session_state.db_conn)
 
304
 
305
+ if documents:
306
+ document_ids = [doc['id'] for doc in documents]
307
+
308
+ # Check for existing vector store
309
+ vector_store = get_existing_vector_store(document_ids)
310
+
311
+ if vector_store:
312
+ # Reuse existing vector store
313
+ st.session_state.vector_store = vector_store
314
+ st.session_state.qa_system = initialize_qa_system(vector_store)
315
+ return True
316
+
317
+ # If no existing vector store, create new one
318
+ with st.spinner("Initializing chat system..."):
319
  embeddings = get_embeddings_model()
320
  text_splitter = RecursiveCharacterTextSplitter(
321
+ chunk_size=500,
322
+ chunk_overlap=50,
323
  length_function=len,
324
  separators=["\n\n", "\n", " ", ""]
325
  )
326
 
 
327
  chunks = []
328
  for doc in documents:
329
  doc_chunks = text_splitter.split_text(doc['content'])
 
333
  'metadata': {'source': doc['name'], 'document_id': doc['id']}
334
  })
335
 
 
336
  vector_store = FAISS.from_texts(
337
  [chunk['content'] for chunk in chunks],
338
  embeddings,
 
341
 
342
  st.session_state.vector_store = vector_store
343
  st.session_state.qa_system = initialize_qa_system(vector_store)
 
344
  return True
345
+
346
+ return False
347
+
348
  except Exception as e:
349
  st.error(f"Error initializing chat: {e}")
350
  return False