Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -293,26 +293,37 @@ def initialize_chat_from_existing():
|
|
| 293 |
return False
|
| 294 |
|
| 295 |
def initialize_chat_from_collection():
|
| 296 |
-
"""Initialize chat system
|
| 297 |
try:
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
documents = get_all_documents(st.session_state.db_conn)
|
| 305 |
|
| 306 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 307 |
embeddings = get_embeddings_model()
|
| 308 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 309 |
-
chunk_size=
|
| 310 |
-
chunk_overlap=
|
| 311 |
length_function=len,
|
| 312 |
separators=["\n\n", "\n", " ", ""]
|
| 313 |
)
|
| 314 |
|
| 315 |
-
# Process documents into chunks
|
| 316 |
chunks = []
|
| 317 |
for doc in documents:
|
| 318 |
doc_chunks = text_splitter.split_text(doc['content'])
|
|
@@ -322,7 +333,6 @@ def initialize_chat_from_collection():
|
|
| 322 |
'metadata': {'source': doc['name'], 'document_id': doc['id']}
|
| 323 |
})
|
| 324 |
|
| 325 |
-
# Initialize vector store
|
| 326 |
vector_store = FAISS.from_texts(
|
| 327 |
[chunk['content'] for chunk in chunks],
|
| 328 |
embeddings,
|
|
@@ -331,9 +341,10 @@ def initialize_chat_from_collection():
|
|
| 331 |
|
| 332 |
st.session_state.vector_store = vector_store
|
| 333 |
st.session_state.qa_system = initialize_qa_system(vector_store)
|
| 334 |
-
st.session_state.chat_ready = True
|
| 335 |
return True
|
| 336 |
-
|
|
|
|
|
|
|
| 337 |
except Exception as e:
|
| 338 |
st.error(f"Error initializing chat: {e}")
|
| 339 |
return False
|
|
|
|
| 293 |
return False
|
| 294 |
|
| 295 |
def initialize_chat_from_collection():
|
| 296 |
+
"""Initialize chat system with vector store reuse."""
|
| 297 |
try:
|
| 298 |
+
documents = None
|
| 299 |
+
if st.session_state.get('current_collection'):
|
| 300 |
+
documents = get_collection_documents(st.session_state.db_conn,
|
| 301 |
+
st.session_state.current_collection['id'])
|
| 302 |
+
else:
|
| 303 |
+
documents = get_all_documents(st.session_state.db_conn)
|
|
|
|
| 304 |
|
| 305 |
+
if documents:
|
| 306 |
+
document_ids = [doc['id'] for doc in documents]
|
| 307 |
+
|
| 308 |
+
# Check for existing vector store
|
| 309 |
+
vector_store = get_existing_vector_store(document_ids)
|
| 310 |
+
|
| 311 |
+
if vector_store:
|
| 312 |
+
# Reuse existing vector store
|
| 313 |
+
st.session_state.vector_store = vector_store
|
| 314 |
+
st.session_state.qa_system = initialize_qa_system(vector_store)
|
| 315 |
+
return True
|
| 316 |
+
|
| 317 |
+
# If no existing vector store, create new one
|
| 318 |
+
with st.spinner("Initializing chat system..."):
|
| 319 |
embeddings = get_embeddings_model()
|
| 320 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 321 |
+
chunk_size=500,
|
| 322 |
+
chunk_overlap=50,
|
| 323 |
length_function=len,
|
| 324 |
separators=["\n\n", "\n", " ", ""]
|
| 325 |
)
|
| 326 |
|
|
|
|
| 327 |
chunks = []
|
| 328 |
for doc in documents:
|
| 329 |
doc_chunks = text_splitter.split_text(doc['content'])
|
|
|
|
| 333 |
'metadata': {'source': doc['name'], 'document_id': doc['id']}
|
| 334 |
})
|
| 335 |
|
|
|
|
| 336 |
vector_store = FAISS.from_texts(
|
| 337 |
[chunk['content'] for chunk in chunks],
|
| 338 |
embeddings,
|
|
|
|
| 341 |
|
| 342 |
st.session_state.vector_store = vector_store
|
| 343 |
st.session_state.qa_system = initialize_qa_system(vector_store)
|
|
|
|
| 344 |
return True
|
| 345 |
+
|
| 346 |
+
return False
|
| 347 |
+
|
| 348 |
except Exception as e:
|
| 349 |
st.error(f"Error initializing chat: {e}")
|
| 350 |
return False
|