Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -242,7 +242,7 @@ def index_document_content(doc_content, doc_id):
|
|
| 242 |
texts = text_splitter.split_text(doc_content)
|
| 243 |
|
| 244 |
# Create embeddings for each chunk
|
| 245 |
-
embeddings = OpenAIEmbeddings(model="text-embedding-
|
| 246 |
doc_metadata = [{"doc_id": doc_id, "chunk_id": i} for i in range(len(texts))]
|
| 247 |
vector_store = FAISS.from_texts(texts, embeddings, metadatas=doc_metadata)
|
| 248 |
|
|
@@ -257,6 +257,7 @@ def index_document_content(doc_content, doc_id):
|
|
| 257 |
|
| 258 |
|
| 259 |
|
|
|
|
| 260 |
def fetch_trustbuilders(user_id):
|
| 261 |
"""
|
| 262 |
Retrieve TrustBuilders from Firebase for a specific user.
|
|
@@ -546,8 +547,8 @@ def load_main_data_source():
|
|
| 546 |
|
| 547 |
# Split text into chunks
|
| 548 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 549 |
-
chunk_size=
|
| 550 |
-
chunk_overlap=
|
| 551 |
)
|
| 552 |
main_texts = text_splitter.split_text(file_text)
|
| 553 |
|
|
@@ -572,16 +573,15 @@ def refresh_main_faiss_index():
|
|
| 572 |
st.session_state["main_faiss_db"] = FAISS.from_documents(main_sources, embeddings)
|
| 573 |
|
| 574 |
num_docs = len(st.session_state["main_faiss_db"].docstore._dict)
|
| 575 |
-
|
|
|
|
| 576 |
|
| 577 |
def refresh_faiss_index(selected_doc_id=None):
|
| 578 |
"""Refresh FAISS index while keeping the main knowledge base intact."""
|
| 579 |
if selected_doc_id is None:
|
| 580 |
-
print("❌ No document selected. FAISS index was NOT updated.")
|
| 581 |
return
|
| 582 |
|
| 583 |
if "documents" not in st.session_state or selected_doc_id not in st.session_state["documents"]:
|
| 584 |
-
print(f"❌ Selected document ID {selected_doc_id} not found in session state.")
|
| 585 |
return
|
| 586 |
|
| 587 |
doc_content = st.session_state["documents"][selected_doc_id]["content"]
|
|
@@ -590,7 +590,7 @@ def refresh_faiss_index(selected_doc_id=None):
|
|
| 590 |
return
|
| 591 |
|
| 592 |
# Create embeddings and index only the selected document
|
| 593 |
-
embeddings = OpenAIEmbeddings()
|
| 594 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=500)
|
| 595 |
texts = text_splitter.split_text(doc_content)
|
| 596 |
|
|
@@ -606,7 +606,7 @@ def refresh_faiss_index(selected_doc_id=None):
|
|
| 606 |
st.session_state["faiss_db"] = new_vector_store # ✅ Store only the selected doc
|
| 607 |
|
| 608 |
num_docs = len(st.session_state["faiss_db"].docstore._dict)
|
| 609 |
-
|
| 610 |
|
| 611 |
|
| 612 |
|
|
|
|
| 242 |
texts = text_splitter.split_text(doc_content)
|
| 243 |
|
| 244 |
# Create embeddings for each chunk
|
| 245 |
+
embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=client)
|
| 246 |
doc_metadata = [{"doc_id": doc_id, "chunk_id": i} for i in range(len(texts))]
|
| 247 |
vector_store = FAISS.from_texts(texts, embeddings, metadatas=doc_metadata)
|
| 248 |
|
|
|
|
| 257 |
|
| 258 |
|
| 259 |
|
| 260 |
+
|
| 261 |
def fetch_trustbuilders(user_id):
|
| 262 |
"""
|
| 263 |
Retrieve TrustBuilders from Firebase for a specific user.
|
|
|
|
| 547 |
|
| 548 |
# Split text into chunks
|
| 549 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 550 |
+
chunk_size=1500, # Keep large sections intact
|
| 551 |
+
chunk_overlap=200, # Large overlap for context retention
|
| 552 |
)
|
| 553 |
main_texts = text_splitter.split_text(file_text)
|
| 554 |
|
|
|
|
| 573 |
st.session_state["main_faiss_db"] = FAISS.from_documents(main_sources, embeddings)
|
| 574 |
|
| 575 |
num_docs = len(st.session_state["main_faiss_db"].docstore._dict)
|
| 576 |
+
|
| 577 |
+
|
| 578 |
|
| 579 |
def refresh_faiss_index(selected_doc_id=None):
|
| 580 |
"""Refresh FAISS index while keeping the main knowledge base intact."""
|
| 581 |
if selected_doc_id is None:
|
|
|
|
| 582 |
return
|
| 583 |
|
| 584 |
if "documents" not in st.session_state or selected_doc_id not in st.session_state["documents"]:
|
|
|
|
| 585 |
return
|
| 586 |
|
| 587 |
doc_content = st.session_state["documents"][selected_doc_id]["content"]
|
|
|
|
| 590 |
return
|
| 591 |
|
| 592 |
# Create embeddings and index only the selected document
|
| 593 |
+
embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=client)
|
| 594 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=500)
|
| 595 |
texts = text_splitter.split_text(doc_content)
|
| 596 |
|
|
|
|
| 606 |
st.session_state["faiss_db"] = new_vector_store # ✅ Store only the selected doc
|
| 607 |
|
| 608 |
num_docs = len(st.session_state["faiss_db"].docstore._dict)
|
| 609 |
+
|
| 610 |
|
| 611 |
|
| 612 |
|