Wajahat698 commited on
Commit
f92b7ae
·
verified ·
1 Parent(s): 8248bf9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -242,7 +242,7 @@ def index_document_content(doc_content, doc_id):
242
  texts = text_splitter.split_text(doc_content)
243
 
244
  # Create embeddings for each chunk
245
- embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", api_key=openai_api_key)
246
  doc_metadata = [{"doc_id": doc_id, "chunk_id": i} for i in range(len(texts))]
247
  vector_store = FAISS.from_texts(texts, embeddings, metadatas=doc_metadata)
248
 
@@ -257,6 +257,7 @@ def index_document_content(doc_content, doc_id):
257
 
258
 
259
 
 
260
  def fetch_trustbuilders(user_id):
261
  """
262
  Retrieve TrustBuilders from Firebase for a specific user.
@@ -546,8 +547,8 @@ def load_main_data_source():
546
 
547
  # Split text into chunks
548
  text_splitter = RecursiveCharacterTextSplitter(
549
- chunk_size=8000, # Keep large sections intact
550
- chunk_overlap=2000, # Large overlap for context retention
551
  )
552
  main_texts = text_splitter.split_text(file_text)
553
 
@@ -572,16 +573,15 @@ def refresh_main_faiss_index():
572
  st.session_state["main_faiss_db"] = FAISS.from_documents(main_sources, embeddings)
573
 
574
  num_docs = len(st.session_state["main_faiss_db"].docstore._dict)
575
- print(f"✅ Main FAISS index loaded with {num_docs} documents.")
 
576
 
577
  def refresh_faiss_index(selected_doc_id=None):
578
  """Refresh FAISS index while keeping the main knowledge base intact."""
579
  if selected_doc_id is None:
580
- print("❌ No document selected. FAISS index was NOT updated.")
581
  return
582
 
583
  if "documents" not in st.session_state or selected_doc_id not in st.session_state["documents"]:
584
- print(f"❌ Selected document ID {selected_doc_id} not found in session state.")
585
  return
586
 
587
  doc_content = st.session_state["documents"][selected_doc_id]["content"]
@@ -590,7 +590,7 @@ def refresh_faiss_index(selected_doc_id=None):
590
  return
591
 
592
  # Create embeddings and index only the selected document
593
- embeddings = OpenAIEmbeddings()
594
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=500)
595
  texts = text_splitter.split_text(doc_content)
596
 
@@ -606,7 +606,7 @@ def refresh_faiss_index(selected_doc_id=None):
606
  st.session_state["faiss_db"] = new_vector_store # ✅ Store only the selected doc
607
 
608
  num_docs = len(st.session_state["faiss_db"].docstore._dict)
609
- print(f"✅ FAISS index updated with only the selected document ({num_docs} chunks).")
610
 
611
 
612
 
 
242
  texts = text_splitter.split_text(doc_content)
243
 
244
  # Create embeddings for each chunk
245
+ embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=client)
246
  doc_metadata = [{"doc_id": doc_id, "chunk_id": i} for i in range(len(texts))]
247
  vector_store = FAISS.from_texts(texts, embeddings, metadatas=doc_metadata)
248
 
 
257
 
258
 
259
 
260
+
261
  def fetch_trustbuilders(user_id):
262
  """
263
  Retrieve TrustBuilders from Firebase for a specific user.
 
547
 
548
  # Split text into chunks
549
  text_splitter = RecursiveCharacterTextSplitter(
550
+ chunk_size=1500, # Moderate chunks for finer-grained retrieval
551
+ chunk_overlap=200, # Small overlap to retain context across chunk boundaries
552
  )
553
  main_texts = text_splitter.split_text(file_text)
554
 
 
573
  st.session_state["main_faiss_db"] = FAISS.from_documents(main_sources, embeddings)
574
 
575
  num_docs = len(st.session_state["main_faiss_db"].docstore._dict)
576
+
577
+
578
 
579
  def refresh_faiss_index(selected_doc_id=None):
580
  """Refresh FAISS index while keeping the main knowledge base intact."""
581
  if selected_doc_id is None:
 
582
  return
583
 
584
  if "documents" not in st.session_state or selected_doc_id not in st.session_state["documents"]:
 
585
  return
586
 
587
  doc_content = st.session_state["documents"][selected_doc_id]["content"]
 
590
  return
591
 
592
  # Create embeddings and index only the selected document
593
+ embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=client)
594
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=500)
595
  texts = text_splitter.split_text(doc_content)
596
 
 
606
  st.session_state["faiss_db"] = new_vector_store # ✅ Store only the selected doc
607
 
608
  num_docs = len(st.session_state["faiss_db"].docstore._dict)
609
+
610
 
611
 
612