Wajahat698 commited on
Commit
f92b7ae
·
verified ·
1 Parent(s): 8248bf9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -242,7 +242,7 @@ def index_document_content(doc_content, doc_id):
242
  texts = text_splitter.split_text(doc_content)
243
 
244
  # Create embeddings for each chunk
245
- embeddings = OpenAIEmbeddings(model="text-embedding-ada-002", api_key=openai_api_key)
246
  doc_metadata = [{"doc_id": doc_id, "chunk_id": i} for i in range(len(texts))]
247
  vector_store = FAISS.from_texts(texts, embeddings, metadatas=doc_metadata)
248
 
@@ -257,6 +257,7 @@ def index_document_content(doc_content, doc_id):
257
 
258
 
259
 
 
260
  def fetch_trustbuilders(user_id):
261
  """
262
  Retrieve TrustBuilders from Firebase for a specific user.
@@ -546,8 +547,8 @@ def load_main_data_source():
546
 
547
  # Split text into chunks
548
  text_splitter = RecursiveCharacterTextSplitter(
549
- chunk_size=8000, # Keep large sections intact
550
- chunk_overlap=2000, # Large overlap for context retention
551
  )
552
  main_texts = text_splitter.split_text(file_text)
553
 
@@ -572,16 +573,15 @@ def refresh_main_faiss_index():
572
  st.session_state["main_faiss_db"] = FAISS.from_documents(main_sources, embeddings)
573
 
574
  num_docs = len(st.session_state["main_faiss_db"].docstore._dict)
575
- print(f"✅ Main FAISS index loaded with {num_docs} documents.")
 
576
 
577
  def refresh_faiss_index(selected_doc_id=None):
578
  """Refresh FAISS index while keeping the main knowledge base intact."""
579
  if selected_doc_id is None:
580
- print("❌ No document selected. FAISS index was NOT updated.")
581
  return
582
 
583
  if "documents" not in st.session_state or selected_doc_id not in st.session_state["documents"]:
584
- print(f"❌ Selected document ID {selected_doc_id} not found in session state.")
585
  return
586
 
587
  doc_content = st.session_state["documents"][selected_doc_id]["content"]
@@ -590,7 +590,7 @@ def refresh_faiss_index(selected_doc_id=None):
590
  return
591
 
592
  # Create embeddings and index only the selected document
593
- embeddings = OpenAIEmbeddings()
594
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=500)
595
  texts = text_splitter.split_text(doc_content)
596
 
@@ -606,7 +606,7 @@ def refresh_faiss_index(selected_doc_id=None):
606
  st.session_state["faiss_db"] = new_vector_store # ✅ Store only the selected doc
607
 
608
  num_docs = len(st.session_state["faiss_db"].docstore._dict)
609
- print(f"✅ FAISS index updated with only the selected document ({num_docs} chunks).")
610
 
611
 
612
 
 
242
  texts = text_splitter.split_text(doc_content)
243
 
244
  # Create embeddings for each chunk
245
+ embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=client)
246
  doc_metadata = [{"doc_id": doc_id, "chunk_id": i} for i in range(len(texts))]
247
  vector_store = FAISS.from_texts(texts, embeddings, metadatas=doc_metadata)
248
 
 
257
 
258
 
259
 
260
+
261
  def fetch_trustbuilders(user_id):
262
  """
263
  Retrieve TrustBuilders from Firebase for a specific user.
 
547
 
548
  # Split text into chunks
549
  text_splitter = RecursiveCharacterTextSplitter(
550
+ chunk_size=1500, # Moderate chunks for finer-grained retrieval
551
+ chunk_overlap=200, # Small overlap to retain context across chunk boundaries
552
  )
553
  main_texts = text_splitter.split_text(file_text)
554
 
 
573
  st.session_state["main_faiss_db"] = FAISS.from_documents(main_sources, embeddings)
574
 
575
  num_docs = len(st.session_state["main_faiss_db"].docstore._dict)
576
+
577
+
578
 
579
  def refresh_faiss_index(selected_doc_id=None):
580
  """Refresh FAISS index while keeping the main knowledge base intact."""
581
  if selected_doc_id is None:
 
582
  return
583
 
584
  if "documents" not in st.session_state or selected_doc_id not in st.session_state["documents"]:
 
585
  return
586
 
587
  doc_content = st.session_state["documents"][selected_doc_id]["content"]
 
590
  return
591
 
592
  # Create embeddings and index only the selected document
593
+ embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=client)
594
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=500)
595
  texts = text_splitter.split_text(doc_content)
596
 
 
606
  st.session_state["faiss_db"] = new_vector_store # ✅ Store only the selected doc
607
 
608
  num_docs = len(st.session_state["faiss_db"].docstore._dict)
609
+
610
 
611
 
612