Spaces:

anl139
/

test

Sleeping

anl139 committed on Feb 7, 2025

Commit

c7533c2

verified ·

1 Parent(s): 5e3c715

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -67,13 +67,13 @@ def load_and_process_data(file_path: str):
 # Data Loading and Preprocessing
 # -------------------------------
-file_path = './2024data.json'  # Ensure this file is available in your environment.
 docs = load_and_process_data(file_path)
 # Use a text splitter to create chunks from the documents
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 text_splitter = RecursiveCharacterTextSplitter(
-    chunk_size=1000,
     chunk_overlap=150,
     add_start_index=True
 )
@@ -84,24 +84,11 @@ all_splits = text_splitter.split_documents(docs)
 # -------------------------------
 # Create a Chroma vector store using the document splits
-persist_directory = "./chroma_db"
-# Check if the persist directory exists and contains data.
-if os.path.exists(persist_directory) and os.listdir(persist_directory):
-    # Load the persisted vector store
-    vectorstore = Chroma(
-        persist_directory=persist_directory,
-        embedding_function=OpenAIEmbeddings()
-    )
-    print("Loaded vector store from persist directory.")
-else:
-    # Create a new vector store from your document splits and persist it.
-    vectorstore = Chroma.from_documents(
-        documents=all_splits,
-        embedding=OpenAIEmbeddings(),
-        persist_directory=persist_directory
-    )
-    print("Created new vector store and persisted embeddings.")
 # Create a BM25 retriever from the document splits
 bm25_retriever = BM25Retriever.from_documents(all_splits)

 # Data Loading and Preprocessing
 # -------------------------------
+file_path = './data.json'  # Ensure this file is available in your environment.
 docs = load_and_process_data(file_path)
 # Use a text splitter to create chunks from the documents
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 text_splitter = RecursiveCharacterTextSplitter(
+    chunk_size=1500,
     chunk_overlap=150,
     add_start_index=True
 )
 # -------------------------------
 # Create a Chroma vector store using the document splits
+vectorstore = Chroma.from_documents(
+    documents=all_splits,
+    embedding=OpenAIEmbeddings(),
+    persist_directory="./chroma_db"
+)
 # Create a BM25 retriever from the document splits
 bm25_retriever = BM25Retriever.from_documents(all_splits)