anl139 committed on
Commit
c7533c2
·
verified ·
1 Parent(s): 5e3c715

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -20
app.py CHANGED
@@ -67,13 +67,13 @@ def load_and_process_data(file_path: str):
67
  # Data Loading and Preprocessing
68
  # -------------------------------
69
 
70
- file_path = './2024data.json' # Ensure this file is available in your environment.
71
  docs = load_and_process_data(file_path)
72
 
73
  # Use a text splitter to create chunks from the documents
74
  from langchain_text_splitters import RecursiveCharacterTextSplitter
75
  text_splitter = RecursiveCharacterTextSplitter(
76
- chunk_size=1000,
77
  chunk_overlap=150,
78
  add_start_index=True
79
  )
@@ -84,24 +84,11 @@ all_splits = text_splitter.split_documents(docs)
84
  # -------------------------------
85
 
86
  # Create a Chroma vector store using the document splits
87
- persist_directory = "./chroma_db"
88
-
89
- # Check if the persist directory exists and contains data.
90
- if os.path.exists(persist_directory) and os.listdir(persist_directory):
91
- # Load the persisted vector store
92
- vectorstore = Chroma(
93
- persist_directory=persist_directory,
94
- embedding_function=OpenAIEmbeddings()
95
- )
96
- print("Loaded vector store from persist directory.")
97
- else:
98
- # Create a new vector store from your document splits and persist it.
99
- vectorstore = Chroma.from_documents(
100
- documents=all_splits,
101
- embedding=OpenAIEmbeddings(),
102
- persist_directory=persist_directory
103
- )
104
- print("Created new vector store and persisted embeddings.")
105
 
106
  # Create a BM25 retriever from the document splits
107
  bm25_retriever = BM25Retriever.from_documents(all_splits)
 
67
  # Data Loading and Preprocessing
68
  # -------------------------------
69
 
70
+ file_path = './data.json' # Ensure this file is available in your environment.
71
  docs = load_and_process_data(file_path)
72
 
73
  # Use a text splitter to create chunks from the documents
74
  from langchain_text_splitters import RecursiveCharacterTextSplitter
75
  text_splitter = RecursiveCharacterTextSplitter(
76
+ chunk_size=1500,
77
  chunk_overlap=150,
78
  add_start_index=True
79
  )
 
84
  # -------------------------------
85
 
86
  # Create a Chroma vector store using the document splits
87
+ vectorstore = Chroma.from_documents(
88
+ documents=all_splits,
89
+ embedding=OpenAIEmbeddings(),
90
+ persist_directory="./chroma_db"
91
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  # Create a BM25 retriever from the document splits
94
  bm25_retriever = BM25Retriever.from_documents(all_splits)