Update app.py
Browse files
app.py
CHANGED
|
@@ -67,13 +67,13 @@ def load_and_process_data(file_path: str):
|
|
| 67 |
# Data Loading and Preprocessing
|
| 68 |
# -------------------------------
|
| 69 |
|
| 70 |
-
file_path = './
|
| 71 |
docs = load_and_process_data(file_path)
|
| 72 |
|
| 73 |
# Use a text splitter to create chunks from the documents
|
| 74 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 75 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 76 |
-
chunk_size=
|
| 77 |
chunk_overlap=150,
|
| 78 |
add_start_index=True
|
| 79 |
)
|
|
@@ -84,24 +84,11 @@ all_splits = text_splitter.split_documents(docs)
|
|
| 84 |
# -------------------------------
|
| 85 |
|
| 86 |
# Create a Chroma vector store using the document splits
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
vectorstore = Chroma(
|
| 93 |
-
persist_directory=persist_directory,
|
| 94 |
-
embedding_function=OpenAIEmbeddings()
|
| 95 |
-
)
|
| 96 |
-
print("Loaded vector store from persist directory.")
|
| 97 |
-
else:
|
| 98 |
-
# Create a new vector store from your document splits and persist it.
|
| 99 |
-
vectorstore = Chroma.from_documents(
|
| 100 |
-
documents=all_splits,
|
| 101 |
-
embedding=OpenAIEmbeddings(),
|
| 102 |
-
persist_directory=persist_directory
|
| 103 |
-
)
|
| 104 |
-
print("Created new vector store and persisted embeddings.")
|
| 105 |
|
| 106 |
# Create a BM25 retriever from the document splits
|
| 107 |
bm25_retriever = BM25Retriever.from_documents(all_splits)
|
|
|
|
| 67 |
# Data Loading and Preprocessing
|
| 68 |
# -------------------------------
|
| 69 |
|
| 70 |
+
file_path = './data.json' # Ensure this file is available in your environment.
|
| 71 |
docs = load_and_process_data(file_path)
|
| 72 |
|
| 73 |
# Use a text splitter to create chunks from the documents
|
| 74 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 75 |
text_splitter = RecursiveCharacterTextSplitter(
|
| 76 |
+
chunk_size=1500,
|
| 77 |
chunk_overlap=150,
|
| 78 |
add_start_index=True
|
| 79 |
)
|
|
|
|
| 84 |
# -------------------------------
|
| 85 |
|
| 86 |
# Create a Chroma vector store using the document splits
|
| 87 |
+
vectorstore = Chroma.from_documents(
|
| 88 |
+
documents=all_splits,
|
| 89 |
+
embedding=OpenAIEmbeddings(),
|
| 90 |
+
persist_directory="./chroma_db"
|
| 91 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
# Create a BM25 retriever from the document splits
|
| 94 |
bm25_retriever = BM25Retriever.from_documents(all_splits)
|