Update app.py
Browse files
app.py
CHANGED
|
@@ -78,6 +78,7 @@ text_splitter = RecursiveCharacterTextSplitter(
|
|
| 78 |
chunk_size=1600,
|
| 79 |
chunk_overlap=150,
|
| 80 |
add_start_index=True
|
|
|
|
| 81 |
)
|
| 82 |
def split_document_with_metadata(document):
|
| 83 |
# Split the document text into chunks.
|
|
@@ -110,10 +111,10 @@ vectorstore = Chroma.from_documents(
|
|
| 110 |
print("Created new vector store and persisted embeddings.")
|
| 111 |
|
| 112 |
# Create a BM25 retriever from the document splits.
|
| 113 |
-
bm25_retriever = BM25Retriever.from_documents(all_splits)
|
| 114 |
ensemble_retriever = EnsembleRetriever(
|
| 115 |
retrievers=[
|
| 116 |
-
vectorstore.as_retriever(),
|
| 117 |
bm25_retriever
|
| 118 |
],
|
| 119 |
weights=[0.9, 0.1]
|
|
|
|
| 78 |
chunk_size=1600,
|
| 79 |
chunk_overlap=150,
|
| 80 |
add_start_index=True,
|
| 81 |
+
separators=["\n\n", "\n", ". ", " ", ""]
|
| 82 |
)
|
| 83 |
def split_document_with_metadata(document):
|
| 84 |
# Split the document text into chunks.
|
|
|
|
| 111 |
print("Created new vector store and persisted embeddings.")
|
| 112 |
|
| 113 |
# Create a BM25 retriever from the document splits.
|
| 114 |
+
bm25_retriever = BM25Retriever.from_documents(all_splits,search_kwargs={"score_threshold": 0.7})
|
| 115 |
ensemble_retriever = EnsembleRetriever(
|
| 116 |
retrievers=[
|
| 117 |
+
vectorstore.as_retriever(search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.5}),
|
| 118 |
bm25_retriever
|
| 119 |
],
|
| 120 |
weights=[0.9, 0.1]
|