anl139 committed on
Commit
6d0b309
·
verified ·
1 Parent(s): a3052bc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -2
app.py CHANGED
@@ -78,6 +78,7 @@ text_splitter = RecursiveCharacterTextSplitter(
78
  chunk_size=1600,
79
  chunk_overlap=150,
80
  add_start_index=True
 
81
  )
82
  def split_document_with_metadata(document):
83
  # Split the document text into chunks.
@@ -110,10 +111,10 @@ vectorstore = Chroma.from_documents(
110
  print("Created new vector store and persisted embeddings.")
111
 
112
  # Create a BM25 retriever from the document splits.
113
- bm25_retriever = BM25Retriever.from_documents(all_splits)
114
  ensemble_retriever = EnsembleRetriever(
115
  retrievers=[
116
- vectorstore.as_retriever(),
117
  bm25_retriever
118
  ],
119
  weights=[0.9, 0.1]
 
78
  chunk_size=1600,
79
  chunk_overlap=150,
80
  add_start_index=True
81
+ separators=["\n\n", "\n", ". ", " ", ""]
82
  )
83
  def split_document_with_metadata(document):
84
  # Split the document text into chunks.
 
111
  print("Created new vector store and persisted embeddings.")
112
 
113
  # Create a BM25 retriever from the document splits.
114
+ bm25_retriever = BM25Retriever.from_documents(all_splits,search_kwargs={"score_threshold": 0.7})
115
  ensemble_retriever = EnsembleRetriever(
116
  retrievers=[
117
+ vectorstore.as_retriever(search_kwargs={"score_threshold": 0.5},),
118
  bm25_retriever
119
  ],
120
  weights=[0.9, 0.1]