Spaces:
Sleeping
Sleeping
Update functions.py
Browse files- functions.py +5 -5
functions.py
CHANGED
|
@@ -204,15 +204,15 @@ def create_retriever_from_chroma(vectorstore_path="./docs/chroma/", search_type=
|
|
| 204 |
docs = extract_sentences_from_web(links=urls)
|
| 205 |
|
| 206 |
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
is_separator_regex = True
|
| 211 |
)
|
| 212 |
-
|
| 213 |
|
| 214 |
|
| 215 |
-
|
| 216 |
documents=split_docs, embedding=embeddings, persist_directory=vectorstore_path
|
| 217 |
)
|
| 218 |
|
|
|
|
| 204 |
docs = extract_sentences_from_web(links=urls)
|
| 205 |
|
| 206 |
|
| 207 |
+
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
| 208 |
+
chunk_size=chunk_size, chunk_overlap=chunk_overlap,
|
| 209 |
+
separators=["\n\n \n\n","\n\n\n", "\n\n", r"In \[[0-9]+\]", r"\n+", r"\s+"],
|
| 210 |
is_separator_regex = True
|
| 211 |
)
|
| 212 |
+
split_docs = text_splitter.split_documents(docs)
|
| 213 |
|
| 214 |
|
| 215 |
+
vectorstore = Chroma.from_documents(
|
| 216 |
documents=split_docs, embedding=embeddings, persist_directory=vectorstore_path
|
| 217 |
)
|
| 218 |
|