Spaces:

ArturG9
/

Info_Assistant

Sleeping

ArturG9 committed on Sep 9, 2024

Commit

8ac6fa5

verified ·

1 Parent(s): 3ca0178

Update functions.py

Files changed (1) hide show

functions.py CHANGED Viewed

@@ -204,15 +204,15 @@ def create_retriever_from_chroma(vectorstore_path="./docs/chroma/", search_type=
     docs = extract_sentences_from_web(links=urls)
-        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
-        chunk_size=chunk_size, chunk_overlap=chunk_overlap,
-        separators=["\n\n \n\n","\n\n\n", "\n\n", r"In \[[0-9]+\]", r"\n+", r"\s+"],
         is_separator_regex = True
     )
-        split_docs = text_splitter.split_documents(docs)
-        vectorstore = Chroma.from_documents(
             documents=split_docs, embedding=embeddings, persist_directory=vectorstore_path
         )

     docs = extract_sentences_from_web(links=urls)
+    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
+    chunk_size=chunk_size, chunk_overlap=chunk_overlap,
+    separators=["\n\n \n\n","\n\n\n", "\n\n", r"In \[[0-9]+\]", r"\n+", r"\s+"],
         is_separator_regex = True
     )
+    split_docs = text_splitter.split_documents(docs)
+    vectorstore = Chroma.from_documents(
             documents=split_docs, embedding=embeddings, persist_directory=vectorstore_path
         )