Added separator arg
Browse files
- ChromaDBFlow.py +3 -1
- ChromaDBFlow.yaml +1 -0
ChromaDBFlow.py
CHANGED
|
@@ -42,6 +42,7 @@ class ChromaDBFlow(AtomicFlow):
|
|
| 42 |
- `filter` (str): The filter to apply to the documents. Default: null
|
| 43 |
- `paths_to_data` (List[str]): The paths to the data to store in the database at instantiation. Default: []
|
| 44 |
- `chunk_size` (int): The size of the chunks to split the documents into. Default: 700
|
|
|
|
| 45 |
- `chunk_overlap` (int): The overlap between the chunks. Default: 0
|
| 46 |
- `persist_directory` (str): The directory to persist the database. Default: "./demo_db_dir"
|
| 47 |
|
|
@@ -133,7 +134,8 @@ class ChromaDBFlow(AtomicFlow):
|
|
| 133 |
full_docs = []
|
| 134 |
text_splitter = CharacterTextSplitter(
|
| 135 |
chunk_size=self.flow_config["chunk_size"],
|
| 136 |
-
chunk_overlap=self.flow_config["chunk_overlap"]
|
|
|
|
| 137 |
)
|
| 138 |
|
| 139 |
for path in self.flow_config["paths_to_data"]:
|
|
|
|
| 42 |
- `filter` (str): The filter to apply to the documents. Default: null
|
| 43 |
- `paths_to_data` (List[str]): The paths to the data to store in the database at instantiation. Default: []
|
| 44 |
- `chunk_size` (int): The size of the chunks to split the documents into. Default: 700
|
| 45 |
+
- `separator` (str): The separator to use to split the documents. Default: "\n"
|
| 46 |
- `chunk_overlap` (int): The overlap between the chunks. Default: 0
|
| 47 |
- `persist_directory` (str): The directory to persist the database. Default: "./demo_db_dir"
|
| 48 |
|
|
|
|
| 134 |
full_docs = []
|
| 135 |
text_splitter = CharacterTextSplitter(
|
| 136 |
chunk_size=self.flow_config["chunk_size"],
|
| 137 |
+
chunk_overlap=self.flow_config["chunk_overlap"],
|
| 138 |
+
separator=self.flow_config["separator"]
|
| 139 |
)
|
| 140 |
|
| 141 |
for path in self.flow_config["paths_to_data"]:
|
ChromaDBFlow.yaml
CHANGED
|
@@ -16,4 +16,5 @@ output_interface:
|
|
| 16 |
paths_to_data: []
|
| 17 |
chunk_size: 700
|
| 18 |
chunk_overlap: 0
|
|
|
|
| 19 |
persist_directory: ./demo_db_dir
|
|
|
|
| 16 |
paths_to_data: []
|
| 17 |
chunk_size: 700
|
| 18 |
chunk_overlap: 0
|
| 19 |
+
separator: "\n"
|
| 20 |
persist_directory: ./demo_db_dir
|