trykopy / emdedd /Splitter.py
Pavol Liška
v1
869eb7d
raw
history blame
631 Bytes
from langchain.text_splitter import RecursiveCharacterTextSplitter
class Splitter:
    """Hold text-splitting settings and apply them on demand.

    The stored settings are forwarded to ``RecursiveCharacterTextSplitter``
    each time :meth:`split` is called; the separators are passed with
    ``is_separator_regex=True``.
    """

    # Class-level fallback; every instance replaces it in ``__init__``.
    separators = []
    chunk_overlap: int
    chunk_size: int

    def __init__(self, separators, chunk_overlap, chunk_size):
        """Remember the splitting configuration.

        Args:
            separators: Separators handed to the underlying splitter
                (flagged as regular expressions when splitting).
            chunk_overlap: Forwarded as the splitter's ``chunk_overlap``.
            chunk_size: Forwarded as the splitter's ``chunk_size``.
        """
        # The given objects are stored as-is (no copy, no validation).
        self.separators, self.chunk_overlap, self.chunk_size = (
            separators,
            chunk_overlap,
            chunk_size,
        )

    def split(self, text):
        """Split ``text`` with a freshly configured recursive splitter.

        Args:
            text: The text to split.

        Returns:
            Whatever ``RecursiveCharacterTextSplitter.split_text`` returns
            for ``text`` (presumably a list of string chunks -- confirm
            against the installed langchain version).
        """
        # A new splitter is built per call, so attribute changes made after
        # construction are picked up by the next split.
        options = {
            "separators": self.separators,
            "is_separator_regex": True,
            "chunk_overlap": self.chunk_overlap,
            "chunk_size": self.chunk_size,
        }
        return RecursiveCharacterTextSplitter(**options).split_text(text)