| | from typing import Any |
| |
|
| | from langchain_text_splitters import RecursiveCharacterTextSplitter, TextSplitter |
| |
|
| | from langflow.base.textsplitters.model import LCTextSplitterComponent |
| | from langflow.inputs.inputs import DataInput, IntInput, MessageTextInput |
| | from langflow.utils.util import unescape_string |
| |
|
| |
|
class RecursiveCharacterTextSplitterComponent(LCTextSplitterComponent):
    """Component that builds a LangChain ``RecursiveCharacterTextSplitter``.

    The splitter is configured from four inputs declared below: the chunk
    size, the chunk overlap, the data to split, and an optional list of
    separator strings.
    """

    # Component metadata.
    display_name: str = "Recursive Character Text Splitter"
    description: str = "Split text trying to keep all related text together."
    documentation: str = "https://docs.langflow.org/components/text-splitters#recursivecharactertextsplitter"
    name = "RecursiveCharacterTextSplitter"
    icon = "LangChain"

    # Input declarations; each ``name`` is read back as ``self.<name>`` in
    # the methods below.
    inputs = [
        IntInput(
            name="chunk_size",
            display_name="Chunk Size",
            info="The maximum length of each chunk.",
            value=1000,
        ),
        IntInput(
            name="chunk_overlap",
            display_name="Chunk Overlap",
            info="The amount of overlap between chunks.",
            value=200,
        ),
        DataInput(
            name="data_input",
            display_name="Input",
            info="The texts to split.",
            input_types=["Document", "Data"],
        ),
        MessageTextInput(
            name="separators",
            display_name="Separators",
            info='The characters to split on.\nIf left empty defaults to ["\\n\\n", "\\n", " ", ""].',
            is_list=True,
        ),
    ]

    def get_data_input(self) -> Any:
        """Return the value of the ``data_input`` input."""
        return self.data_input

    def build_text_splitter(self) -> TextSplitter:
        """Create the splitter from the component's current input values.

        Returns:
            A ``RecursiveCharacterTextSplitter`` built with the configured
            chunk size, chunk overlap and separators.
        """
        configured = self.separators
        # An empty/unset separators input is forwarded as ``None`` so the
        # splitter applies its own defaults. Otherwise every entry goes
        # through ``unescape_string`` first (presumably so that a typed
        # "\\n" becomes a real newline -- confirm against the helper).
        separators: list[str] | None = (
            [unescape_string(entry) for entry in configured] if configured else None
        )

        return RecursiveCharacterTextSplitter(
            separators=separators,
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
        )
| |
|