| """**Text Splitters** are classes for splitting text. |
| |
| **Class hierarchy:** |
| |
| .. code-block:: |
| |
| BaseDocumentTransformer --> TextSplitter --> <name>TextSplitter # Example: CharacterTextSplitter |
| RecursiveCharacterTextSplitter --> <name>TextSplitter |
| |
| Note: **MarkdownHeaderTextSplitter** and **HTMLHeaderTextSplitter** do not derive from TextSplitter. |
| |
| |
| **Main helpers:** |
| |
| .. code-block:: |
| |
| Document, Tokenizer, Language, LineType, HeaderType |
| |
| """ |
|
|
| from langchain_text_splitters.base import ( |
| Language, |
| TextSplitter, |
| Tokenizer, |
| TokenTextSplitter, |
| split_text_on_tokens, |
| ) |
| from langchain_text_splitters.character import ( |
| CharacterTextSplitter, |
| RecursiveCharacterTextSplitter, |
| ) |
| from langchain_text_splitters.html import ( |
| ElementType, |
| HTMLHeaderTextSplitter, |
| HTMLSectionSplitter, |
| HTMLSemanticPreservingSplitter, |
| ) |
| from langchain_text_splitters.json import RecursiveJsonSplitter |
| from langchain_text_splitters.jsx import JSFrameworkTextSplitter |
| from langchain_text_splitters.konlpy import KonlpyTextSplitter |
| from langchain_text_splitters.latex import LatexTextSplitter |
| from langchain_text_splitters.markdown import ( |
| ExperimentalMarkdownSyntaxTextSplitter, |
| HeaderType, |
| LineType, |
| MarkdownHeaderTextSplitter, |
| MarkdownTextSplitter, |
| ) |
| from langchain_text_splitters.nltk import NLTKTextSplitter |
| from langchain_text_splitters.python import PythonCodeTextSplitter |
| from langchain_text_splitters.sentence_transformers import ( |
| SentenceTransformersTokenTextSplitter, |
| ) |
| from langchain_text_splitters.spacy import SpacyTextSplitter |
|
|
# Public API of the package: exactly the names imported above.
# The list is assembled in labelled segments; the final ``__all__`` holds the
# same names in the same order as a single flat literal would.

# Core names from the ``base`` module.
__all__ = [
    "TokenTextSplitter",
    "TextSplitter",
    "Tokenizer",
    "Language",
]

# Splitter implementations imported from their individual submodules.
__all__ += [
    "RecursiveCharacterTextSplitter",
    "RecursiveJsonSplitter",
    "LatexTextSplitter",
    "JSFrameworkTextSplitter",
    "PythonCodeTextSplitter",
    "KonlpyTextSplitter",
    "SpacyTextSplitter",
    "NLTKTextSplitter",
]

# Token-splitting helper from ``base`` and the sentence-transformers splitter.
__all__ += [
    "split_text_on_tokens",
    "SentenceTransformersTokenTextSplitter",
]

# Helper types exported by the ``html`` and ``markdown`` modules.
__all__ += [
    "ElementType",
    "HeaderType",
    "LineType",
]

# Names from the ``html`` module.
__all__ += [
    "HTMLHeaderTextSplitter",
    "HTMLSectionSplitter",
    "HTMLSemanticPreservingSplitter",
]

# Names from the ``markdown`` module, plus ``CharacterTextSplitter``.
__all__ += [
    "MarkdownHeaderTextSplitter",
    "MarkdownTextSplitter",
    "CharacterTextSplitter",
    "ExperimentalMarkdownSyntaxTextSplitter",
]
|
|