Spaces:
Sleeping
Sleeping
File size: 535 Bytes
c5e1945 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
from langchain.text_splitter import (
RecursiveCharacterTextSplitter,
MarkdownHeaderTextSplitter
)
# General-purpose splitter: chunks of at most 500 characters (sizes are
# measured with ``len``) with 50 characters of overlap between neighbours.
# Separators are listed coarsest-first, so text is preferably cut at paragraph
# boundaries and finer separators are only used for pieces that are still too
# large.
recursive_500 = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    length_function=len,
    separators=[
        "\n\n",  # Paragraph breaks
        "\n",  # Line breaks
        ".",  # Sentences
        ",",  # Clauses
        " ",  # Words
        # Characters: last-resort fallback. Without it, an unbroken run longer
        # than chunk_size (long URL, base64 blob, text without spaces) cannot
        # be split and is emitted as a single oversized chunk.
        "",
    ],
)
# Header-aware Markdown splitter. Each pair is (heading marker, name under
# which that heading's text is recorded for the resulting chunks):
#   "#"  -> "company_title"
#   "##" -> "section"
markdown_splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=[("#", "company_title"), ("##", "section")],
)
|