Spaces:
Running
Running
File size: 570 Bytes
2a8faae 0a5dcf9 2a8faae |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
from langchain.text_splitter import (
RecursiveCharacterTextSplitter,
MarkdownHeaderTextSplitter
)
# Size-bounded splitter for plain text.
# Chunk length is measured with the built-in `len` and capped at 3500,
# with 400 shared between neighbouring chunks.
recursive_splitter = RecursiveCharacterTextSplitter(
    # Listed from coarsest to finest boundary: blank line, newline,
    # sentence end, single space, and finally the empty string.
    separators=["\n\n", "\n", ". ", " ", ""],
    length_function=len,
    chunk_overlap=400,
    chunk_size=3500,
)
# Structure-aware splitter for markdown documents: sections are cut at
# level-2 and level-3 headers only.
markdown_splitter = MarkdownHeaderTextSplitter(
    # Keep the header lines in the chunk text instead of removing them.
    strip_headers=False,
    # Each pair is (markdown header prefix, label for that header level).
    headers_to_split_on=[
        # Level 2: main guideline source (NCCN, ESMO, WHO, ...).
        ("##", "Header 2"),
        # Level 3: subsections (Features, Recommendations, Statistics, ...).
        ("###", "Header 3"),
    ],
)
|