from langchain.text_splitter import (
    MarkdownHeaderTextSplitter,
    RecursiveCharacterTextSplitter,
)

# Separators handed to the recursive splitter, listed coarsest first:
# paragraph break, line break, sentence end, single space, and finally the
# empty string.
_SEPARATORS = ("\n\n", "\n", ". ", " ", "")

# Markdown heading levels to split on, paired with the metadata key under
# which each heading is recorded.
_HEADERS_TO_SPLIT_ON = (
    ("##", "Header 2"),   # Main guideline source (NCCN, ESMO, WHO…)
    ("###", "Header 3"),  # Subsections (Features, Recommendations, Statistics…)
)

# Size-bounded splitter: targets chunks of 2000 units with an overlap of 200
# between neighbouring chunks. Length is measured with ``len``, i.e. in
# characters.
recursive_splitter = RecursiveCharacterTextSplitter(
    separators=list(_SEPARATORS),
    length_function=len,
    chunk_size=2000,
    chunk_overlap=200,
)

# Structure-aware splitter: cuts a Markdown document at the "##" and "###"
# headings configured above. ``strip_headers=False`` asks the splitter not to
# remove the heading lines from the chunk text.
markdown_splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=list(_HEADERS_TO_SPLIT_ON),
    strip_headers=False,
)