Spaces:
Sleeping
Sleeping
| from langchain.text_splitter import ( | |
| RecursiveCharacterTextSplitter, | |
| MarkdownHeaderTextSplitter | |
| ) | |
# Generic fallback splitter. Chunk length is measured with ``len`` (i.e. in
# characters for ``str`` input): at most 2000 per chunk, with 200 shared
# between neighbouring chunks. Separators are listed from coarsest
# (blank line) to finest (empty string).
recursive_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ". ", " ", ""],
    length_function=len,
    chunk_size=2000,
    chunk_overlap=200,
)
# Structure-aware splitter for Markdown input: splits on level-2 and level-3
# headings. Each pair is (heading marker, label); per the LangChain API the
# label is the metadata key under which the heading text is recorded.
# ``strip_headers=False`` asks the splitter to leave heading lines in the
# chunk text.
markdown_splitter = MarkdownHeaderTextSplitter(
    strip_headers=False,
    headers_to_split_on=[
        ("##", "Header 2"),  # Main guideline source (NCCN, ESMO, WHO…)
        ("###", "Header 3"),  # Subsections (Features, Recommendations, Statistics…)
    ],
)