Spaces:
Running
Running
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
def chunk_text(text, chunk_size=1000, chunk_overlap=200):
    """Split *text* into chunks using ``RecursiveCharacterTextSplitter``.

    Args:
        text: The document text to split. Any falsy value (``""``, ``None``)
            short-circuits and yields an empty list without touching the
            splitter.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The list produced by the splitter's ``split_text``, or ``[]`` when
        *text* is falsy.

    Side effects:
        Prints the number of chunks produced to stdout whenever the splitter
        is actually run.
    """
    if text:
        # Candidate separators handed to the splitter, listed from the
        # coarsest (blank line) down to the empty string.
        # NOTE(review): the splitter presumably tries these in order and
        # falls back to the next one — confirm against the library docs.
        boundaries = ["\n\n", "\n", ".", " ", ""]
        splitter = RecursiveCharacterTextSplitter(
            separators=boundaries,
            chunk_overlap=chunk_overlap,
            chunk_size=chunk_size,
        )
        pieces = splitter.split_text(text)
        print(f"Split document into {len(pieces)} chunks.")
        return pieces

    # Nothing to split.
    return []