from langchain_text_splitters import RecursiveCharacterTextSplitter


def chunk_text(text, chunk_size=1000, chunk_overlap=200):
    """Split *text* into overlapping chunks with a recursive character splitter.

    The work is delegated to ``RecursiveCharacterTextSplitter``, configured
    with a fixed separator list: blank-line paragraph breaks, single line
    breaks, periods, spaces, and finally the empty string.

    Args:
        text: The document text to split. Any falsy value (for example an
            empty string or ``None``) short-circuits to an empty result.
        chunk_size: Forwarded unchanged to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded unchanged to the splitter as
            ``chunk_overlap``.

    Returns:
        The list produced by the splitter's ``split_text`` call, or ``[]``
        when *text* is falsy.

    Side effects:
        Prints the number of chunks to stdout whenever splitting is performed.
    """
    # Nothing to split: return early without building a splitter or printing.
    if not text:
        return []

    splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", ".", " ", ""],
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    pieces = splitter.split_text(text)

    print(f"Split document into {len(pieces)} chunks.")
    return pieces