# split_documents.py – v2
"""Split loaded documents into overlapping text chunks.

Run as a script, this loads documents via ``load_documents`` and prints the
document/chunk counts plus a preview of the first chunk.
"""

from langchain_text_splitters import RecursiveCharacterTextSplitter

# Default splitter settings; also recorded in each chunk's metadata.
CHUNK_SIZE = 1500
CHUNK_OVERLAP = 200


def split_documents(docs, *, chunk_size: int = CHUNK_SIZE,
                    chunk_overlap: int = CHUNK_OVERLAP):
    """Split *docs* into chunks and tag each chunk with the split settings.

    Args:
        docs: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` handed to the splitter. Defaults to the
            module-level ``CHUNK_SIZE``.
        chunk_overlap: ``chunk_overlap`` handed to the splitter. Defaults to
            the module-level ``CHUNK_OVERLAP``.

    Returns:
        The chunks produced by the splitter. Each chunk's ``metadata`` gains
        ``"chunk_size"`` and ``"chunk_overlap"`` entries holding the values
        used for this call.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        # Listed from the coarsest boundary (blank line) down to the
        # empty string.
        separators=["\n\n", "\n", ". ", " ", ""],
    )
    chunks = splitter.split_documents(docs)

    # Record the settings on every chunk so downstream consumers can tell
    # how it was produced.
    for chunk in chunks:
        chunk.metadata["chunk_size"] = chunk_size
        chunk.metadata["chunk_overlap"] = chunk_overlap
    return chunks


if __name__ == "__main__":
    # Imported here so that importing this module does not require the
    # loader module.
    from load_documents import load_documents

    docs = load_documents()
    chunks = split_documents(docs)
    print("Docs:", len(docs), "Chunks:", len(chunks))
    # Guard the preview: with no input documents there is no first chunk,
    # and indexing would raise IndexError.
    if chunks:
        print(chunks[0].page_content[:300], chunks[0].metadata)