from langchain_text_splitters import RecursiveCharacterTextSplitter

from app.config import config
from app.utils.logger import logger


class Chunker:
    """Thin wrapper around a ``RecursiveCharacterTextSplitter``.

    The chunk size and overlap are read from
    ``config["rag"]["text_splitter"]`` when an instance is created, and
    every split operation logs how many chunks it produced.
    """

    def __init__(self):
        """Build the underlying splitter from config and log its settings.

        Raises:
            KeyError: If ``rag``, ``text_splitter``, ``chunk_size`` or
                ``chunk_overlap`` cannot be looked up in the config
                (assuming ``config`` behaves like a dict -- TODO confirm).
        """
        splitter_cfg = config["rag"]["text_splitter"]
        size = splitter_cfg["chunk_size"]
        overlap = splitter_cfg["chunk_overlap"]

        # Chunk length is measured with the builtin len(), and the
        # splitter is told its separators are not regular expressions.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=size,
            chunk_overlap=overlap,
            length_function=len,
            is_separator_regex=False,
        )
        logger.info(f"Text splitter initialized: chunk_size={size}, overlap={overlap}")

    def split_text(self, text: str) -> list[str]:
        """Split ``text`` with the configured splitter.

        Args:
            text: The string to split.

        Returns:
            The chunks returned by the underlying splitter's
            ``split_text``.
        """
        pieces = self.text_splitter.split_text(text)
        piece_count = len(pieces)
        logger.info(f"Split text into {piece_count} chunks")
        return pieces

    def split_documents(self, documents: list) -> list:
        """Split a list of documents with the configured splitter.

        Args:
            documents: The documents to split; presumably LangChain
                ``Document`` objects -- verify against callers.

        Returns:
            The chunks returned by the underlying splitter's
            ``split_documents``.
        """
        pieces = self.text_splitter.split_documents(documents)
        source_count = len(documents)
        piece_count = len(pieces)
        logger.info(f"Split {source_count} documents into {piece_count} chunks")
        return pieces


# Module-level instance, created (and its config read) at import time.
chunker = Chunker()