# rag-chatbot/ingestion/chunker.py
# Abeshith's picture
# RAG Chatbot with LangChain, FastAPI, and service layer architecture
# 64d7fdf
from langchain_text_splitters import RecursiveCharacterTextSplitter
from app.config import config
from app.utils.logger import logger
class Chunker:
    """Split raw text or documents into chunks with a recursive character splitter.

    The splitter settings (``chunk_size`` and ``chunk_overlap``) are read from
    ``config["rag"]["text_splitter"]`` when the instance is created.
    """

    def __init__(self):
        """Build the underlying text splitter from the application config."""
        splitter_settings = config["rag"]["text_splitter"]
        size = splitter_settings["chunk_size"]
        overlap = splitter_settings["chunk_overlap"]

        # Chunk length is measured with the builtin ``len``.
        splitter_kwargs = {
            "chunk_size": size,
            "chunk_overlap": overlap,
            "length_function": len,
            "is_separator_regex": False,
        }
        self.text_splitter = RecursiveCharacterTextSplitter(**splitter_kwargs)

        logger.info(
            f"Text splitter initialized: chunk_size={size}, overlap={overlap}"
        )

    def split_text(self, text: str) -> list[str]:
        """Split a single string and return the resulting chunks.

        Args:
            text: The text to split.

        Returns:
            The chunks produced by the underlying splitter's ``split_text``.
        """
        pieces = self.text_splitter.split_text(text)
        piece_count = len(pieces)
        logger.info(f"Split text into {piece_count} chunks")
        return pieces

    def split_documents(self, documents: list) -> list:
        """Split a list of documents and return the resulting chunks.

        Args:
            documents: The documents to split; passed unchanged to the
                underlying splitter's ``split_documents``.

        Returns:
            The chunks produced by the underlying splitter.
        """
        pieces = self.text_splitter.split_documents(documents)
        source_count, piece_count = len(documents), len(pieces)
        logger.info(f"Split {source_count} documents into {piece_count} chunks")
        return pieces
# Shared module-level instance. It is constructed when this module is
# imported, so the splitter settings are read from config at import time.
chunker = Chunker()