rag-chatbot / tests /test_chunker.py
Abeshith's picture
RAG Chatbot with LangChain, FastAPI, and service layer architecture
64d7fdf
raw
history blame contribute delete
908 Bytes
import pytest
from langchain_core.documents import Document
from ingestion.chunker import chunker
@pytest.mark.unit
class TestChunker:
    """Unit tests for the ``chunker`` object imported from ``ingestion.chunker``."""

    def test_split_documents(self, sample_document_text):
        """Splitting a long document yields bounded-size ``Document`` chunks.

        ``sample_document_text`` is not defined in this file; it is expected
        to be supplied by a pytest fixture (presumably in ``conftest.py``).
        The text is multiplied by 10 to build the input document.
        """
        # NOTE(review): 512 and 50 presumably mirror the chunker's configured
        # chunk size and overlap -- confirm against ingestion.chunker.
        max_chunk_length = 512 + 50

        docs = [Document(page_content=sample_document_text * 10)]
        chunks = chunker.split_documents(docs)

        assert len(chunks) > 0
        assert all(isinstance(chunk, Document) for chunk in chunks)
        assert all(len(chunk.page_content) <= max_chunk_length for chunk in chunks)

    def test_chunk_metadata_preserved(self):
        """Every chunk keeps the ``source`` and ``page`` metadata of its parent."""
        doc = Document(
            page_content="This is a test document. " * 100,
            metadata={"source": "test.pdf", "page": 1},
        )
        chunks = chunker.split_documents([doc])

        # all() is True for an empty iterable, so without this guard the
        # metadata checks below would pass even if no chunks were produced.
        assert len(chunks) > 0
        assert all(chunk.metadata.get("source") == "test.pdf" for chunk in chunks)
        assert all(chunk.metadata.get("page") == 1 for chunk in chunks)