File size: 908 Bytes
64d7fdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import pytest
from langchain_core.documents import Document
from ingestion.chunker import chunker


@pytest.mark.unit
class TestChunker:
    """Unit tests for ``chunker.split_documents``."""

    # Upper bound on the length of a single chunk's text, as asserted below.
    # NOTE(review): presumably the configured chunk size (512) plus an
    # overlap/tolerance margin (50) -- confirm against the chunker settings.
    MAX_CHUNK_LENGTH = 512 + 50

    def test_split_documents(self, sample_document_text):
        """Check that a long document is split into bounded ``Document`` chunks.

        ``sample_document_text`` is a fixture supplied from outside this
        file; its text is repeated ten times to build the input document.
        """
        docs = [Document(page_content=sample_document_text * 10)]
        chunks = chunker.split_documents(docs)

        assert len(chunks) > 0
        assert all(isinstance(chunk, Document) for chunk in chunks)
        assert all(
            len(chunk.page_content) <= self.MAX_CHUNK_LENGTH for chunk in chunks
        )

    def test_chunk_metadata_preserved(self):
        """Check that every chunk keeps the source document's metadata."""
        doc = Document(
            page_content="This is a test document. " * 100,
            metadata={"source": "test.pdf", "page": 1},
        )
        chunks = chunker.split_documents([doc])

        # all() over an empty sequence is True, so without this guard the
        # metadata checks below would pass vacuously if no chunks came back.
        assert len(chunks) > 0
        assert all(chunk.metadata.get("source") == "test.pdf" for chunk in chunks)
        assert all(chunk.metadata.get("page") == 1 for chunk in chunks)