Spaces:
Sleeping
Sleeping
| import pytest | |
| from langchain_core.documents import Document | |
| from ingestion.chunker import chunker | |
class TestChunker:
    """Tests for ``chunker.split_documents`` from ``ingestion.chunker``."""

    def test_split_documents(self, sample_document_text):
        """Check that splitting yields non-empty, size-bounded ``Document`` chunks.

        Args:
            sample_document_text: Text supplied by a pytest fixture that is
                not defined in this file.
        """
        # NOTE(review): 512 + 50 presumably mirrors the chunker's configured
        # chunk size plus some slack -- confirm against ingestion.chunker.
        max_chunk_chars = 512 + 50

        # The fixture text is repeated ten times to lengthen the input.
        docs = [Document(page_content=sample_document_text * 10)]

        chunks = chunker.split_documents(docs)

        assert len(chunks) > 0, "split_documents returned no chunks"
        for index, chunk in enumerate(chunks):
            assert isinstance(chunk, Document), (
                f"chunk {index} is {type(chunk).__name__}, expected Document"
            )
        for index, chunk in enumerate(chunks):
            assert len(chunk.page_content) <= max_chunk_chars, (
                f"chunk {index} has {len(chunk.page_content)} characters, "
                f"limit is {max_chunk_chars}"
            )

    def test_chunk_metadata_preserved(self):
        """Check that every chunk carries the source document's metadata."""
        doc = Document(
            page_content="This is a test document. " * 100,
            metadata={"source": "test.pdf", "page": 1},
        )

        chunks = chunker.split_documents([doc])

        # Without this guard the per-chunk checks below would pass vacuously
        # on an empty result, because there would be nothing to iterate over.
        assert len(chunks) > 0, "split_documents returned no chunks"
        for index, chunk in enumerate(chunks):
            assert chunk.metadata.get("source") == "test.pdf", (
                f"chunk {index} lost or changed the 'source' metadata"
            )
            assert chunk.metadata.get("page") == 1, (
                f"chunk {index} lost or changed the 'page' metadata"
            )