Spaces:
Sleeping
Sleeping
File size: 657 Bytes
2e8d6bf | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 | from src.tools.ingest import chunk_text
def test_chunk_text_basic():
    """Check that a 100-word text is split into several size-bounded chunks.

    Uses ``chunk_size=20`` and ``overlap=5`` and verifies that more than one
    chunk is produced and that no chunk holds more than 20
    whitespace-separated words.
    """
    word_count = 100
    max_words = 20
    source = " ".join(f"word{i}" for i in range(word_count))

    pieces = chunk_text(source, chunk_size=max_words, overlap=5)

    # The input is longer than one chunk, so it must be split.
    assert len(pieces) > 1
    # Every chunk has to respect the requested word budget.
    for piece in pieces:
        assert len(piece.split()) <= max_words
def test_chunk_text_overlap():
    """Check that consecutive chunks share the requested number of words.

    With ``chunk_size=10`` and ``overlap=3`` the expected stride between
    chunk starts is 7 words, so the second chunk should begin at ``w7``.
    """
    source = " ".join(f"w{i}" for i in range(50))

    pieces = chunk_text(source, chunk_size=10, overlap=3)

    # Expected stride: chunk_size - overlap = 10 - 3 = 7 words.
    first_word_of_second = pieces[1].split()[0]
    assert first_word_of_second == "w7"
def test_chunk_text_empty():
    """Check that an empty input string produces an empty list of chunks."""
    assert chunk_text("", chunk_size=10, overlap=2) == []
|