Spaces:
Runtime error
Runtime error
File size: 471 Bytes
def chunk_text(text: str, chunk_size: int = 200, overlap: int = 50) -> list[str]:
    """Split *text* into consecutive, overlapping character chunks.

    Each chunk holds up to ``chunk_size`` characters, and each new chunk
    starts ``chunk_size - overlap`` characters after the previous one, so
    neighbouring chunks share ``overlap`` characters. The final chunk may be
    shorter than ``chunk_size``. The chunks are meant to be fed to an
    embedding pipeline by the caller.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by adjacent chunks; must be
            smaller than ``chunk_size``. A negative value widens the stride
            beyond ``chunk_size``, leaving gaps between chunks.

    Returns:
        The chunks in order of appearance; an empty list for empty *text*.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or if ``overlap`` is
            not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if overlap >= chunk_size:
        # A stride of zero (or less) would never advance through the text,
        # so the chunking loop would never terminate.
        raise ValueError(
            f"overlap ({overlap}) must be smaller than chunk_size ({chunk_size})"
        )

    stride = chunk_size - overlap  # distance between consecutive chunk starts
    return [text[start:start + chunk_size] for start in range(0, len(text), stride)]