Spaces:
Sleeping
Sleeping
File size: 657 Bytes
63105da | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 | from typing import List, Dict
from .splitter_base import SplitterBase
class FixedChunker(SplitterBase):
    """Splitter that cuts text into fixed-size character windows."""

    def chunk(self, text: str, chunk_size: int, overlap: int) -> List[Dict]:
        """Cut *text* into windows of at most *chunk_size* characters.

        Each window starts ``chunk_size - overlap`` characters after the
        previous one, so consecutive windows share ``overlap`` characters.
        The final window is truncated at the end of the text. A negative
        ``overlap`` makes the stride larger than the window, which skips
        characters between windows.

        Args:
            text: The string to split.
            chunk_size: Maximum number of characters per chunk; must be
                positive.
            overlap: Number of characters shared by consecutive chunks;
                must be smaller than ``chunk_size``.

        Returns:
            A list of dicts, one per chunk, with the keys ``"text"`` (the
            slice), ``"start"`` (inclusive offset), ``"end"`` (exclusive
            offset) and ``"meta"`` (``{"source": "fixed"}``). An empty
            ``text`` yields an empty list.

        Raises:
            ValueError: If ``chunk_size`` is not positive, or if
                ``overlap`` is greater than or equal to ``chunk_size``.
        """
        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be positive, got {chunk_size}")

        # The window must move forward on every iteration; a zero or
        # negative stride would keep appending chunks forever.
        step = chunk_size - overlap
        if step <= 0:
            raise ValueError(
                f"overlap ({overlap}) must be smaller than "
                f"chunk_size ({chunk_size})"
            )

        text_len = len(text)
        chunks = []
        idx = 0
        while idx < text_len:
            end = min(idx + chunk_size, text_len)
            chunks.append({
                "text": text[idx:end],
                "start": idx,
                "end": end,
                "meta": {"source": "fixed"},
            })
            # Stop once a window reaches the end of the text; otherwise the
            # overlap would produce extra trailing chunks that are merely
            # suffixes of this one.
            if end == text_len:
                break
            idx += step
        return chunks
|