File size: 657 Bytes
63105da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from typing import List, Dict
from .splitter_base import SplitterBase

class FixedChunker(SplitterBase):
    def chunk(self, text: str, chunk_size: int, overlap: int) -> List[Dict]:
        chunks = []
        idx = 0
        while idx < len(text):
            end = min(idx + chunk_size, len(text))
            chunk_text = text[idx:end]
            chunks.append({
                "text": chunk_text,
                "start": idx,
                "end": end,
                "meta": {"source": "fixed"}
            })
            if end == len(text):
                break
            idx += chunk_size - overlap
        return chunks