def chunk_text(text: str, chunk_size: int = 200, overlap: int = 50) -> list[str]:
    """Split *text* into fixed-size, overlapping character chunks.

    The chunks are meant to be fed to the embedding pipeline. Each chunk
    starts ``chunk_size - overlap`` characters after the previous one, so
    consecutive chunks share ``overlap`` characters.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared between consecutive chunks;
            must satisfy ``0 <= overlap < chunk_size``.

    Returns:
        The chunks in order of appearance. An empty *text* yields an empty
        list. The last chunk may be shorter than ``chunk_size`` and, when
        fewer than ``overlap`` characters remain, may consist only of text
        that already ends the previous chunk.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
            negative or not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if not 0 <= overlap < chunk_size:
        # A step of zero or less would never advance past the end of the
        # text, so the chunking loop would never terminate.
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, "
            f"got overlap={overlap}, chunk_size={chunk_size}"
        )

    # Distance between consecutive chunk starts; smaller than chunk_size so
    # that neighbouring chunks overlap.
    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]