Spaces:
Runtime error
Runtime error
| from textwrap3 import dedent | |
| from unidecode import unidecode | |
| import re | |
# One sentence: any run of text up to and including its closing ".", "?" or
# "!" (consecutive marks such as "..." or "?!" stay attached), or the
# unterminated remainder at the very end of the text.
_SENTENCE_RE = re.compile(r"[^.?!]*[.?!]+|[^.?!]+$")


def chunk_text(text, max_size=4000):
    """Split *text* into chunks made of whole sentences.

    The text is passed through ``dedent`` and ``unidecode`` and then cut into
    sentences at ``.``, ``?`` and ``!``. Sentences are packed greedily, in
    order, into chunks: a sentence joins the current chunk while the combined
    (unstripped) length stays strictly below *max_size*; otherwise the current
    chunk is closed and the sentence starts the next one.

    Args:
        text: The input string to split.
        max_size: Exclusive upper bound on the length of a chunk that holds
            more than one sentence.

    Returns:
        A list of non-empty chunk strings with surrounding whitespace
        stripped. No input text is dropped: the last partial chunk and any
        trailing text without closing punctuation are included. A single
        sentence that is itself ``max_size`` characters or longer is kept
        intact as its own chunk, so that chunk may exceed the limit.
    """
    sentences = _SENTENCE_RE.findall(unidecode(dedent(text)))

    chunks = []
    current = ""
    for sentence in sentences:
        # Close the running chunk first when this sentence would not fit;
        # the sentence itself is carried over instead of being discarded.
        if current and len(current) + len(sentence) >= max_size:
            chunks.append(current.strip())
            current = ""
        current += sentence

    # Flush whatever is left after the last sentence.
    if current:
        chunks.append(current.strip())

    # Whitespace-only chunks carry no content; leave them out.
    return [chunk for chunk in chunks if chunk]