# Stand-in for a '.' that must not be treated as a sentence end. It is a
# Private Use Area code point, so it is not expected to occur in real text.
_DOT_PLACEHOLDER = "\uE001"

# Abbreviations whose dots never terminate a sentence. Order matters: the
# entries are applied one after another, exactly as listed.
_PROTECTED_ABBREVIATIONS = (
    'd. h.', 'Abs.', 'Art.', 'Bem.', 'Bst.', ' ff.', ' f.',
    '(ff.', '(f.', 'insbes.', 'S.', 'V.',
)


def split_list(array: list[str], separator: str) -> list[str]:
    """Split every string in *array* after each occurrence of *separator*.

    The separator stays attached to the end of the piece that precedes it,
    so concatenating the returned pieces reproduces the input strings.

    Args:
        array: Strings to split.
        separator: Text after which a cut is made. An empty separator
            leaves the strings unsplit.

    Returns:
        A flat list of all pieces, in input order. A string that ends with
        the separator contributes a trailing empty piece.
    """
    if not separator:
        return list(array)

    pieces: list[str] = []
    for s in array:
        # Splitting directly (instead of inserting an in-band marker
        # character) means no character in the input can be mistaken for
        # a cut point.
        *leading, last = s.split(separator)
        pieces.extend(part + separator for part in leading)
        pieces.append(last)
    return pieces


def split(text: str) -> list[str]:
    """Split *text* into sentence-like pieces.

    Cuts are made at line breaks, after '. ' and after '? '. A dot does not
    cause a cut when it belongs to one of the protected abbreviations, or
    when the surrounding characters suggest it is not a sentence end.

    Args:
        text: The text to split.

    Returns:
        The pieces in order, each stripped of surrounding whitespace.
        Empty pieces (e.g. from a trailing line break) are retained.
    """
    # Collapse whitespace around line breaks and runs of blank lines.
    for noisy in (' \n', '\n ', '\n\n'):
        while noisy in text:
            text = text.replace(noisy, '\n')

    # Hide abbreviation dots behind the placeholder so they survive the
    # splitting step; they are restored at the very end.
    for abbreviation in _PROTECTED_ABBREVIATIONS:
        text = text.replace(
            abbreviation, abbreviation.replace('.', _DOT_PLACEHOLDER)
        )

    # Heuristically hide further dots. Dots within the first or last three
    # characters are left unexamined, which keeps every index below valid.
    chars = list(text)
    for i in range(3, len(chars) - 3):
        if chars[i] != '.':
            continue
        if (
            chars[i - 2] == ' '             # single-character token, e.g. "z."
            or not chars[i + 2].isupper()   # next word is not capitalised
            or chars[i - 1].isdigit()       # dot directly follows a digit
        ):
            chars[i] = _DOT_PLACEHOLDER
    text = ''.join(chars)

    pieces = [text]
    for separator in ('\n', '. ', '? '):
        pieces = split_list(pieces, separator)

    return [piece.replace(_DOT_PLACEHOLDER, '.').strip() for piece in pieces]