Spaces:

Arrcttacsrks
/

VieNeu-TTS-Run-On-CPU2

Running

VieNeu-TTS-Run-On-CPU2 / utils /core_utils.py

Upload 16 files

d949d26 verified 1 day ago

1.48 kB

	import re
	from typing import List

	def split_text_into_chunks(text: str, max_chars: int = 256) -> List[str]:
	    """Split raw text into chunks of at most ``max_chars`` characters.

	    The text is first cut after sentence-ending punctuation (``.``, ``!``,
	    ``?``, ``…``) followed by whitespace. Consecutive sentences are packed
	    greedily into one chunk, joined by a single space, while they fit.
	    A sentence longer than ``max_chars`` is packed word by word instead, and
	    a single word that is still too long is sliced into ``max_chars``-sized
	    pieces so that the length limit always holds.

	    Args:
	        text: Raw input text; surrounding whitespace is ignored.
	        max_chars: Maximum length of each returned chunk. Must be >= 1.

	    Returns:
	        The non-empty chunks in their original order. Empty or
	        whitespace-only input yields an empty list.

	    Raises:
	        ValueError: If ``max_chars`` is smaller than 1.
	    """
	    if max_chars < 1:
	        raise ValueError("max_chars must be a positive integer")

	    # The look-behind keeps the punctuation attached to its sentence.
	    sentences = re.split(r"(?<=[\.\!\?\…])\s+", text.strip())
	    chunks: List[str] = []
	    buffer = ""  # sentences accumulated for the chunk being built

	    for sentence in sentences:
	        sentence = sentence.strip()
	        if not sentence:
	            continue

	        if len(sentence) <= max_chars:
	            candidate = f"{buffer} {sentence}" if buffer else sentence
	            if len(candidate) <= max_chars:
	                buffer = candidate
	            else:
	                # Only reachable with a non-empty buffer: emit it and start
	                # a new chunk with the current sentence.
	                chunks.append(buffer)
	                buffer = sentence
	            continue

	        # Oversized sentence: emit what is pending so ordering is preserved,
	        # then fall back to word-based packing.
	        if buffer:
	            chunks.append(buffer)
	            buffer = ""

	        current = ""
	        for word in sentence.split():
	            candidate = f"{current} {word}" if current else word
	            if len(candidate) <= max_chars:
	                current = candidate
	                continue
	            if current:
	                chunks.append(current)
	            # A single token may itself exceed the limit (URL, long number,
	            # unspaced text); slice it so no chunk is ever too long.
	            while len(word) > max_chars:
	                chunks.append(word[:max_chars])
	                word = word[max_chars:]
	            current = word
	        if current:
	            chunks.append(current)

	    if buffer:
	        chunks.append(buffer)
	    return chunks