Spaces:
Running
Running
File size: 359 Bytes
9ea5e05 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 | import regex as re
# NOTE: `re` in this module is the `regex` package (imported as `re`), not the
# standard-library `re` module.

# Matches any run of one or more whitespace characters; used by norm_text()
# to collapse such runs.
WS = re.compile(r"\s+")
# Matches whitespace that immediately follows '.', '!' or '?'. The look-behind
# keeps the punctuation attached to the preceding sentence when splitting.
SENT_SPLIT = re.compile(r"(?<=[.!?])\s+")
def norm_text(s: str) -> str:
    """Return *s* trimmed, with every whitespace run collapsed to one space.

    Falsy input (``None`` or an empty string) yields an empty string.
    """
    if not s:
        return ""
    trimmed = s.strip()
    return WS.sub(" ", trimmed)
def word_count(s: str) -> int:
    """Return the number of word tokens found in *s*.

    A token is a maximal run of Unicode letters, Unicode numbers and
    straight or curly apostrophes that starts and ends on a word boundary.
    Falsy input (``None`` or an empty string) counts as zero words.

    The ``\\p{L}`` / ``\\p{N}`` property classes require the ``regex``
    package, which this module imports under the name ``re``.
    """
    if s:
        tokens = re.findall(r"\b[\p{L}\p{N}’']+\b", s)
        return len(tokens)
    return 0
def sentences(s: str):
    """Split *s* into a list of sentences.

    The text is first normalized with ``norm_text``; it is then split at
    whitespace that follows '.', '!' or '?'. Input that normalizes to an
    empty string produces an empty list.
    """
    cleaned = norm_text(s)
    if not cleaned:
        return []
    return SENT_SPLIT.split(cleaned)
|