File size: 192 Bytes
acf77ab
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
from __future__ import annotations

import re

# Separator pattern: one or more consecutive characters that are not
# "word" characters (i.e. anything other than letters, digits, underscore).
_SPLIT_RE = re.compile(r"[^\w]+", re.UNICODE)


def tokenize(text: str) -> list[str]:
    """Break *text* into lowercase word tokens.

    The input is lowercased and then split on every run of non-word
    characters. Underscores count as word characters, so ``snake_case``
    stays a single token. Empty fragments, which the split yields when
    the text starts or ends with a separator, are discarded.

    Args:
        text: The string to tokenize.

    Returns:
        The tokens in their original order; an empty list when *text*
        contains no word characters.
    """
    lowered = text.lower()
    fragments = _SPLIT_RE.split(lowered)
    # filter(None, ...) drops the empty strings produced at the edges.
    return list(filter(None, fragments))