DialectAnalysis / dialect_analysis /tokenization.py
thomascerniglia's picture
Upload 8 files
d0326ea verified
raw
history blame contribute delete
263 Bytes
from __future__ import annotations
from typing import List
def tokenize(text: str) -> List[str]:
    """Tokenize a normalized text into whitespace-delimited tokens.

    Args:
        text: The normalized input string. May be empty.

    Returns:
        A list of non-empty tokens. Empty input yields an empty list.
    """
    # Argument-less str.split() splits on runs of ANY whitespace (spaces,
    # tabs, newlines) and never produces empty tokens — this matches the
    # "whitespace-delimited" contract above, whereas split(" ") missed
    # tabs/newlines. It also returns [] for "", so no empty-string guard
    # or post-filter is needed.
    return text.split()