| from __future__ import annotations | |
| import re | |
| import time | |
| from typing import List | |
def now_hms() -> str:
    """Return the current local wall-clock time as an ``HH:MM:SS`` string."""
    clock_format = "%H:%M:%S"
    # Passing localtime() explicitly is what strftime does by default.
    return time.strftime(clock_format, time.localtime())
def simple_jp_tokenize(text: str) -> List[str]:
    """Split *text* into short tokens on whitespace and punctuation.

    Delimiters are runs of whitespace, the Japanese comma and full stop
    (、 and 。), common ASCII punctuation and brackets, and em/en dashes.
    No morphological analysis is performed: a stretch of text without any
    delimiter stays a single token.

    Args:
        text: The string to tokenize. A falsy value (``None`` or ``""``)
            is treated as the empty string.

    Returns:
        The non-empty fragments in their original order. Fragments longer
        than 64 characters are dropped.
    """
    cleaned = (text or "").strip()
    pieces = re.split(r"[\s、。,.(){}\[\]<>:;\"'!?/\\|+=\-—–\n\r\t]+", cleaned)
    trimmed = (piece.strip() for piece in pieces)
    # Leading/trailing delimiters produce empty fragments; discard them
    # together with any over-long fragment.
    return [token for token in trimmed if token and len(token) <= 64]