verantyx / axis /axis_core /util.py
kofdai's picture
Upload folder using huggingface_hub
6d07351 verified
from __future__ import annotations
import re
import time
from typing import List
def now_hms() -> str:
    """Return the current local wall-clock time formatted as ``HH:MM:SS``.

    The value comes straight from ``time.strftime`` with no explicit time
    tuple, so it reflects the process's local time at the moment of the call.
    """
    return time.strftime("%H:%M:%S")
def simple_jp_tokenize(text: str) -> List[str]:
    """Split *text* into coarse tokens on whitespace and punctuation.

    This is a plain delimiter split, not a morphological analysis: a run of
    characters that contains none of the separators stays together as one
    token.

    Args:
        text: The string to tokenize. Any falsy value (``None``, ``""``) is
            treated as the empty string.

    Returns:
        The non-empty pieces in their original order. Pieces longer than
        64 characters are dropped rather than truncated.
    """
    max_token_len = 64  # longer pieces are discarded entirely
    cleaned = (text or "").strip()
    # Separators: any whitespace, the Japanese comma/full stop, and a fixed
    # set of ASCII punctuation plus em/en dashes. A run of consecutive
    # separators counts as a single split point.
    pieces = re.split(r"[\s、。,.(){}\[\]<>:;\"'!?/\\|+=\-—–\n\r\t]+", cleaned)
    # Leading/trailing separators yield empty pieces; filter them out.
    return [
        tok
        for tok in (piece.strip() for piece in pieces)
        if tok and len(tok) <= max_token_len
    ]