Miau / Token
Miauinthesky's picture
SimpleTokenizer
57a0c19
raw
history blame contribute delete
210 Bytes
import re
from typing import List
class SimpleTokenizer:
    """Split text into tokens using a regular expression.

    By default a token is a maximal run of word characters (``\\w+``), so
    punctuation and whitespace act as separators and are dropped.

    Args:
        pattern: Regular expression describing a single token. Defaults to
            ``r'\\w+'``.
    """

    def __init__(self, pattern: str = r'\w+'):
        # Compile once so repeated tokenize() calls reuse the same pattern.
        self.regex = re.compile(pattern)

    def tokenize(self, text: str) -> List[str]:
        """Return all non-overlapping tokens found in *text*, in order.

        An empty string, or text with no match, yields an empty list.
        """
        # group(0) is always the whole match, so the result stays a flat list
        # of strings even when a custom pattern contains capture groups
        # (re.findall would return the groups instead in that case).
        return [match.group(0) for match in self.regex.finditer(text)]