Miauinthesky committed on
Commit
57a0c19
·
1 Parent(s): e069bc2

SimpleTokenizer

Browse files
Files changed (1) hide show
  1. Token +9 -0
Token ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import List
3
+
4
class SimpleTokenizer:
    """Split text into tokens using a regular expression.

    With the default pattern each token is a maximal run of ``\\w``
    characters, so punctuation and whitespace act as separators and are
    dropped from the output.
    """

    def __init__(self, pattern: str = r'\w+'):
        """Compile the token pattern once so it is reused by every call.

        Args:
            pattern: Regular expression describing a single token.
                Defaults to ``\\w+``.
        """
        self.regex = re.compile(pattern)

    def tokenize(self, text: str) -> List[str]:
        """Return every non-overlapping token match in *text*, in order.

        Args:
            text: The string to split into tokens.

        Returns:
            The matched substrings, left to right; an empty list when
            nothing matches.
        """
        # finditer + group(0) always yields the whole match. findall would
        # return group contents (or tuples) if the pattern had capture groups,
        # which would break the List[str] contract for custom patterns.
        return [match.group(0) for match in self.regex.finditer(text)]