distinct
Browse files- tokenizer_13a.py +1 -3
tokenizer_13a.py
CHANGED
|
@@ -67,8 +67,7 @@ class TokenizerRegexp(BaseTokenizer):
|
|
| 67 |
# no leading or trailing spaces, single space within words
|
| 68 |
# return ' '.join(line.split())
|
| 69 |
# This line is changed with regards to the original tokenizer (seen above) to return individual words
|
| 70 |
-
|
| 71 |
-
print("1:", line.split())
|
| 72 |
return line.split()
|
| 73 |
|
| 74 |
|
|
@@ -98,7 +97,6 @@ class Tokenizer13a(BaseTokenizer):
|
|
| 98 |
line = line.replace("&amp;", "&")
|
| 99 |
line = line.replace("&lt;", "<")
|
| 100 |
line = line.replace("&gt;", ">")
|
| 101 |
-
print(line)
|
| 102 |
|
| 103 |
return self._post_tokenizer(f" {line} ")
|
| 104 |
|
|
|
|
| 67 |
# no leading or trailing spaces, single space within words
|
| 68 |
# return ' '.join(line.split())
|
| 69 |
# This line is changed with regards to the original tokenizer (seen above) to return individual words
|
| 70 |
+
|
|
|
|
| 71 |
return line.split()
|
| 72 |
|
| 73 |
|
|
|
|
| 97 |
line = line.replace("&amp;", "&")
|
| 98 |
line = line.replace("&lt;", "<")
|
| 99 |
line = line.replace("&gt;", ">")
|
|
|
|
| 100 |
|
| 101 |
return self._post_tokenizer(f" {line} ")
|
| 102 |
|