nioushasadjadi
committed on
Commit
·
3888382
1
Parent(s):
bcd9e56
Fixing tokenize return bug
Browse files- tokenizer.py +0 -2
tokenizer.py
CHANGED
|
@@ -28,8 +28,6 @@ class KmerTokenizer(PreTrainedTokenizer):
|
|
| 28 |
|
| 29 |
def tokenize(self, text, **kwargs):
|
| 30 |
splits = [text[i:i + self.k] for i in range(0, len(text) - self.k + 1, self.stride)]
|
| 31 |
-
if kwargs.get('return_tensors') == 'pt':
|
| 32 |
-
return torch.tensor(splits)
|
| 33 |
return splits
|
| 34 |
|
| 35 |
def _encode(self, text, **kwargs):
|
|
|
|
| 28 |
|
| 29 |
def tokenize(self, text, **kwargs):
    """Split *text* into k-mers of length ``self.k``, advancing ``self.stride`` characters per step.

    Each token is a substring ``text[start:start + self.k]``; windows that would
    run past the end of *text* are not produced. Extra keyword arguments are
    accepted for interface compatibility but ignored.

    Returns:
        list[str]: the extracted k-mer substrings (empty if ``len(text) < self.k``).
    """
    kmers = []
    last_start = len(text) - self.k
    for start in range(0, last_start + 1, self.stride):
        kmers.append(text[start:start + self.k])
    return kmers
|
| 32 |
|
| 33 |
def _encode(self, text, **kwargs):
|