Spaces:

edwjin
/

docker-classifier

Sleeping

edwjin committed on Jun 29, 2024

Commit

b25aa2d

verified ·

1 Parent(s): 727198a

Update tokenizer.py

Files changed (1) hide show

tokenizer.py CHANGED Viewed

@@ -11,7 +11,6 @@ class SimpleTokenizer:
     """
     def __init__(self):
-        print(nltk.__version__)
         """Initialize the tokenizer with an empty vocabulary."""
         self.vocab = set()
         self.stoi = {'<pad>': 0, '<unk>': 1}
@@ -22,7 +21,7 @@ class SimpleTokenizer:
         """Update vocabulary with new text."""
         tokens = word_tokenize(text)
         new_tokens = set(tokens) - self.vocab
         for token in new_tokens:
             index = self.vocab_size
             self.vocab.add(token)
@@ -37,5 +36,4 @@ class SimpleTokenizer:
     def decode(self, indices):
         """Decode the list of indices back into text."""
-        return ' '.join([self.itos.get(index, '<unk>') for index in indices])

     """
     def __init__(self):
         """Initialize the tokenizer with an empty vocabulary."""
         self.vocab = set()
         self.stoi = {'<pad>': 0, '<unk>': 1}
         """Update vocabulary with new text."""
         tokens = word_tokenize(text)
         new_tokens = set(tokens) - self.vocab
         for token in new_tokens:
             index = self.vocab_size
             self.vocab.add(token)
     def decode(self, indices):
         """Decode the list of indices back into text."""
+        return ' '.join([self.itos.get(index, '<unk>') for index in indices])