Kosasih committed on
Commit
855cbe6
·
verified ·
1 Parent(s): 6cb9c8c

Create test_model.py

Browse files
Files changed (1) hide show
  1. test_model.py +32 -0
test_model.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+ from tokenizer import BPETokenizer
3
+
4
class TokenizerTest(unittest.TestCase):
    """Type-level smoke tests for ``BPETokenizer``.

    The tokenizer is built from a small hand-written vocabulary and merge
    list. The tests check only the *types* returned by ``tokenize``,
    ``encode`` and ``decode``; they do not pin exact token sequences.
    """

    def setUp(self):
        """Create a fresh tokenizer, vocabulary and merge list for each test."""
        self.vocab = {
            "<PAD>": 0,
            "<UNK>": 1,
            "a": 2,
            "b": 3,
            "c": 4,
            "ab": 5,
            "bc": 6,
            "abc": 7,
            "</w>": 8,
        }
        self.merges = [["a", "b"], ["b", "c"], ["ab", "c"]]
        self.tokenizer = BPETokenizer(vocab=self.vocab, merges=self.merges)

    def test_tokenize_and_encode(self):
        """``tokenize`` returns a list and ``encode`` yields only ints."""
        tokens = self.tokenizer.tokenize("abc")
        # assertIsInstance reports the actual value and type on failure,
        # unlike assertTrue(isinstance(...)) which only says "False is not true".
        self.assertIsInstance(tokens, list)

        indices = self.tokenizer.encode("abc")
        # Check element by element so a failure names the offending position.
        for position, index in enumerate(indices):
            with self.subTest(position=position):
                self.assertIsInstance(index, int)

    def test_decode(self):
        """Decoding an encoded string returns a ``str``."""
        encoded = self.tokenizer.encode("abc")
        decoded = self.tokenizer.decode(encoded)
        self.assertIsInstance(decoded, str)
30
+
31
# Allow running this file directly (``python test_model.py``) in addition to
# discovery through a test runner.
if __name__ == "__main__":
    unittest.main()