Text Generation
TejAndrewsACC committed on
Commit
05328c3
·
verified ·
1 Parent(s): 56eeeca

Create phi_tokenizer.py

Browse files
Files changed (1) hide show
  1. phi_tokenizer.py +23 -0
phi_tokenizer.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class Tokenizer:
    """Character-level tokenizer mapping text to Unicode code points.

    Each character becomes one integer token (its ``ord`` value), and
    ``detokenize`` reverses the mapping with ``chr``.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        # Tokens produced by the most recent ``tokenize`` call.
        self.tokens = []

    def tokenize(self, text: str) -> list:
        """Return the code point of every character in *text*.

        The result is rebuilt from scratch on each call, so tokens from
        earlier inputs never leak into later results. ``self.tokens`` is
        updated to hold the tokens of this call.

        Args:
            text: The string to tokenize. An empty string yields ``[]``.

        Returns:
            A new list of integer code points, one per character. The
            list is a copy, so mutating it does not affect ``self.tokens``.
        """
        self.tokens = [ord(char) for char in text]
        # Hand back a copy so callers cannot alter the stored state.
        return list(self.tokens)

    def detokenize(self, tokens) -> str:
        """Rebuild a string from an iterable of code points.

        Args:
            tokens: Iterable of integers, each a valid Unicode code point.

        Returns:
            The string formed by converting each token with ``chr`` and
            concatenating the results.

        Raises:
            ValueError: If a token is outside the range accepted by ``chr``.
            TypeError: If a token is not an integer.
        """
        return ''.join(chr(token) for token in tokens)
13
+
14
def main():
    """Run a small tokenize/detokenize round trip and print both stages.

    Prints the token list for the sample string first, then the text
    rebuilt from those tokens.
    """
    sample = "hello world"
    codec = Tokenizer()

    token_ids = codec.tokenize(sample)
    print(token_ids)

    print(codec.detokenize(token_ids))
21
+
22
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()