# File size: 537 Bytes (revision 05328c3)
class Tokenizer:
    """Character-level tokenizer that maps characters to Unicode code points.

    ``tokenize`` turns a string into a list of integers using ``ord``;
    ``detokenize`` reverses the mapping using ``chr``.
    """

    def __init__(self):
        # Tokens produced by the most recent ``tokenize`` call.
        self.tokens = []

    def tokenize(self, text):
        """Return the code points of *text* as a list of ints.

        Every call builds a fresh list, so the result reflects only *text*
        and lists returned by earlier calls are never modified. The result
        is also stored on ``self.tokens``.

        Args:
            text: The string to encode; an empty string yields ``[]``.

        Returns:
            A list with one integer (``ord`` value) per character of *text*.
        """
        # Rebind instead of appending: appending to the existing list would
        # leak the tokens of previous calls into this result.
        self.tokens = [ord(char) for char in text]
        return self.tokens

    def detokenize(self, tokens):
        """Return the string whose characters have the code points *tokens*.

        Args:
            tokens: An iterable of integer code points.

        Returns:
            The decoded string; an empty iterable yields ``''``.

        Raises:
            ValueError: If a token lies outside the range accepted by ``chr``.
        """
        return ''.join(chr(token) for token in tokens)
def main():
    """Run a small demo: encode a sample string, print the tokens, decode, print the text."""
    tokenizer = Tokenizer()
    sample = "hello world"

    # Encode the sample and show the resulting integer tokens.
    encoded = tokenizer.tokenize(sample)
    print(encoded)

    # Decode the same tokens and show the reconstructed text.
    decoded = tokenizer.detokenize(encoded)
    print(decoded)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()