class Tokenizer:
    """Trivial character-level tokenizer mapping chars <-> Unicode code points."""

    def __init__(self) -> None:
        # Most recent tokenization result; kept as an attribute for
        # backward compatibility with callers that read `self.tokens`.
        self.tokens: list[int] = []

    def tokenize(self, text: str) -> list[int]:
        """Return the list of Unicode code points for *text*.

        Bug fix: the original appended into the shared ``self.tokens`` list,
        so successive calls on one instance accumulated and returned tokens
        from earlier calls. Each call now produces a fresh list.
        """
        tokens = [ord(char) for char in text]
        self.tokens = tokens
        return tokens

    def detokenize(self, tokens: list[int]) -> str:
        """Reconstruct a string from a sequence of Unicode code points."""
        return ''.join(chr(token) for token in tokens)
def main() -> None:
    """Demo: round-trip a sample string through the Tokenizer."""
    tokenizer = Tokenizer()
    sample = "hello world"
    encoded = tokenizer.tokenize(sample)
    print(encoded)
    decoded = tokenizer.detokenize(encoded)
    print(decoded)


if __name__ == "__main__":
    main()