Text Generation
MiCon / phi_tokenizer.py
TejAndrewsACC's picture
Create phi_tokenizer.py
05328c3 verified
raw
history blame contribute delete
537 Bytes
class Tokenizer:
    """Character-level tokenizer: maps each character to its Unicode code point.

    Encoding is lossless for any Python string, since ``chr(ord(c)) == c``.
    """

    def __init__(self):
        # Result of the most recent tokenize() call (kept for backward
        # compatibility with code that reads the attribute directly).
        self.tokens = []

    def tokenize(self, text):
        """Return the list of Unicode code points for *text*.

        Bug fix: the original appended into the shared ``self.tokens`` list,
        so repeated calls accumulated tokens from earlier calls. Each call
        now produces a fresh list.
        """
        self.tokens = [ord(char) for char in text]
        return self.tokens

    def detokenize(self, tokens):
        """Reassemble a string from an iterable of Unicode code points."""
        return ''.join(chr(token) for token in tokens)
def main():
    """Demo: round-trip a sample string through the tokenizer and print both forms."""
    tokenizer = Tokenizer()
    sample = "hello world"
    encoded = tokenizer.tokenize(sample)
    print(encoded)
    print(tokenizer.detokenize(encoded))
# Run the demo only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()