"""Smoke-test a locally saved Hugging Face tokenizer.

Loads the slow (pure-Python) tokenizer from the current directory,
round-trips a short string through encode/decode, then renders a small
multi-turn chat transcript with the tokenizer's chat template and decodes
it back to text for visual inspection.
"""

from tokenizers import ByteLevelBPETokenizer  # noqa: F401  # NOTE(review): unused below — confirm before removing
from transformers import AutoTokenizer

# use_fast=False forces the slow (pure-Python) tokenizer implementation.
# trust_remote_code=True lets custom tokenizer code shipped with the
# checkpoint in "./" execute — only safe for checkpoints you control.
auto_tokenizer = AutoTokenizer.from_pretrained(
    "./", use_fast=False, trust_remote_code=True
)

# Basic encode/decode round trip on a short sample string.
text = "Hello, world!"
encoded = auto_tokenizer.encode(text)
decoded = auto_tokenizer.decode(encoded)

print("Encoded:", encoded)
print("Decoded:", decoded)

# A minimal conversation in the standard chat-message schema
# (list of {"role", "content"} dicts).
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello, how are you?"},
    {"role": "assistant", "content": "I'm good, thank you! How can I help you today?"},
    {"role": "user", "content": "Nothing"},
]

print('messages:', messages)
# apply_chat_template tokenizes by default, so `ids` is a list of token ids.
ids = auto_tokenizer.apply_chat_template(messages)
print(f"input_ids:\t{ids}")
text = auto_tokenizer.decode(ids)
print(f"input_text:\t[{text}]")