import pprint
def sort_by_token(tokenizer):
    """Pretty-print the tokenizer's vocabulary ordered by token length.

    Despite the name, entries are ordered by ``len(token)`` (shortest
    first), not alphabetically. ``sorted`` is stable, so tokens of equal
    length keep the relative order in which ``get_vocab()`` yielded them.

    Args:
        tokenizer: Any object exposing ``get_vocab()`` that returns a
            mapping whose keys support ``len()``. Presumably this maps
            token strings to integer ids -- confirm against the
            tokenizer in use.

    Returns:
        None. The ordered vocabulary is written to stdout.
    """
    vocab = tokenizer.get_vocab()
    sorted_vocab = dict(sorted(vocab.items(), key=lambda item: len(item[0])))
    # sort_dicts=False keeps pprint from re-sorting the keys, which would
    # discard the ordering established above.
    pprint.pprint(sorted_vocab, sort_dicts=False)
def sort_by_id(tokenizer):
    """Pretty-print the tokenizer's vocabulary ordered by mapped value.

    Entries are sorted ascending by the value each key maps to in
    ``get_vocab()``. ``sorted`` is stable, so entries with equal values
    keep the relative order in which ``get_vocab()`` yielded them.

    Args:
        tokenizer: Any object exposing ``get_vocab()`` that returns a
            mapping with mutually comparable values. Presumably this
            maps token strings to integer ids -- confirm against the
            tokenizer in use.

    Returns:
        None. The ordered vocabulary is written to stdout.
    """
    vocab = tokenizer.get_vocab()
    sorted_vocab = dict(sorted(vocab.items(), key=lambda item: item[1]))
    # sort_dicts=False keeps pprint from re-sorting by key, which would
    # discard the by-value ordering established above.
    pprint.pprint(sorted_vocab, sort_dicts=False)