Instructions to use fenguhao/hmm-tokenizer with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use fenguhao/hmm-tokenizer with Transformers:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("fenguhao/hmm-tokenizer", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
| { | |
| "version": "1.0", | |
| "truncation": null, | |
| "padding": null, | |
| "added_tokens": [ | |
| { | |
| "id": 0, | |
| "content": "0 ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 1, | |
| "content": "1 ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 2, | |
| "content": "2 ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 3, | |
| "content": "3 ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 4, | |
| "content": "4 ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 5, | |
| "content": "5 ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 6, | |
| "content": "6 ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 7, | |
| "content": "7 ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 8, | |
| "content": "8 ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 9, | |
| "content": "9 ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 10, | |
| "content": "a ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 11, | |
| "content": "b ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 12, | |
| "content": "c ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 13, | |
| "content": "d ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 14, | |
| "content": "e ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 15, | |
| "content": "f ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 16, | |
| "content": "g ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 17, | |
| "content": "h ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 18, | |
| "content": "i ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 19, | |
| "content": "j ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 20, | |
| "content": "k ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 21, | |
| "content": "l ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 22, | |
| "content": "m ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 23, | |
| "content": "n ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 24, | |
| "content": "o ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 25, | |
| "content": "p ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 26, | |
| "content": "q ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 27, | |
| "content": "r ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 28, | |
| "content": "s ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 29, | |
| "content": "t ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 30, | |
| "content": "u ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 31, | |
| "content": "v ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 32, | |
| "content": "w ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 33, | |
| "content": "x ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 34, | |
| "content": "y ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 35, | |
| "content": "z ", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| } | |
| ], | |
| "normalizer": null, | |
| "pre_tokenizer": { | |
| "type": "ByteLevel", | |
| "add_prefix_space": false, | |
| "trim_offsets": true, | |
| "use_regex": true | |
| }, | |
| "post_processor": { | |
| "type": "ByteLevel", | |
| "add_prefix_space": true, | |
| "trim_offsets": false, | |
| "use_regex": true | |
| }, | |
| "decoder": { | |
| "type": "ByteLevel", | |
| "add_prefix_space": true, | |
| "trim_offsets": true, | |
| "use_regex": true | |
| }, | |
| "model": { | |
| "type": "BPE", | |
| "dropout": null, | |
| "unk_token": null, | |
| "continuing_subword_prefix": "", | |
| "end_of_word_suffix": "", | |
| "fuse_unk": false, | |
| "byte_fallback": false, | |
| "ignore_merges": false, | |
| "vocab": { | |
| "0 ": 0 | |
| }, | |
| "merges": [] | |
| } | |
| } |