Instructions to use Omarrran/Kashmiri_Char_Tokenizer with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Omarrran/Kashmiri_Char_Tokenizer with Transformers:
```python
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("Omarrran/Kashmiri_Char_Tokenizer", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "version": "1.0", | |
| "truncation": null, | |
| "padding": null, | |
| "added_tokens": [ | |
| { | |
| "id": 0, | |
| "content": "[PAD]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 1, | |
| "content": "[UNK]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 2, | |
| "content": "[CLS]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 3, | |
| "content": "[SEP]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 4, | |
| "content": "[MASK]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| } | |
| ], | |
| "normalizer": { | |
| "type": "NFC" | |
| }, | |
| "pre_tokenizer": { | |
| "type": "Split", | |
| "pattern": { | |
| "String": "" | |
| }, | |
| "behavior": "Isolated", | |
| "invert": false | |
| }, | |
| "post_processor": { | |
| "type": "TemplateProcessing", | |
| "single": [ | |
| { | |
| "Sequence": { | |
| "id": "A", | |
| "type_id": 0 | |
| } | |
| } | |
| ], | |
| "pair": [ | |
| { | |
| "Sequence": { | |
| "id": "A", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "Sequence": { | |
| "id": "B", | |
| "type_id": 1 | |
| } | |
| } | |
| ], | |
| "special_tokens": {} | |
| }, | |
| "decoder": null, | |
| "model": { | |
| "type": "WordLevel", | |
| "vocab": { | |
| "[PAD]": 0, | |
| "[UNK]": 1, | |
| "[CLS]": 2, | |
| "[SEP]": 3, | |
| "[MASK]": 4, | |
| "[BOS]": 5, | |
| "[EOS]": 6, | |
| "\n": 7, | |
| " ": 8, | |
| "!": 9, | |
| "\"": 10, | |
| "'": 11, | |
| "-": 12, | |
| ".": 13, | |
| "«": 14, | |
| "»": 15, | |
| "": 16, | |
| "،": 17, | |
| "؎": 18, | |
| "ؐ": 19, | |
| "ؑ": 20, | |
| "ؒ": 21, | |
| "ؓ": 22, | |
| "ؔ": 23, | |
| "؛": 24, | |
| "؟": 25, | |
| "ؠ": 26, | |
| "ء": 27, | |
| "آ": 28, | |
| "أ": 29, | |
| "ؤ": 30, | |
| "إ": 31, | |
| "ئ": 32, | |
| "ا": 33, | |
| "ب": 34, | |
| "ت": 35, | |
| "ث": 36, | |
| "ج": 37, | |
| "ح": 38, | |
| "خ": 39, | |
| "د": 40, | |
| "ذ": 41, | |
| "ر": 42, | |
| "ز": 43, | |
| "س": 44, | |
| "ش": 45, | |
| "ص": 46, | |
| "ض": 47, | |
| "ط": 48, | |
| "ظ": 49, | |
| "ع": 50, | |
| "غ": 51, | |
| "ف": 52, | |
| "ق": 53, | |
| "ك": 54, | |
| "ل": 55, | |
| "م": 56, | |
| "ن": 57, | |
| "ه": 58, | |
| "و": 59, | |
| "ً": 60, | |
| "ٍ": 61, | |
| "َ": 62, | |
| "ُ": 63, | |
| "ِ": 64, | |
| "ّ": 65, | |
| "ْ": 66, | |
| "ٓ": 67, | |
| "ٔ": 68, | |
| "ٕ": 69, | |
| "ٖ": 70, | |
| "ٗ": 71, | |
| "٘": 72, | |
| "ٚ": 73, | |
| "ٛ": 74, | |
| "ٟ": 75, | |
| "٠": 76, | |
| "١": 77, | |
| "٢": 78, | |
| "٣": 79, | |
| "٤": 80, | |
| "٥": 81, | |
| "٦": 82, | |
| "٧": 83, | |
| "٨": 84, | |
| "٩": 85, | |
| "٪": 86, | |
| "٭": 87, | |
| "ٮ": 88, | |
| "ٮ۪": 89, | |
| "ٰ": 90, | |
| "ٲ": 91, | |
| "ٳ": 92, | |
| "ٹ": 93, | |
| "پ": 94, | |
| "ٿ": 95, | |
| "ڀ": 96, | |
| "چ": 97, | |
| "ڈ": 98, | |
| "ڑ": 99, | |
| "ژ": 100, | |
| "ڙ": 101, | |
| "ک": 102, | |
| "ڪ": 103, | |
| "ڮ": 104, | |
| "گ": 105, | |
| "ں": 106, | |
| "ھ": 107, | |
| "ہ": 108, | |
| "ۂ": 109, | |
| "ۃ": 110, | |
| "ۄ": 111, | |
| "ۅ": 112, | |
| "ۆ": 113, | |
| "ی": 114, | |
| "ۍ": 115, | |
| "ے": 116, | |
| "ۓ": 117, | |
| "۔": 118, | |
| "۪": 119, | |
| "ۭ": 120, | |
| "۰": 121, | |
| "۱": 122, | |
| "۲": 123, | |
| "۳": 124, | |
| "۴": 125, | |
| "۵": 126, | |
| "۶": 127, | |
| "۷": 128, | |
| "۸": 129, | |
| "۹": 130, | |
| "﴾": 131, | |
| "﴿": 132 | |
| }, | |
| "unk_token": "[UNK]" | |
| } | |
| } |