| { | |
| "version": "1.0", | |
| "truncation": null, | |
| "padding": null, | |
| "added_tokens": [ | |
| { | |
| "id": 0, | |
| "content": "<unk>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 67, | |
| "content": "<s>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 68, | |
| "content": "</s>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 69, | |
| "content": "<pad>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 70, | |
| "content": "<mask>", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| } | |
| ], | |
| "normalizer": { | |
| "type": "NFKC" | |
| }, | |
| "pre_tokenizer": { | |
| "type": "Metaspace", | |
| "replacement": "▁", | |
| "add_prefix_space": true, | |
| "prepend_scheme": "always" | |
| }, | |
| "post_processor": null, | |
| "decoder": { | |
| "type": "Metaspace", | |
| "replacement": "▁", | |
| "add_prefix_space": true, | |
| "prepend_scheme": "always" | |
| }, | |
| "model": { | |
| "type": "BPE", | |
| "dropout": null, | |
| "unk_token": "<unk>", | |
| "continuing_subword_prefix": null, | |
| "end_of_word_suffix": null, | |
| "fuse_unk": false, | |
| "byte_fallback": false, | |
| "vocab": { | |
| "<unk>": 0, | |
| "ං": 1, | |
| "අ": 2, | |
| "ආ": 3, | |
| "ඇ": 4, | |
| "ඈ": 5, | |
| "ඉ": 6, | |
| "ඊ": 7, | |
| "උ": 8, | |
| "ඌ": 9, | |
| "එ": 10, | |
| "ඒ": 11, | |
| "ඓ": 12, | |
| "ඔ": 13, | |
| "ඕ": 14, | |
| "ක": 15, | |
| "ඛ": 16, | |
| "ග": 17, | |
| "ඝ": 18, | |
| "ඟ": 19, | |
| "ච": 20, | |
| "ඡ": 21, | |
| "ජ": 22, | |
| "ඤ": 23, | |
| "ට": 24, | |
| "ඨ": 25, | |
| "ඩ": 26, | |
| "ඪ": 27, | |
| "ණ": 28, | |
| "ඬ": 29, | |
| "ත": 30, | |
| "ථ": 31, | |
| "ද": 32, | |
| "ධ": 33, | |
| "න": 34, | |
| "ඳ": 35, | |
| "ප": 36, | |
| "ඵ": 37, | |
| "බ": 38, | |
| "භ": 39, | |
| "ම": 40, | |
| "ඹ": 41, | |
| "ය": 42, | |
| "ර": 43, | |
| "ල": 44, | |
| "ව": 45, | |
| "ශ": 46, | |
| "ෂ": 47, | |
| "ස": 48, | |
| "හ": 49, | |
| "ළ": 50, | |
| "ෆ": 51, | |
| "්": 52, | |
| "ා": 53, | |
| "ැ": 54, | |
| "ෑ": 55, | |
| "ි": 56, | |
| "ී": 57, | |
| "ු": 58, | |
| "ූ": 59, | |
| "ෙ": 60, | |
| "ේ": 61, | |
| "ෛ": 62, | |
| "ො": 63, | |
| "ෝ": 64, | |
| "ෞ": 65, | |
| "▁": 66 | |
| }, | |
| "merges": [] | |
| } | |
| } |