| { | |
| "version": "1.0", | |
| "truncation": null, | |
| "padding": null, | |
| "added_tokens": [ | |
| { | |
| "id": 0, | |
| "content": "[PAD]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 1, | |
| "content": "[UNK]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 2, | |
| "content": "[CLS]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 3, | |
| "content": "[SEP]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 4, | |
| "content": "[MASK]", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": false, | |
| "special": true | |
| }, | |
| { | |
| "id": 500, | |
| "content": ".", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 501, | |
| "content": ",", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 502, | |
| "content": "!", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 503, | |
| "content": "?", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 504, | |
| "content": "-", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 505, | |
| "content": ":", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 506, | |
| "content": ";", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 507, | |
| "content": "/", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 508, | |
| "content": "(", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 509, | |
| "content": ")", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 510, | |
| "content": "'", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 511, | |
| "content": "\"", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 512, | |
| "content": "...", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 513, | |
| "content": "0", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 514, | |
| "content": "1", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 515, | |
| "content": "2", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 516, | |
| "content": "3", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 517, | |
| "content": "4", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 518, | |
| "content": "5", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 519, | |
| "content": "6", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 520, | |
| "content": "7", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 521, | |
| "content": "8", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| }, | |
| { | |
| "id": 522, | |
| "content": "9", | |
| "single_word": false, | |
| "lstrip": false, | |
| "rstrip": false, | |
| "normalized": true, | |
| "special": false | |
| } | |
| ], | |
| "normalizer": { | |
| "type": "BertNormalizer", | |
| "clean_text": true, | |
| "handle_chinese_chars": true, | |
| "strip_accents": null, | |
| "lowercase": false | |
| }, | |
| "pre_tokenizer": { | |
| "type": "BertPreTokenizer" | |
| }, | |
| "post_processor": { | |
| "type": "TemplateProcessing", | |
| "single": [ | |
| { | |
| "SpecialToken": { | |
| "id": "[CLS]", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "Sequence": { | |
| "id": "A", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "SpecialToken": { | |
| "id": "[SEP]", | |
| "type_id": 0 | |
| } | |
| } | |
| ], | |
| "pair": [ | |
| { | |
| "SpecialToken": { | |
| "id": "[CLS]", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "Sequence": { | |
| "id": "A", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "SpecialToken": { | |
| "id": "[SEP]", | |
| "type_id": 0 | |
| } | |
| }, | |
| { | |
| "Sequence": { | |
| "id": "B", | |
| "type_id": 1 | |
| } | |
| }, | |
| { | |
| "SpecialToken": { | |
| "id": "[SEP]", | |
| "type_id": 1 | |
| } | |
| } | |
| ], | |
| "special_tokens": { | |
| "[CLS]": { | |
| "id": "[CLS]", | |
| "ids": [ | |
| 2 | |
| ], | |
| "tokens": [ | |
| "[CLS]" | |
| ] | |
| }, | |
| "[SEP]": { | |
| "id": "[SEP]", | |
| "ids": [ | |
| 3 | |
| ], | |
| "tokens": [ | |
| "[SEP]" | |
| ] | |
| } | |
| } | |
| }, | |
| "decoder": { | |
| "type": "WordPiece", | |
| "prefix": "##", | |
| "cleanup": true | |
| }, | |
| "model": { | |
| "type": "WordPiece", | |
| "unk_token": "[UNK]", | |
| "continuing_subword_prefix": "##", | |
| "max_input_chars_per_word": 100, | |
| "vocab": { | |
| "[PAD]": 0, | |
| "[UNK]": 1, | |
| "[CLS]": 2, | |
| "[SEP]": 3, | |
| "[MASK]": 4, | |
| "": 5, | |
| "ஂ": 6, | |
| "ஃ": 7, | |
| "அ": 8, | |
| "ஆ": 9, | |
| "இ": 10, | |
| "ஈ": 11, | |
| "உ": 12, | |
| "ஊ": 13, | |
| "": 14, | |
| "எ": 15, | |
| "ஏ": 16, | |
| "ஐ": 17, | |
| "": 18, | |
| "ஒ": 19, | |
| "ஓ": 20, | |
| "ஔ": 21, | |
| "க": 22, | |
| "": 23, | |
| "": 24, | |
| "ங": 25, | |
| "ச": 26, | |
| "ஜ": 27, | |
| "ஞ": 28, | |
| "ட": 29, | |
| "": 30, | |
| "": 31, | |
| "": 32, | |
| "ண": 33, | |
| "த": 34, | |
| "": 35, | |
| "": 36, | |
| "": 37, | |
| "ந": 38, | |
| "ன": 39, | |
| "ப": 40, | |
| "": 41, | |
| "": 42, | |
| "": 43, | |
| "ம": 44, | |
| "ய": 45, | |
| "ர": 46, | |
| "ற": 47, | |
| "ல": 48, | |
| "ள": 49, | |
| "ழ": 50, | |
| "வ": 51, | |
| "ஶ": 52, | |
| "ஷ": 53, | |
| "ஸ": 54, | |
| "ஹ": 55, | |
| "": 56, | |
| "ா": 57, | |
| "ி": 58, | |
| "ீ": 59, | |
| "ு": 60, | |
| "ூ": 61, | |
| "": 62, | |
| "ெ": 63, | |
| "ே": 64, | |
| "ை": 65, | |
| "": 66, | |
| "ொ": 67, | |
| "ோ": 68, | |
| "ௌ": 69, | |
| "்": 70, | |
| "ௐ": 71, | |
| "ௗ": 72, | |
| "": 73, | |
| "௦": 74, | |
| "௧": 75, | |
| "௨": 76, | |
| "௩": 77, | |
| "௪": 78, | |
| "௫": 79, | |
| "௬": 80, | |
| "௭": 81, | |
| "௮": 82, | |
| "௯": 83, | |
| "௰": 84, | |
| "௱": 85, | |
| "௲": 86, | |
| "௳": 87, | |
| "௴": 88, | |
| "௵": 89, | |
| "௶": 90, | |
| "௷": 91, | |
| "௸": 92, | |
| "௹": 93, | |
| "௺": 94, | |
| "": 95, | |
| "": 96, | |
| "##ல": 97, | |
| "##ை": 98, | |
| "##க": 99, | |
| "##்": 100, | |
| "##ு": 101, | |
| "##த": 102, | |
| "##ர": 103, | |
| "##வ": 104, | |
| "##ே": 105, | |
| "##ற": 106, | |
| "##ம": 107, | |
| "##ப": 108, | |
| "##ன": 109, | |
| "##ட": 110, | |
| "##ி": 111, | |
| "##ா": 112, | |
| "##ச": 113, | |
| "##ூ": 114, | |
| "##ழ": 115, | |
| "##ந": 116, | |
| "##ோ": 117, | |
| "##ொ": 118, | |
| "##ெ": 119, | |
| "##ள": 120, | |
| "##ங": 121, | |
| "##ய": 122, | |
| "##ஞ": 123, | |
| "##ண": 124, | |
| "##ஸ": 125, | |
| "##ஜ": 126, | |
| "##ஷ": 127, | |
| "##ீ": 128, | |
| "##ஹ": 129, | |
| "##உ": 130, | |
| "##ஃ": 131, | |
| "##அ": 132, | |
| "##ஓ": 133, | |
| "##எ": 134, | |
| "##ஆ": 135, | |
| "##ஊ": 136, | |
| "##இ": 137, | |
| "##ௌ": 138, | |
| "##ஏ": 139, | |
| "##ஒ": 140, | |
| "##ஐ": 141, | |
| "##௫": 142, | |
| "##ஶ": 143, | |
| "##௯": 144, | |
| "##": 145, | |
| "##ஈ": 146, | |
| "##": 147, | |
| "##ஔ": 148, | |
| "##௦": 149, | |
| "##௧": 150, | |
| "##௰": 151, | |
| "##௪": 152, | |
| "##ஂ": 153, | |
| "##௱": 154, | |
| "##ௗ": 155, | |
| "##௬": 156, | |
| "##": 157, | |
| "##": 158, | |
| "##௩": 159, | |
| "##": 160, | |
| "##ௐ": 161, | |
| "##௲": 162, | |
| "##௭": 163, | |
| "##": 164, | |
| "##௮": 165, | |
| "##௨": 166, | |
| "##": 167, | |
| "##௵": 168, | |
| "##": 169, | |
| "##": 170, | |
| "##௶": 171, | |
| "##": 172, | |
| "##௹": 173, | |
| "##௸": 174, | |
| "##": 175, | |
| "##௴": 176, | |
| "##": 177, | |
| "##௳": 178, | |
| "##": 179, | |
| "##": 180, | |
| "##": 181, | |
| "##": 182, | |
| "##": 183, | |
| "##": 184, | |
| "##்க": 185, | |
| "##்த": 186, | |
| "##ம்": 187, | |
| "##ன்": 188, | |
| "##ல்": 189, | |
| "##க்க": 190, | |
| "##்ட": 191, | |
| "##ப்": 192, | |
| "##த்த": 193, | |
| "##ள்": 194, | |
| "##ும்": 195, | |
| "##ர்": 196, | |
| "##ிய": 197, | |
| "##ப்ப": 198, | |
| "##ரு": 199, | |
| "##ந்த": 200, | |
| "##ட்ட": 201, | |
| "##து": 202, | |
| "##ில்": 203, | |
| "##ங்க": 204, | |
| "##ைய": 205, | |
| "##ற்": 206, | |
| "##ின்": 207, | |
| "##ாக": 208, | |
| "##று": 209, | |
| "##ிர": 210, | |
| "##டு": 211, | |
| "##ிக": 212, | |
| "##ண்ட": 213, | |
| "##்ச": 214, | |
| "##க்கு": 215, | |
| "##ர்க": 216, | |
| "##ிற": 217, | |
| "##ில": 218, | |
| "மு": 219, | |
| "##ான": 220, | |
| "##த்து": 221, | |
| "செ": 222, | |
| "என்": 223, | |
| "##டி": 224, | |
| "வி": 225, | |
| "##லை": 226, | |
| "##ற்ற": 227, | |
| "##ள்ள": 228, | |
| "##ார": 229, | |
| "##தி": 230, | |
| "##ார்": 231, | |
| "##ப்பு": 232, | |
| "##ிரு": 233, | |
| "##வு": 234, | |
| "##ட்டு": 235, | |
| "##ல்ல": 236, | |
| "##ரி": 237, | |
| "##வி": 238, | |
| "##க்": 239, | |
| "கு": 240, | |
| "##ான்": 241, | |
| "##ந்து": 242, | |
| "##ால்": 243, | |
| "##ளை": 244, | |
| "##ய்": 245, | |
| "##ச்ச": 246, | |
| "கொ": 247, | |
| "##த்": 248, | |
| "போ": 249, | |
| "இரு": 250, | |
| "##னை": 251, | |
| "அவ": 252, | |
| "கா": 253, | |
| "##ர்கள்": 254, | |
| "##ங்கள்": 255, | |
| "பெ": 256, | |
| "##ண்": 257, | |
| "##ம்ப": 258, | |
| "##றி": 259, | |
| "##ஸ்": 260, | |
| "##ாத": 261, | |
| "##மி": 262, | |
| "பு": 263, | |
| "##கள்": 264, | |
| "##கு": 265, | |
| "##ாவ": 266, | |
| "##மை": 267, | |
| "##ளு": 268, | |
| "வே": 269, | |
| "ஒரு": 270, | |
| "##க்கும்": 271, | |
| "##ின": 272, | |
| "##ழு": 273, | |
| "பா": 274, | |
| "அத": 275, | |
| "தொ": 276, | |
| "இந்த": 277, | |
| "வெ": 278, | |
| "##ண்டு": 279, | |
| "##ாம்": 280, | |
| "வா": 281, | |
| "##ற்க": 282, | |
| "##த்தில்": 283, | |
| "##டை": 284, | |
| "##ன்ன": 285, | |
| "செய": 286, | |
| "##ன்ற": 287, | |
| "##ழ்": 288, | |
| "##மா": 289, | |
| "##ிக்க": 290, | |
| "##டிய": 291, | |
| "நா": 292, | |
| "மா": 293, | |
| "##ச்": 294, | |
| "##ரை": 295, | |
| "##ரா": 296, | |
| "##வா": 297, | |
| "##ரிய": 298, | |
| "##தை": 299, | |
| "##ையில்": 300, | |
| "##ட்": 301, | |
| "##ளி": 302, | |
| "கூ": 303, | |
| "பொ": 304, | |
| "##வே": 305, | |
| "சு": 306, | |
| "##ால": 307, | |
| "##்த்த": 308, | |
| "தமி": 309, | |
| "மே": 310, | |
| "என": 311, | |
| "##றை": 312, | |
| "தே": 313, | |
| "சொ": 314, | |
| "பிர": 315, | |
| "##ங்கள": 316, | |
| "##வை": 317, | |
| "##ாம": 318, | |
| "சி": 319, | |
| "##ப்பட்ட": 320, | |
| "##ற்ப": 321, | |
| "##ையும்": 322, | |
| "##மாக": 323, | |
| "நி": 324, | |
| "##மு": 325, | |
| "##ண்ண": 326, | |
| "பே": 327, | |
| "##த்தை": 328, | |
| "##கிற": 329, | |
| "##திய": 330, | |
| "##ளுக்கு": 331, | |
| "தெ": 332, | |
| "என்று": 333, | |
| "##ட்ச": 334, | |
| "கோ": 335, | |
| "நீ": 336, | |
| "செய்த": 337, | |
| "##ிகள்": 338, | |
| "##வர்": 339, | |
| "##னி": 340, | |
| "##மான": 341, | |
| "##பு": 342, | |
| "என்ற": 343, | |
| "##வும்": 344, | |
| "##சு": 345, | |
| "##ன்று": 346, | |
| "##டுத்த": 347, | |
| "##னு": 348, | |
| "##கள": 349, | |
| "##டன்": 350, | |
| "மற்ற": 351, | |
| "##லி": 352, | |
| "##்கள்": 353, | |
| "##ர்கள": 354, | |
| "உள்ள": 355, | |
| "##ரும்": 356, | |
| "பகு": 357, | |
| "##சி": 358, | |
| "##ற்று": 359, | |
| "##ப்பட": 360, | |
| "##ாள": 361, | |
| "அர": 362, | |
| "செய்": 363, | |
| "பி": 364, | |
| "இத": 365, | |
| "##வத": 366, | |
| "##ணி": 367, | |
| "##வில்": 368, | |
| "##ின்ற": 369, | |
| "##ழி": 370, | |
| "##ாய": 371, | |
| "கே": 372, | |
| "##க்கிற": 373, | |
| "என்ப": 374, | |
| "##ேன்": 375, | |
| "நட": 376, | |
| "து": 377, | |
| "கி": 378, | |
| "##்கு": 379, | |
| "##சிய": 380, | |
| "##னர்": 381, | |
| "திரு": 382, | |
| "##ஞ்ச": 383, | |
| "மற்றும்": 384, | |
| "##டைய": 385, | |
| "##ண்டும்": 386, | |
| "##ிக்": 387, | |
| "தொட": 388, | |
| "வை": 389, | |
| "##பா": 390, | |
| "முத": 391, | |
| "##கம்": 392, | |
| "##டம்": 393, | |
| "மூ": 394, | |
| "##ங்கு": 395, | |
| "##லா": 396, | |
| "கரு": 397, | |
| "சே": 398, | |
| "##ியா": 399, | |
| "பய": 400, | |
| "செய்ய": 401, | |
| "வீ": 402, | |
| "பல": 403, | |
| "ஆக": 404, | |
| "##மைய": 405, | |
| "வரு": 406, | |
| "##வர": 407, | |
| "##ட்டி": 408, | |
| "மீ": 409, | |
| "##களை": 410, | |
| "##னால்": 411, | |
| "##வ்": 412, | |
| "##ப்பா": 413, | |
| "##ளிய": 414, | |
| "இது": 415, | |
| "எழு": 416, | |
| "இருந்த": 417, | |
| "பகுப்பு": 418, | |
| "அறி": 419, | |
| "தி": 420, | |
| "தமிழ்": 421, | |
| "##ப்பி": 422, | |
| "நில": 423, | |
| "##மே": 424, | |
| "அந்த": 425, | |
| "##ும்ப": 426, | |
| "வர": 427, | |
| "பதி": 428, | |
| "##ப்போ": 429, | |
| "##க்கிய": 430, | |
| "நே": 431, | |
| "தலை": 432, | |
| "தமிழ": 433, | |
| "##கை": 434, | |
| "##ணை": 435, | |
| "##லாம்": 436, | |
| "பார": 437, | |
| "##த்தின்": 438, | |
| "##விய": 439, | |
| "வழ": 440, | |
| "##ிருந்த": 441, | |
| "##டுத்து": 442, | |
| "கொண்ட": 443, | |
| "##ர்க்க": 444, | |
| "##ம்பர்": 445, | |
| "குறி": 446, | |
| "##ையை": 447, | |
| "கட": 448, | |
| "என்ன": 449, | |
| "##ூர்": 450, | |
| "அமை": 451, | |
| "##ற்கு": 452, | |
| "##ரம்": 453, | |
| "##லு": 454, | |
| "##ன்ப": 455, | |
| "##நா": 456, | |
| "##கிறது": 457, | |
| "##ிலும்": 458, | |
| "தீ": 459, | |
| "##ழை": 460, | |
| "##க்கள்": 461, | |
| "##வது": 462, | |
| "##லம்": 463, | |
| "##ங்களை": 464, | |
| "##ார்கள்": 465, | |
| "வெளிய": 466, | |
| "இய": 467, | |
| "##ிகள": 468, | |
| "இர": 469, | |
| "##ற்றி": 470, | |
| "##யர்": 471, | |
| "##ணம்": 472, | |
| "சம": 473, | |
| "##ங்கில": 474, | |
| "சா": 475, | |
| "##த்திய": 476, | |
| "சொல்ல": 477, | |
| "##க்கி": 478, | |
| "அதிக": 479, | |
| "வேண்டும்": 480, | |
| "##ாது": 481, | |
| "##னா": 482, | |
| "பத": 483, | |
| "நான்": 484, | |
| "அல்ல": 485, | |
| "இல்": 486, | |
| "பின்": 487, | |
| "இல": 488, | |
| "##ரோ": 489, | |
| "##பி": 490, | |
| "சிற": 491, | |
| "திர": 492, | |
| "##க்கம்": 493, | |
| "##ஸ்ட": 494, | |
| "வந்த": 495, | |
| "##போ": 496, | |
| "##ிற்கு": 497, | |
| "##டிக்க": 498, | |
| "பிற": 499 | |
| } | |
| } | |
| } |