{ "added_tokens_decoder": { "0": { "content": "[PAD]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "100": { "content": "[UNK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "101": { "content": "[CLS]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "102": { "content": "[SEP]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "103": { "content": "[MASK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "120138": { "content": "# missing subpieces inferred by greedy wordpiece + gaps", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120139": { "content": "# model: ai-forever/rubert-base", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120140": { "content": "# source txt: ocs_all_sentences.txt", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120141": { "content": "# min_piece_freq = 2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120142": { "content": "# format: subpiecefreq", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120143": { "content": "##҅\t16027", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120144": { "content": "##ѫ\t10509", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120145": { "content": "##ꙑ\t10072", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120146": { "content": "##҆\t9180", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120147": { "content": "##҃\t7225", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120148": { "content": "##ѧ\t5762", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120149": { "content": "##ꙙ\t5468", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120150": { "content": "ꙇ\t4395", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120151": { "content": "##͑\t4228", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120152": { "content": "##ѩ\t4110", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120153": { "content": "##҄\t3343", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120154": { "content": "##̑\t2464", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120155": { "content": "##ѭ\t2377", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120156": { "content": "ѥ҅\t2333", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120157": { "content": "##ꙁ\t2170", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120158": { "content": "##͗\t1982", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120159": { "content": "##ꙿ\t1209", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120160": { "content": "##ⱕ\t1204", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120161": { "content": "##ѥ\t1203", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120162": { "content": "##꙯\t1185", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120163": { "content": "ꙁ\t1145", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120164": { "content": "##ѭ̑\t1103", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120165": { "content": "##ꙗ\t1093", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120166": { "content": "ꙗ҅\t1036", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120167": { "content": "##ѥ҅\t1020", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120168": { "content": "ї\t665", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120169": { "content": "##ѭ҄\t657", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120170": { "content": "ѥ͑\t614", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120171": { "content": "##͆\t597", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120172": { "content": "##҅ꙁ\t575", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120173": { "content": "##ѥ҆\t473", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120174": { "content": "ѩ\t472", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120175": { "content": "##ѣ\t443", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120176": { "content": "##ʼ\t436", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120177": { "content": "##ꙗ҅\t412", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120178": { "content": "ѣ\t379", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120179": { "content": "##҄ѫ\t295", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120180": { "content": "##҆ꙁ\t292", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120181": { "content": "##ѥ͑\t279", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120182": { "content": "##ꙑѧ\t259", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120183": { "content": "##ꙇ\t239", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120184": { "content": "##ѡ̑\t239", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120185": { "content": "ꙇ\t201", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120186": { "content": "##҆ѥ҅\t184", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120187": { "content": "##ꙋ\t182", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120188": { "content": "##ѫѭ̑\t170", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120189": { "content": "##ꙗ҆\t166", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120190": { "content": "##ꙙꙁ\t158", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120191": { "content": "##҄ꙙ\t153", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120192": { "content": "ꙇ҅\t146", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120193": { "content": "ѡ\t145", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120194": { "content": "##ѫѭ\t138", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120195": { "content": "##ѳ\t126", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120196": { "content": "##ѧ҅\t125", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120197": { "content": "##҆ѭ̑\t122", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120198": { "content": "ꙇ҃\t122", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120199": { "content": "ѥ\t114", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120200": { "content": "ѧ\t113", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120201": { "content": "##ꙑѩ\t112", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120202": { "content": "҅\t112", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120203": { "content": "##꙯ꙑ\t108", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120204": { "content": "##ѧ҆\t108", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120205": { "content": "ѭ\t107", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120206": { "content": "##ꙉ\t101", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120207": { "content": "##҆ꙗ\t100", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120208": { "content": "##҆ѥ\t98", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120209": { "content": "ѡ̑\t93", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120210": { "content": "##҄ѭ\t91", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120211": { "content": "##ⸯ\t85", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120212": { "content": "##ꙑѧ҆\t83", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120213": { "content": "ѳ\t82", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120214": { "content": "ѧ҅\t78", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120215": { "content": "ѫ҅\t75", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120216": { "content": "##ѡ҄\t73", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120217": { "content": "##҆ѥ҆\t73", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120218": { "content": "##ѹ\t72", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120219": { "content": "##ѯ\t70", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120220": { "content": "##ѫѭ҄\t69", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120221": { "content": "##ѧѩ\t62", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120222": { "content": "ѫ\t61", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120223": { "content": "ѭ̑\t57", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120224": { "content": "##҆ѭ҄\t57", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120225": { "content": "##ꙙѧ\t52", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120226": { "content": "ꙗ\t51", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120227": { "content": "##ѵ\t48", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120228": { "content": "ѧ҅ꙁꙑ\t48", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120229": { "content": "##ѩѩ\t47", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120230": { "content": "##ѥ͗\t46", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120231": { "content": "ꙗ͑\t46", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120232": { "content": "##ꙁꙑ\t46", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120233": { "content": "##҆ꙗ҆\t46", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120234": { "content": "##҃ѫ\t45", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120235": { "content": "##҆ꙗ҅\t45", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120236": { "content": "ѡ҄\t44", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120237": { "content": "##҃ѩ\t42", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120238": { "content": "ѭ҄\t39", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120239": { "content": "##ѩ҅\t37", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120240": { "content": "##ѡ\t35", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120241": { "content": "##ꙗ͑\t34", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120242": { "content": "##҄ⱕ\t32", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120243": { "content": "ꙇ҅\t31", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120244": { "content": "ѩ҃\t29", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120245": { "content": "ꙉ\t28", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120246": { "content": "##҄ꙗ\t28", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120247": { "content": "##҃҃҃\t27", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120248": { "content": "##ꙗ҅ꙁ\t27", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120249": { "content": "##ѧ͑\t26", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120250": { "content": "##ꙗѥ҅\t26", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120251": { "content": "##̑ѫ\t25", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120252": { "content": "##͆ꙑ\t25", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120253": { "content": "##ꙑꙁ\t25", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120254": { "content": "##҃ѭ\t24", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120255": { "content": "͑\t24", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120256": { "content": "ꙗ҅ꙁ\t24", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120257": { "content": "҆ѡ\t23", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120258": { "content": "ѧ͑\t22", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120259": { "content": "##ꙁꙿ\t22", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120260": { "content": "##ѫ\t21", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120261": { "content": "ѹ\t21", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120262": { "content": "##҄ѧ\t20", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120263": { "content": "##҆ꙁꙙ\t20", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120264": { "content": "##҆ѧ\t20", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120265": { "content": "##͗ѥ͑\t19", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120266": { "content": "##꙯ꙙ\t19", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120267": { "content": "ѫ͑\t18", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120268": { "content": "##ѥѧ\t18", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120269": { "content": "##ꙁꙙ\t18", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120270": { "content": "##ѥ҅ѧ\t18", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120271": { "content": "ѫ҅ꙁ\t18", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120272": { "content": "##ꙁѫ\t18", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120273": { "content": "##ѥ҅ѧ҆\t18", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120274": { "content": "##ѩ\t17", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120275": { "content": "##҃҃\t17", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120276": { "content": "ѱ\t17", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120277": { "content": "##ꙙѧ҆\t17", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120278": { "content": "##ⷬ҇\t17", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120279": { "content": "ѡ\t16", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120280": { "content": "##꙯ѫ\t16", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120281": { "content": "##ѥ҅ѭ҄\t16", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120282": { "content": "##҃ꙑ\t15", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120283": { "content": "##̑ⱕ\t15", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120284": { "content": "##͗ѭ̑\t15", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120285": { "content": "##҆ꙗ҅ѥ҅\t15", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120286": { "content": "ѧ҆\t15", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120287": { "content": "##͗ѭ҄\t14", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120288": { "content": "##ѫѫ\t13", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120289": { "content": "ѩ\t13", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120290": { "content": "##ⰹ\t12", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120291": { "content": "##ѧ͗\t12", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120292": { "content": "##ѧѧ\t12", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120293": { "content": "ѥ҅ѧ\t12", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120294": { "content": "##ѱ\t11", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120295": { "content": "##ꙑѭ̑\t11", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120296": { "content": "##ꙗѭ̑\t11", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120297": { "content": "ѥ҆\t11", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120298": { "content": "##҃ꙉ\t11", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120299": { "content": "##їѣ\t10", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120300": { "content": "##̑ꙙ\t10", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120301": { "content": "##ꙑѧ҅\t10", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120302": { "content": "##ꙙѧ҅\t10", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120303": { "content": "##҅ꙁꙿ\t10", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120304": { "content": "##҆ѧ҆\t10", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120305": { "content": "ѵ҅\t10", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120306": { "content": "ѩ҅\t10", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120307": { "content": "##ꙑѭ\t9", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120308": { "content": "##ⱕѧ\t9", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120309": { "content": "ѫ҅ꙁꙑ\t9", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120310": { "content": "ѥ҅ѧ҆\t9", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120311": { "content": "##ѥ҅ѭ̑\t9", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120312": { "content": "ꙁꙑ\t9", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120313": { "content": "##ꙑⷭ҇\t9", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120314": { "content": "ѯ\t8", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120315": { "content": "##ѫꙁ\t8", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120316": { "content": "##҅ꙗ҅\t8", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120317": { "content": "##ꙗѧ\t8", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120318": { "content": "ѧ҅ꙁ\t8", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120319": { "content": "##ѭ\t7", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120320": { "content": "##͞\t7", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120321": { "content": "##꙯ꙑѧ\t7", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120322": { "content": "ꙁѫ\t7", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120323": { "content": "ѥ҅ѧ҅\t7", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120324": { "content": "##ѭ҅\t7", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120325": { "content": "##ꙑѩ҅\t7", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120326": { "content": "ꙇ҃\t7", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120327": { "content": "##ⷭ҇\t7", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120328": { "content": "##҄ѣ\t7", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120329": { "content": "##ꙇ\t6", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120330": { "content": "͗ѡ\t6", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120331": { "content": "##͆ѫ\t6", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120332": { "content": "##͆ꙙ\t6", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120333": { "content": "ꙗ҆\t6", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120334": { "content": "##҅ѳ\t6", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120335": { "content": "##҆ꙗ҅ѭ̑\t6", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120336": { "content": "##ꙗѧ҆\t6", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120337": { "content": "##ꙗ҅ѭ҄\t6", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120338": { "content": "##ꙑѭ҄\t6", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120339": { "content": "##ї\t5", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120340": { "content": "ѳ҃\t5", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120341": { "content": "ѥѧ\t5", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120342": { "content": "##͗ѥ\t5", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120343": { "content": "##ѡӱ\t5", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120344": { "content": "##꙯ꙑѧ҆\t5", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120345": { "content": "##҅ѡ̑\t5", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120346": { "content": "##҅ѥ҅\t5", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120347": { "content": "##ꙗꙗ\t5", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120348": { "content": "##ꙑѥ҅\t5", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120349": { "content": "##҆ꙗ҅ѧ҆\t5", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120350": { "content": "ѡ҅\t5", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120351": { "content": "##҃҅\t5", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120352": { "content": "ѫ\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120353": { "content": "##ꙑ̈\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120354": { "content": "##ꙉ\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120355": { "content": "##͆ⱕ\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120356": { "content": "##҄ѫѭ҄\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120357": { "content": "##ꙿѳ\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120358": { "content": "##҆ꙗѥ҅\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120359": { "content": "##ꙗѭ҄\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120360": { "content": "##҄ꙙѧ\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120361": { "content": "ѧ҅ꙁѫ\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120362": { "content": "ꙁꙙ\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120363": { "content": "##ꙗ҅ѭ̑\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120364": { "content": "##ӱ\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120365": { "content": "##ꙗ҅ѥ҅\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120366": { "content": "##҆ꙁꙿ\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120367": { "content": "##҄ѫѭ̑\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120368": { "content": "ѵ\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120369": { "content": "ꙇ҃҅\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120370": { "content": "##ⷦ҇\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120371": { "content": "##҇\t4", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120372": { "content": "##҃ѫѭ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120373": { "content": "##ѫѫ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120374": { "content": "ѯ҃\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120375": { "content": "##ѭѫ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120376": { "content": "##ѥ͑ѧ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120377": { "content": "ӱ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120378": { "content": "##̂\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120379": { "content": "͗\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120380": { "content": "ѧ͗\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120381": { "content": "##͗ꙗ͑\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120382": { "content": "##͆ꙑѧ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120383": { "content": "##҄ꙗѥ҅\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120384": { "content": "##҆ꙗ҅ѧ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120385": { "content": "##ꙗ҅ѧ҅\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120386": { "content": "##҆ꙗ҅ѧ҅\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120387": { "content": "##ꙗѥ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120388": { "content": "##ꙗꙿ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120389": { "content": "##ꙁꙋ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120390": { "content": "҆\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120391": { "content": "##ꙿѥ҆\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120392": { "content": "##ѧ҅ꙁꙑ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120393": { "content": "##ꙿꙗ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120394": { "content": "##ⷩ҇\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120395": { "content": "ѭ҅\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120396": { "content": "ꙉ҃\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120397": { "content": "##ѧѩ҅\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120398": { "content": "##ⷯ҇\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120399": { "content": "##ӑ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120400": { "content": "##҃ѧ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120401": { "content": "##҃ꙇ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120402": { "content": "ꙁ꙯\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120403": { "content": "##͆ѣ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120404": { "content": "ꙇꙉ\t3", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120405": { "content": "##҃ѹ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120406": { "content": "##҃҃҃҃\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120407": { "content": "ѻ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120408": { "content": "̒\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120409": { "content": "̓\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120410": { "content": "##ѥ͑ѭ҄\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120411": { "content": "̕\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120412": { "content": "ѥ͑ѧ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120413": { "content": "̔\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120414": { "content": "͛\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120415": { "content": "##ⱕѩ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120416": { "content": "##ѫ̑\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120417": { "content": "##ѥ͑ѧ͗\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120418": { "content": "##͑ѥ͑\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120419": { "content": "##ѥѭ҄\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120420": { "content": "ѥ͗\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120421": { "content": "##͑ѳ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120422": { "content": "##͑ѭ҄\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120423": { "content": "##ѥ͑ѭ̑\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120424": { "content": "##ꙙѧ͑\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120425": { "content": "##͗ꙗ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120426": { "content": "ꙋ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120427": { "content": "##͗ꙗѭ̑\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120428": { "content": "##ѥ҅ѧ҅\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120429": { "content": "##ѡ҄ӱ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120430": { "content": "##ꙋѥ҅\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120431": { "content": "##ѥѧ҆\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120432": { "content": "##҆ꙗ҅ѭ҄\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120433": { "content": "##҆ꙗѥ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120434": { "content": "##ꙋѭ̑\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120435": { "content": "##꙯꙯\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120436": { "content": "##҆ꙗѧ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120437": { "content": "##҆ꙗѭ̑\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120438": { "content": "##ꙋѧ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120439": { "content": "##ѡ̑ӱ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120440": { "content": "##ѡ̑ѳ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120441": { "content": "##ⱕ҆\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120442": { "content": "##ѥ҆ѭ̑\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120443": { "content": "##ꙗ҅ꙿ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120444": { "content": "##҆ꙗѭ҄\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120445": { "content": "##ꙿѥ҅\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120446": { "content": "##ꙑꙗ҅\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120447": { "content": "##ꙗѥ҆\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120448": { "content": "##҆ꙁѫ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120449": { "content": "##҆ꙗ҅ѥ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120450": { "content": "##ꙗѧ҅\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120451": { "content": "##ⱖ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120452": { "content": "ꙋ҅\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120453": { "content": "ѭ҆\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120454": { "content": "ѥ҅ѭ̑\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120455": { "content": "##ѧѧ҆\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120456": { "content": "##ѥѭ̑\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120457": { "content": "##҆ѧ҅\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120458": { "content": "##͡\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120459": { "content": "##҄ꙙѧ҆\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120460": { "content": "##ⷮ҇\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120461": { "content": "ꙇ҃҅\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120462": { "content": "ѡ҅\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120463": { "content": "##ѭѭ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120464": { "content": "##ѫ҄\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120465": { "content": "##ꙇ҅\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120466": { "content": "##ⷧ҇\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120467": { "content": "##҃ѵ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120468": { "content": "##ꙃ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120469": { "content": "##ⱕѧ͑\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120470": { "content": "##҆ꙗѧ҆\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120471": { "content": "ѯ꙯\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120472": { "content": "##ꙙ҆\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120473": { "content": "##ꙙ҅\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120474": { "content": "҅ѥ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120475": { "content": "ѡ҃\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false }, "120476": { "content": "##ѡꙇ\t2", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false, "special": false } }, "clean_up_tokenization_spaces": true, "cls_token": "[CLS]", "do_basic_tokenize": true, "do_lower_case": true, "extra_special_tokens": {}, "mask_token": "[MASK]", "model_max_length": 1000000000000000019884624838656, "never_split": null, "pad_token": "[PAD]", "sep_token": "[SEP]", "strip_accents": null, "tokenize_chinese_chars": true, "tokenizer_class": "BertTokenizer", "unk_token": "[UNK]" }