{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import torch\n", "import transformers\n", "\n", "model_id = \"./\"\n", "tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "tokenizer.pad_token_id" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "128256" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokenizer.audio_token_id" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "128009" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokenizer.eos_token_id" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PreTrainedTokenizerFast(name_or_path='/mnt/jeff/InCar/LLamaNemotronOmni/NemotronOmni', vocab_size=128000, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|begin_of_text|>', 'eos_token': '<|eot_id|>', 'pad_token': '<|eot_id|>', 'additional_special_tokens': ['', '', '', '']}, clean_up_tokenization_spaces=True, added_tokens_decoder={\n", "\t128000: AddedToken(\"<|begin_of_text|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128001: AddedToken(\"<|end_of_text|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128002: AddedToken(\"<|reserved_special_token_0|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128003: AddedToken(\"<|reserved_special_token_1|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128004: AddedToken(\"<|finetune_right_pad_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128005: AddedToken(\"<|reserved_special_token_2|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128006: AddedToken(\"<|start_header_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128007: AddedToken(\"<|end_header_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128008: AddedToken(\"<|eom_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128009: AddedToken(\"<|eot_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128010: AddedToken(\"<|python_tag|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128011: AddedToken(\"<|reserved_special_token_3|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128012: AddedToken(\"<|reserved_special_token_4|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128013: AddedToken(\"<|reserved_special_token_5|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128014: AddedToken(\"<|reserved_special_token_6|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128015: AddedToken(\"<|reserved_special_token_7|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128016: AddedToken(\"<|reserved_special_token_8|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128017: AddedToken(\"<|reserved_special_token_9|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128018: AddedToken(\"<|reserved_special_token_10|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128019: AddedToken(\"<|reserved_special_token_11|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128020: AddedToken(\"<|reserved_special_token_12|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128021: AddedToken(\"<|reserved_special_token_13|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128022: AddedToken(\"<|reserved_special_token_14|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128023: AddedToken(\"<|reserved_special_token_15|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128024: AddedToken(\"<|reserved_special_token_16|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128025: AddedToken(\"<|reserved_special_token_17|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128026: AddedToken(\"<|reserved_special_token_18|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128027: AddedToken(\"<|reserved_special_token_19|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128028: AddedToken(\"<|reserved_special_token_20|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128029: AddedToken(\"<|reserved_special_token_21|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128030: AddedToken(\"<|reserved_special_token_22|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128031: AddedToken(\"<|reserved_special_token_23|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128032: AddedToken(\"<|reserved_special_token_24|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128033: AddedToken(\"<|reserved_special_token_25|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128034: AddedToken(\"<|reserved_special_token_26|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128035: AddedToken(\"<|reserved_special_token_27|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128036: AddedToken(\"<|reserved_special_token_28|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128037: AddedToken(\"<|reserved_special_token_29|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128038: AddedToken(\"<|reserved_special_token_30|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128039: AddedToken(\"<|reserved_special_token_31|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128040: AddedToken(\"<|reserved_special_token_32|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128041: AddedToken(\"<|reserved_special_token_33|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128042: AddedToken(\"<|reserved_special_token_34|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128043: AddedToken(\"<|reserved_special_token_35|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128044: AddedToken(\"<|reserved_special_token_36|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128045: AddedToken(\"<|reserved_special_token_37|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128046: AddedToken(\"<|reserved_special_token_38|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128047: AddedToken(\"<|reserved_special_token_39|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128048: AddedToken(\"<|reserved_special_token_40|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128049: AddedToken(\"<|reserved_special_token_41|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128050: AddedToken(\"<|reserved_special_token_42|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128051: AddedToken(\"<|reserved_special_token_43|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128052: AddedToken(\"<|reserved_special_token_44|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128053: AddedToken(\"<|reserved_special_token_45|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128054: AddedToken(\"<|reserved_special_token_46|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128055: AddedToken(\"<|reserved_special_token_47|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128056: AddedToken(\"<|reserved_special_token_48|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128057: AddedToken(\"<|reserved_special_token_49|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128058: AddedToken(\"<|reserved_special_token_50|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128059: AddedToken(\"<|reserved_special_token_51|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128060: AddedToken(\"<|reserved_special_token_52|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128061: AddedToken(\"<|reserved_special_token_53|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128062: AddedToken(\"<|reserved_special_token_54|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128063: AddedToken(\"<|reserved_special_token_55|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128064: AddedToken(\"<|reserved_special_token_56|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128065: AddedToken(\"<|reserved_special_token_57|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128066: AddedToken(\"<|reserved_special_token_58|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128067: AddedToken(\"<|reserved_special_token_59|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128068: AddedToken(\"<|reserved_special_token_60|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128069: AddedToken(\"<|reserved_special_token_61|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128070: AddedToken(\"<|reserved_special_token_62|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128071: AddedToken(\"<|reserved_special_token_63|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128072: AddedToken(\"<|reserved_special_token_64|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128073: AddedToken(\"<|reserved_special_token_65|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128074: AddedToken(\"<|reserved_special_token_66|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128075: AddedToken(\"<|reserved_special_token_67|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128076: AddedToken(\"<|reserved_special_token_68|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128077: AddedToken(\"<|reserved_special_token_69|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128078: AddedToken(\"<|reserved_special_token_70|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128079: AddedToken(\"<|reserved_special_token_71|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128080: AddedToken(\"<|reserved_special_token_72|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128081: AddedToken(\"<|reserved_special_token_73|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128082: AddedToken(\"<|reserved_special_token_74|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128083: AddedToken(\"<|reserved_special_token_75|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128084: AddedToken(\"<|reserved_special_token_76|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128085: AddedToken(\"<|reserved_special_token_77|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128086: AddedToken(\"<|reserved_special_token_78|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128087: AddedToken(\"<|reserved_special_token_79|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128088: AddedToken(\"<|reserved_special_token_80|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128089: AddedToken(\"<|reserved_special_token_81|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128090: AddedToken(\"<|reserved_special_token_82|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128091: AddedToken(\"<|reserved_special_token_83|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128092: AddedToken(\"<|reserved_special_token_84|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128093: AddedToken(\"<|reserved_special_token_85|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128094: AddedToken(\"<|reserved_special_token_86|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128095: AddedToken(\"<|reserved_special_token_87|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128096: AddedToken(\"<|reserved_special_token_88|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128097: AddedToken(\"<|reserved_special_token_89|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128098: AddedToken(\"<|reserved_special_token_90|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128099: AddedToken(\"<|reserved_special_token_91|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128100: AddedToken(\"<|reserved_special_token_92|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128101: AddedToken(\"<|reserved_special_token_93|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128102: AddedToken(\"<|reserved_special_token_94|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128103: AddedToken(\"<|reserved_special_token_95|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128104: AddedToken(\"<|reserved_special_token_96|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128105: AddedToken(\"<|reserved_special_token_97|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128106: AddedToken(\"<|reserved_special_token_98|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128107: AddedToken(\"<|reserved_special_token_99|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128108: AddedToken(\"<|reserved_special_token_100|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128109: AddedToken(\"<|reserved_special_token_101|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128110: AddedToken(\"<|reserved_special_token_102|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128111: AddedToken(\"<|reserved_special_token_103|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128112: AddedToken(\"<|reserved_special_token_104|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128113: AddedToken(\"<|reserved_special_token_105|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128114: AddedToken(\"<|reserved_special_token_106|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128115: AddedToken(\"<|reserved_special_token_107|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128116: AddedToken(\"<|reserved_special_token_108|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128117: AddedToken(\"<|reserved_special_token_109|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128118: AddedToken(\"<|reserved_special_token_110|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128119: AddedToken(\"<|reserved_special_token_111|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128120: AddedToken(\"<|reserved_special_token_112|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128121: AddedToken(\"<|reserved_special_token_113|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128122: AddedToken(\"<|reserved_special_token_114|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128123: AddedToken(\"<|reserved_special_token_115|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128124: AddedToken(\"<|reserved_special_token_116|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128125: AddedToken(\"<|reserved_special_token_117|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128126: AddedToken(\"<|reserved_special_token_118|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128127: AddedToken(\"<|reserved_special_token_119|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128128: AddedToken(\"<|reserved_special_token_120|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128129: AddedToken(\"<|reserved_special_token_121|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128130: AddedToken(\"<|reserved_special_token_122|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128131: AddedToken(\"<|reserved_special_token_123|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128132: AddedToken(\"<|reserved_special_token_124|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128133: AddedToken(\"<|reserved_special_token_125|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128134: AddedToken(\"<|reserved_special_token_126|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128135: AddedToken(\"<|reserved_special_token_127|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128136: AddedToken(\"<|reserved_special_token_128|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128137: AddedToken(\"<|reserved_special_token_129|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128138: AddedToken(\"<|reserved_special_token_130|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128139: AddedToken(\"<|reserved_special_token_131|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128140: AddedToken(\"<|reserved_special_token_132|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128141: AddedToken(\"<|reserved_special_token_133|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128142: AddedToken(\"<|reserved_special_token_134|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128143: AddedToken(\"<|reserved_special_token_135|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128144: AddedToken(\"<|reserved_special_token_136|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128145: AddedToken(\"<|reserved_special_token_137|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128146: AddedToken(\"<|reserved_special_token_138|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128147: AddedToken(\"<|reserved_special_token_139|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128148: AddedToken(\"<|reserved_special_token_140|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128149: AddedToken(\"<|reserved_special_token_141|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128150: AddedToken(\"<|reserved_special_token_142|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128151: AddedToken(\"<|reserved_special_token_143|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128152: AddedToken(\"<|reserved_special_token_144|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128153: AddedToken(\"<|reserved_special_token_145|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128154: AddedToken(\"<|reserved_special_token_146|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128155: AddedToken(\"<|reserved_special_token_147|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128156: AddedToken(\"<|reserved_special_token_148|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128157: AddedToken(\"<|reserved_special_token_149|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128158: AddedToken(\"<|reserved_special_token_150|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128159: AddedToken(\"<|reserved_special_token_151|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128160: AddedToken(\"<|reserved_special_token_152|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128161: AddedToken(\"<|reserved_special_token_153|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128162: AddedToken(\"<|reserved_special_token_154|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128163: AddedToken(\"<|reserved_special_token_155|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128164: AddedToken(\"<|reserved_special_token_156|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128165: AddedToken(\"<|reserved_special_token_157|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128166: AddedToken(\"<|reserved_special_token_158|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128167: AddedToken(\"<|reserved_special_token_159|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128168: AddedToken(\"<|reserved_special_token_160|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128169: AddedToken(\"<|reserved_special_token_161|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128170: AddedToken(\"<|reserved_special_token_162|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128171: AddedToken(\"<|reserved_special_token_163|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128172: AddedToken(\"<|reserved_special_token_164|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128173: AddedToken(\"<|reserved_special_token_165|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128174: AddedToken(\"<|reserved_special_token_166|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128175: AddedToken(\"<|reserved_special_token_167|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128176: AddedToken(\"<|reserved_special_token_168|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128177: AddedToken(\"<|reserved_special_token_169|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128178: AddedToken(\"<|reserved_special_token_170|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128179: AddedToken(\"<|reserved_special_token_171|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128180: AddedToken(\"<|reserved_special_token_172|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128181: AddedToken(\"<|reserved_special_token_173|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128182: AddedToken(\"<|reserved_special_token_174|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128183: AddedToken(\"<|reserved_special_token_175|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128184: AddedToken(\"<|reserved_special_token_176|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128185: AddedToken(\"<|reserved_special_token_177|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128186: AddedToken(\"<|reserved_special_token_178|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128187: AddedToken(\"<|reserved_special_token_179|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128188: AddedToken(\"<|reserved_special_token_180|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128189: AddedToken(\"<|reserved_special_token_181|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128190: AddedToken(\"<|reserved_special_token_182|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128191: AddedToken(\"<|reserved_special_token_183|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128192: AddedToken(\"<|reserved_special_token_184|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128193: AddedToken(\"<|reserved_special_token_185|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128194: AddedToken(\"<|reserved_special_token_186|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128195: AddedToken(\"<|reserved_special_token_187|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128196: AddedToken(\"<|reserved_special_token_188|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128197: AddedToken(\"<|reserved_special_token_189|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128198: AddedToken(\"<|reserved_special_token_190|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128199: AddedToken(\"<|reserved_special_token_191|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128200: AddedToken(\"<|reserved_special_token_192|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128201: AddedToken(\"<|reserved_special_token_193|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128202: AddedToken(\"<|reserved_special_token_194|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128203: AddedToken(\"<|reserved_special_token_195|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128204: AddedToken(\"<|reserved_special_token_196|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128205: AddedToken(\"<|reserved_special_token_197|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128206: AddedToken(\"<|reserved_special_token_198|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128207: AddedToken(\"<|reserved_special_token_199|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128208: AddedToken(\"<|reserved_special_token_200|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128209: AddedToken(\"<|reserved_special_token_201|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128210: AddedToken(\"<|reserved_special_token_202|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128211: AddedToken(\"<|reserved_special_token_203|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128212: AddedToken(\"<|reserved_special_token_204|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128213: AddedToken(\"<|reserved_special_token_205|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128214: AddedToken(\"<|reserved_special_token_206|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128215: AddedToken(\"<|reserved_special_token_207|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128216: AddedToken(\"<|reserved_special_token_208|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128217: AddedToken(\"<|reserved_special_token_209|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128218: AddedToken(\"<|reserved_special_token_210|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128219: AddedToken(\"<|reserved_special_token_211|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128220: AddedToken(\"<|reserved_special_token_212|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128221: AddedToken(\"<|reserved_special_token_213|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128222: AddedToken(\"<|reserved_special_token_214|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128223: AddedToken(\"<|reserved_special_token_215|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128224: AddedToken(\"<|reserved_special_token_216|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128225: AddedToken(\"<|reserved_special_token_217|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128226: AddedToken(\"<|reserved_special_token_218|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128227: AddedToken(\"<|reserved_special_token_219|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128228: AddedToken(\"<|reserved_special_token_220|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128229: AddedToken(\"<|reserved_special_token_221|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128230: AddedToken(\"<|reserved_special_token_222|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128231: AddedToken(\"<|reserved_special_token_223|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128232: AddedToken(\"<|reserved_special_token_224|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128233: AddedToken(\"<|reserved_special_token_225|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128234: AddedToken(\"<|reserved_special_token_226|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128235: AddedToken(\"<|reserved_special_token_227|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128236: AddedToken(\"<|reserved_special_token_228|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128237: AddedToken(\"<|reserved_special_token_229|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128238: AddedToken(\"<|reserved_special_token_230|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128239: AddedToken(\"<|reserved_special_token_231|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128240: AddedToken(\"<|reserved_special_token_232|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128241: AddedToken(\"<|reserved_special_token_233|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128242: AddedToken(\"<|reserved_special_token_234|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128243: AddedToken(\"<|reserved_special_token_235|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128244: AddedToken(\"<|reserved_special_token_236|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128245: AddedToken(\"<|reserved_special_token_237|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128246: AddedToken(\"<|reserved_special_token_238|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128247: AddedToken(\"<|reserved_special_token_239|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128248: AddedToken(\"<|reserved_special_token_240|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128249: AddedToken(\"<|reserved_special_token_241|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128250: AddedToken(\"<|reserved_special_token_242|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128251: AddedToken(\"<|reserved_special_token_243|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128252: AddedToken(\"<|reserved_special_token_244|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128253: AddedToken(\"<|reserved_special_token_245|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128254: AddedToken(\"<|reserved_special_token_246|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128255: AddedToken(\"<|reserved_special_token_247|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128256: AddedToken(\"\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128257: AddedToken(\"\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128258: AddedToken(\"\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "}\n", ")" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokenizer" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokenizer.add_special_tokens({'additional_special_tokens':['','','','']})" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PreTrainedTokenizerFast(name_or_path='/mnt/jeff/InCar/LLamaNemotronOmni/Llama-3.1-Nemotron-Nano-4B-v1.1', vocab_size=128000, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<|begin_of_text|>', 'eos_token': '<|eot_id|>', 'pad_token': '<|eot_id|>', 'additional_special_tokens': ['', '', '', '']}, clean_up_tokenization_spaces=True, added_tokens_decoder={\n", "\t128000: AddedToken(\"<|begin_of_text|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128001: AddedToken(\"<|end_of_text|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128002: AddedToken(\"<|reserved_special_token_0|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128003: AddedToken(\"<|reserved_special_token_1|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128004: AddedToken(\"<|finetune_right_pad_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128005: AddedToken(\"<|reserved_special_token_2|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128006: AddedToken(\"<|start_header_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128007: AddedToken(\"<|end_header_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128008: AddedToken(\"<|eom_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128009: AddedToken(\"<|eot_id|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128010: AddedToken(\"<|python_tag|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128011: AddedToken(\"<|reserved_special_token_3|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128012: AddedToken(\"<|reserved_special_token_4|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128013: AddedToken(\"<|reserved_special_token_5|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128014: AddedToken(\"<|reserved_special_token_6|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128015: AddedToken(\"<|reserved_special_token_7|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128016: AddedToken(\"<|reserved_special_token_8|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128017: AddedToken(\"<|reserved_special_token_9|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128018: AddedToken(\"<|reserved_special_token_10|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128019: AddedToken(\"<|reserved_special_token_11|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128020: AddedToken(\"<|reserved_special_token_12|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128021: AddedToken(\"<|reserved_special_token_13|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128022: AddedToken(\"<|reserved_special_token_14|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128023: AddedToken(\"<|reserved_special_token_15|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128024: AddedToken(\"<|reserved_special_token_16|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128025: AddedToken(\"<|reserved_special_token_17|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128026: AddedToken(\"<|reserved_special_token_18|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128027: AddedToken(\"<|reserved_special_token_19|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128028: AddedToken(\"<|reserved_special_token_20|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128029: AddedToken(\"<|reserved_special_token_21|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128030: AddedToken(\"<|reserved_special_token_22|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128031: AddedToken(\"<|reserved_special_token_23|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128032: AddedToken(\"<|reserved_special_token_24|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128033: AddedToken(\"<|reserved_special_token_25|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128034: AddedToken(\"<|reserved_special_token_26|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128035: AddedToken(\"<|reserved_special_token_27|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128036: AddedToken(\"<|reserved_special_token_28|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128037: AddedToken(\"<|reserved_special_token_29|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128038: AddedToken(\"<|reserved_special_token_30|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128039: AddedToken(\"<|reserved_special_token_31|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128040: AddedToken(\"<|reserved_special_token_32|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128041: AddedToken(\"<|reserved_special_token_33|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128042: AddedToken(\"<|reserved_special_token_34|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128043: AddedToken(\"<|reserved_special_token_35|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128044: AddedToken(\"<|reserved_special_token_36|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128045: AddedToken(\"<|reserved_special_token_37|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128046: AddedToken(\"<|reserved_special_token_38|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128047: AddedToken(\"<|reserved_special_token_39|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128048: AddedToken(\"<|reserved_special_token_40|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128049: AddedToken(\"<|reserved_special_token_41|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128050: AddedToken(\"<|reserved_special_token_42|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128051: AddedToken(\"<|reserved_special_token_43|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128052: AddedToken(\"<|reserved_special_token_44|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128053: AddedToken(\"<|reserved_special_token_45|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128054: AddedToken(\"<|reserved_special_token_46|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128055: AddedToken(\"<|reserved_special_token_47|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128056: AddedToken(\"<|reserved_special_token_48|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128057: AddedToken(\"<|reserved_special_token_49|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128058: AddedToken(\"<|reserved_special_token_50|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128059: AddedToken(\"<|reserved_special_token_51|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128060: AddedToken(\"<|reserved_special_token_52|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128061: AddedToken(\"<|reserved_special_token_53|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128062: AddedToken(\"<|reserved_special_token_54|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128063: AddedToken(\"<|reserved_special_token_55|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128064: AddedToken(\"<|reserved_special_token_56|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128065: AddedToken(\"<|reserved_special_token_57|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128066: AddedToken(\"<|reserved_special_token_58|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128067: AddedToken(\"<|reserved_special_token_59|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128068: AddedToken(\"<|reserved_special_token_60|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128069: AddedToken(\"<|reserved_special_token_61|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128070: AddedToken(\"<|reserved_special_token_62|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128071: AddedToken(\"<|reserved_special_token_63|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128072: AddedToken(\"<|reserved_special_token_64|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128073: AddedToken(\"<|reserved_special_token_65|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128074: AddedToken(\"<|reserved_special_token_66|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128075: AddedToken(\"<|reserved_special_token_67|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128076: AddedToken(\"<|reserved_special_token_68|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128077: AddedToken(\"<|reserved_special_token_69|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128078: AddedToken(\"<|reserved_special_token_70|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128079: AddedToken(\"<|reserved_special_token_71|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128080: AddedToken(\"<|reserved_special_token_72|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128081: AddedToken(\"<|reserved_special_token_73|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128082: AddedToken(\"<|reserved_special_token_74|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128083: AddedToken(\"<|reserved_special_token_75|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128084: AddedToken(\"<|reserved_special_token_76|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128085: AddedToken(\"<|reserved_special_token_77|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128086: AddedToken(\"<|reserved_special_token_78|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128087: AddedToken(\"<|reserved_special_token_79|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128088: AddedToken(\"<|reserved_special_token_80|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128089: AddedToken(\"<|reserved_special_token_81|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128090: AddedToken(\"<|reserved_special_token_82|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128091: AddedToken(\"<|reserved_special_token_83|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128092: AddedToken(\"<|reserved_special_token_84|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128093: AddedToken(\"<|reserved_special_token_85|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128094: AddedToken(\"<|reserved_special_token_86|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128095: AddedToken(\"<|reserved_special_token_87|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128096: AddedToken(\"<|reserved_special_token_88|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128097: AddedToken(\"<|reserved_special_token_89|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128098: AddedToken(\"<|reserved_special_token_90|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128099: AddedToken(\"<|reserved_special_token_91|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128100: AddedToken(\"<|reserved_special_token_92|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128101: AddedToken(\"<|reserved_special_token_93|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128102: AddedToken(\"<|reserved_special_token_94|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128103: AddedToken(\"<|reserved_special_token_95|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128104: AddedToken(\"<|reserved_special_token_96|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128105: AddedToken(\"<|reserved_special_token_97|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128106: AddedToken(\"<|reserved_special_token_98|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128107: AddedToken(\"<|reserved_special_token_99|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128108: AddedToken(\"<|reserved_special_token_100|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128109: AddedToken(\"<|reserved_special_token_101|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128110: AddedToken(\"<|reserved_special_token_102|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128111: AddedToken(\"<|reserved_special_token_103|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128112: AddedToken(\"<|reserved_special_token_104|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128113: AddedToken(\"<|reserved_special_token_105|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128114: AddedToken(\"<|reserved_special_token_106|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128115: AddedToken(\"<|reserved_special_token_107|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128116: AddedToken(\"<|reserved_special_token_108|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128117: AddedToken(\"<|reserved_special_token_109|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128118: AddedToken(\"<|reserved_special_token_110|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128119: AddedToken(\"<|reserved_special_token_111|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128120: AddedToken(\"<|reserved_special_token_112|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128121: AddedToken(\"<|reserved_special_token_113|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128122: AddedToken(\"<|reserved_special_token_114|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128123: AddedToken(\"<|reserved_special_token_115|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128124: AddedToken(\"<|reserved_special_token_116|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128125: AddedToken(\"<|reserved_special_token_117|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128126: AddedToken(\"<|reserved_special_token_118|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128127: AddedToken(\"<|reserved_special_token_119|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128128: AddedToken(\"<|reserved_special_token_120|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128129: AddedToken(\"<|reserved_special_token_121|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128130: AddedToken(\"<|reserved_special_token_122|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128131: AddedToken(\"<|reserved_special_token_123|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128132: AddedToken(\"<|reserved_special_token_124|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128133: AddedToken(\"<|reserved_special_token_125|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128134: AddedToken(\"<|reserved_special_token_126|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128135: AddedToken(\"<|reserved_special_token_127|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128136: AddedToken(\"<|reserved_special_token_128|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128137: AddedToken(\"<|reserved_special_token_129|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128138: AddedToken(\"<|reserved_special_token_130|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128139: AddedToken(\"<|reserved_special_token_131|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128140: AddedToken(\"<|reserved_special_token_132|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128141: AddedToken(\"<|reserved_special_token_133|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128142: AddedToken(\"<|reserved_special_token_134|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128143: AddedToken(\"<|reserved_special_token_135|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128144: AddedToken(\"<|reserved_special_token_136|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128145: AddedToken(\"<|reserved_special_token_137|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128146: AddedToken(\"<|reserved_special_token_138|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128147: AddedToken(\"<|reserved_special_token_139|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128148: AddedToken(\"<|reserved_special_token_140|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128149: AddedToken(\"<|reserved_special_token_141|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128150: AddedToken(\"<|reserved_special_token_142|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128151: AddedToken(\"<|reserved_special_token_143|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128152: AddedToken(\"<|reserved_special_token_144|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128153: AddedToken(\"<|reserved_special_token_145|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128154: AddedToken(\"<|reserved_special_token_146|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128155: AddedToken(\"<|reserved_special_token_147|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128156: AddedToken(\"<|reserved_special_token_148|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128157: AddedToken(\"<|reserved_special_token_149|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128158: AddedToken(\"<|reserved_special_token_150|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128159: AddedToken(\"<|reserved_special_token_151|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128160: AddedToken(\"<|reserved_special_token_152|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128161: AddedToken(\"<|reserved_special_token_153|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128162: AddedToken(\"<|reserved_special_token_154|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128163: AddedToken(\"<|reserved_special_token_155|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128164: AddedToken(\"<|reserved_special_token_156|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128165: AddedToken(\"<|reserved_special_token_157|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128166: AddedToken(\"<|reserved_special_token_158|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128167: AddedToken(\"<|reserved_special_token_159|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128168: AddedToken(\"<|reserved_special_token_160|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128169: AddedToken(\"<|reserved_special_token_161|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128170: AddedToken(\"<|reserved_special_token_162|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128171: AddedToken(\"<|reserved_special_token_163|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128172: AddedToken(\"<|reserved_special_token_164|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128173: AddedToken(\"<|reserved_special_token_165|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128174: AddedToken(\"<|reserved_special_token_166|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128175: AddedToken(\"<|reserved_special_token_167|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128176: AddedToken(\"<|reserved_special_token_168|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128177: AddedToken(\"<|reserved_special_token_169|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128178: AddedToken(\"<|reserved_special_token_170|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128179: AddedToken(\"<|reserved_special_token_171|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128180: AddedToken(\"<|reserved_special_token_172|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128181: AddedToken(\"<|reserved_special_token_173|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128182: AddedToken(\"<|reserved_special_token_174|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128183: AddedToken(\"<|reserved_special_token_175|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128184: AddedToken(\"<|reserved_special_token_176|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128185: AddedToken(\"<|reserved_special_token_177|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128186: AddedToken(\"<|reserved_special_token_178|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128187: AddedToken(\"<|reserved_special_token_179|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128188: AddedToken(\"<|reserved_special_token_180|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128189: AddedToken(\"<|reserved_special_token_181|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128190: AddedToken(\"<|reserved_special_token_182|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128191: AddedToken(\"<|reserved_special_token_183|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128192: AddedToken(\"<|reserved_special_token_184|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128193: AddedToken(\"<|reserved_special_token_185|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128194: AddedToken(\"<|reserved_special_token_186|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128195: AddedToken(\"<|reserved_special_token_187|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128196: AddedToken(\"<|reserved_special_token_188|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128197: AddedToken(\"<|reserved_special_token_189|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128198: AddedToken(\"<|reserved_special_token_190|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128199: AddedToken(\"<|reserved_special_token_191|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128200: AddedToken(\"<|reserved_special_token_192|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128201: AddedToken(\"<|reserved_special_token_193|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128202: AddedToken(\"<|reserved_special_token_194|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128203: AddedToken(\"<|reserved_special_token_195|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128204: AddedToken(\"<|reserved_special_token_196|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128205: AddedToken(\"<|reserved_special_token_197|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128206: AddedToken(\"<|reserved_special_token_198|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128207: AddedToken(\"<|reserved_special_token_199|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128208: AddedToken(\"<|reserved_special_token_200|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128209: AddedToken(\"<|reserved_special_token_201|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128210: AddedToken(\"<|reserved_special_token_202|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128211: AddedToken(\"<|reserved_special_token_203|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128212: AddedToken(\"<|reserved_special_token_204|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128213: AddedToken(\"<|reserved_special_token_205|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128214: AddedToken(\"<|reserved_special_token_206|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128215: AddedToken(\"<|reserved_special_token_207|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128216: AddedToken(\"<|reserved_special_token_208|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128217: AddedToken(\"<|reserved_special_token_209|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128218: AddedToken(\"<|reserved_special_token_210|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128219: AddedToken(\"<|reserved_special_token_211|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128220: AddedToken(\"<|reserved_special_token_212|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128221: AddedToken(\"<|reserved_special_token_213|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128222: AddedToken(\"<|reserved_special_token_214|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128223: AddedToken(\"<|reserved_special_token_215|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128224: AddedToken(\"<|reserved_special_token_216|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128225: AddedToken(\"<|reserved_special_token_217|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128226: AddedToken(\"<|reserved_special_token_218|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128227: AddedToken(\"<|reserved_special_token_219|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128228: AddedToken(\"<|reserved_special_token_220|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128229: AddedToken(\"<|reserved_special_token_221|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128230: AddedToken(\"<|reserved_special_token_222|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128231: AddedToken(\"<|reserved_special_token_223|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128232: AddedToken(\"<|reserved_special_token_224|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128233: AddedToken(\"<|reserved_special_token_225|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128234: AddedToken(\"<|reserved_special_token_226|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128235: AddedToken(\"<|reserved_special_token_227|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128236: AddedToken(\"<|reserved_special_token_228|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128237: AddedToken(\"<|reserved_special_token_229|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128238: AddedToken(\"<|reserved_special_token_230|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128239: AddedToken(\"<|reserved_special_token_231|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128240: AddedToken(\"<|reserved_special_token_232|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128241: AddedToken(\"<|reserved_special_token_233|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128242: AddedToken(\"<|reserved_special_token_234|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128243: AddedToken(\"<|reserved_special_token_235|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128244: AddedToken(\"<|reserved_special_token_236|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128245: AddedToken(\"<|reserved_special_token_237|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128246: AddedToken(\"<|reserved_special_token_238|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128247: AddedToken(\"<|reserved_special_token_239|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128248: AddedToken(\"<|reserved_special_token_240|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128249: AddedToken(\"<|reserved_special_token_241|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128250: AddedToken(\"<|reserved_special_token_242|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128251: AddedToken(\"<|reserved_special_token_243|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128252: AddedToken(\"<|reserved_special_token_244|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128253: AddedToken(\"<|reserved_special_token_245|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128254: AddedToken(\"<|reserved_special_token_246|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128255: AddedToken(\"<|reserved_special_token_247|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128256: AddedToken(\"\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128257: AddedToken(\"\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "\t128258: AddedToken(\"\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n", "}\n", ")" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokenizer" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "tokenizer.audio_token = \"\"\n", "tokenizer.boa_token = \"\"\n", "tokenizer.eoa_token = \"\"\n", "tokenizer.audio_token_id = 128256" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('../tokenizer/tokenizer_config.json',\n", " '../tokenizer/special_tokens_map.json',\n", " '../tokenizer/tokenizer.json')" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tokenizer.save_pretrained('../tokenizer')" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "You are using a model of type NemotronOmni to instantiate a model of type nemotronOmni. This is not supported for all configurations of models and can yield errors.\n" ] } ], "source": [ "from transformers import AutoConfig\n", "config = AutoConfig.from_pretrained('/mnt/jeff/InCar/LlamaNemotronOmni/Llama-3.1-NemotronOmni')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'llama'" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "config.model_type" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 2 }