yujiepan commited on
Commit
d3ee99d
·
verified ·
1 Parent(s): db1638b

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ base_model:
4
+ - meituan-longcat/LongCat-Flash-Lite
5
+ ---
6
+
7
+ This tiny model is intended for debugging. It is randomly initialized using a configuration adapted from [meituan-longcat/LongCat-Flash-Lite](https://huggingface.co/meituan-longcat/LongCat-Flash-Lite).
8
+
9
+ | File path | Size |
10
+ |------|------|
11
+ | model.safetensors | 8.4MB |
12
+
13
+
14
+ ### Example usage:
15
+
16
+ ```python
17
+ import torch
18
+ import transformers
19
+
20
+ model_id = "tiny-random/longcat-flash-lite"
21
+ pipe = transformers.pipelines.pipeline(
22
+ 'text-generation',
23
+ model=model_id,
24
+ trust_remote_code=True,
25
+ device_map='cuda',
26
+ torch_dtype=torch.bfloat16,
27
+ )
28
+ past_key_values = transformers.DynamicCache(config=None) # set config to None
29
+ r = pipe('Hello, world!', past_key_values=past_key_values, max_new_tokens=32)
30
+ print(r)
31
+ ```
32
+
33
+ ### Codes to create this repo:
34
+
35
+ <details>
36
+ <summary>Python codes</summary>
37
+
38
+ ```python
39
+ import json
40
+ from copy import deepcopy
41
+ from pathlib import Path
42
+
43
+ import torch
44
+ import torch.nn as nn
45
+ from huggingface_hub import file_exists, hf_hub_download
46
+ from transformers import (
47
+ AutoConfig,
48
+ AutoModelForCausalLM,
49
+ AutoProcessor,
50
+ AutoTokenizer,
51
+ GenerationConfig,
52
+ set_seed,
53
+ )
54
+ from transformers.models.glm4_moe.modeling_glm4_moe import Glm4MoeRMSNorm
55
+ source_model_id = "meituan-longcat/LongCat-Flash-Lite"
56
+ save_folder = "/tmp/tiny-random/longcat-flash-lite"
57
+
58
+ Path(save_folder).mkdir(parents=True, exist_ok=True)
59
+ tokenizer = AutoTokenizer.from_pretrained(source_model_id, trust_remote_code=True)
60
+ tokenizer.save_pretrained(save_folder)
61
+
62
+ with open(hf_hub_download(source_model_id, filename='config.json', repo_type='model'), 'r', encoding='utf-8') as f:
63
+ config_json = json.load(f)
64
+ for k, v in config_json['auto_map'].items():
65
+ config_json['auto_map'][k] = f'{source_model_id}--{v}'
66
+ config_json.update({
67
+ 'num_layers': 2,
68
+ 'hidden_size': 8,
69
+ 'ffn_hidden_size': 32,
70
+ 'expert_ffn_hidden_size': 32,
71
+ 'num_attention_heads': 4,
72
+ 'kv_lora_rank': 384,
73
+ 'n_routed_experts': 32,
74
+ 'q_lora_rank': 32,
75
+ 'qk_nope_head_dim': 64,
76
+ 'qk_rope_head_dim': 192,
77
+ 'head_dim': 192,
78
+ 'qk_head_dim': 256,
79
+ 'v_head_dim': 64,
80
+ 'moe_topk': 12,
81
+ 'zero_expert_num': 16,
82
+ 'emb_split_num': 2,
83
+ 'emb_neighbor_num': 2,
84
+ 'ngram_vocab_size_ratio': 4,
85
+ })
86
+ # del config_json['quantization_config']
87
+ with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
88
+ json.dump(config_json, f, indent=2)
89
+
90
+ config = AutoConfig.from_pretrained(
91
+ save_folder,
92
+ trust_remote_code=True,
93
+ )
94
+ print(config)
95
+ torch.set_default_dtype(torch.bfloat16)
96
+ model = AutoModelForCausalLM.from_config(config, trust_remote_code=True)
97
+ if file_exists(filename="generation_config.json", repo_id=source_model_id, repo_type='model'):
98
+ model.generation_config = GenerationConfig.from_pretrained(
99
+ source_model_id, trust_remote_code=True,
100
+ )
101
+ model = model.cpu()
102
+ # MTP
103
+ model.model.mtp = nn.ModuleDict({
104
+ "layers": nn.ModuleList([nn.ModuleDict(dict(
105
+ eh_proj=nn.Linear(config.hidden_size * 2, config.hidden_size, bias=False),
106
+ enorm=nn.ModuleDict({"m": nn.RMSNorm(config.hidden_size)}),
107
+ hnorm=nn.ModuleDict({"m": nn.RMSNorm(config.hidden_size)}),
108
+ input_layernorm=nn.RMSNorm(config.hidden_size),
109
+ post_attention_layernorm=nn.RMSNorm(config.hidden_size),
110
+ self_attn=deepcopy(model.model.layers[0].self_attn[0]),
111
+ transformer_layer=nn.ModuleDict({"mlp": deepcopy(model.model.layers[0].mlps[0])}),
112
+ ))]),
113
+ "norm": nn.RMSNorm(config.hidden_size),
114
+ })
115
+ for i in range(config.num_layers):
116
+ model.model.layers[i].mlp.router = model.model.layers[i].mlp.router.float()
117
+ # model.model.layers[i].mlp.router.e_score_correction_bias = torch.zeros((config.n_routed_experts + config.zero_expert_num)).float()
118
+ set_seed(42)
119
+ with torch.no_grad():
120
+ for name, p in sorted(model.named_parameters()):
121
+ torch.nn.init.normal_(p, 0, 0.1)
122
+ print(name, p.shape, p.dtype)
123
+ model.model.mtp.embed_tokens = deepcopy(model.model.embed_tokens)
124
+ model.model.ngram_embeddings = None # avoid saving shared params
125
+
126
+ model.save_pretrained(save_folder)
127
+ torch.set_default_dtype(torch.float32)
128
+
129
+ print(model.model.rotary_emb.inv_freq.shape)
130
+ # 1 / 0
131
+ # for n, m in model.named_modules():
132
+ # if 'LongcatFlashMLA' in str(type(m)):
133
+ # print(n, m.layer_idx)
134
+
135
+ with open(f"{save_folder}/config.json", "r", encoding='utf-8') as f:
136
+ config_json = json.load(f)
137
+ config_json['auto_map'] = {k: v.split('--')[-1] for k, v in config_json['auto_map'].items()}
138
+ with open(f"{save_folder}/config.json", "w", encoding='utf-8') as f:
139
+ json.dump(config_json, f, indent=2)
140
+ ```
141
+
142
+ </details>
143
+
144
+ ### Printing the model:
145
+
146
+ <details><summary>Click to expand</summary>
147
+
148
+ ```text
149
+ LongcatFlashNgramForCausalLM(
150
+ (model): LongcatFlashNgramModel(
151
+ (embed_tokens): Embedding(131072, 8)
152
+ (layers): ModuleList(
153
+ (0-1): 2 x LongcatFlashDecoderLayer(
154
+ (mlp): LongcatFlashMoE(
155
+ (experts): ModuleList(
156
+ (0-31): 32 x LongcatFlashMLP(
157
+ (gate_proj): Linear(in_features=8, out_features=32, bias=False)
158
+ (up_proj): Linear(in_features=8, out_features=32, bias=False)
159
+ (down_proj): Linear(in_features=32, out_features=8, bias=False)
160
+ (act_fn): SiLUActivation()
161
+ )
162
+ (32-47): 16 x Identity()
163
+ )
164
+ (router): LongcatFlashTopkRouter(
165
+ (classifier): Linear(in_features=8, out_features=48, bias=False)
166
+ )
167
+ )
168
+ (self_attn): ModuleList(
169
+ (0-1): 2 x LongcatFlashMLA(
170
+ (q_a_proj): Linear(in_features=8, out_features=32, bias=False)
171
+ (q_a_layernorm): LongcatFlashRMSNorm((32,), eps=1e-06)
172
+ (q_b_proj): Linear(in_features=32, out_features=1024, bias=False)
173
+ (kv_a_proj_with_mqa): Linear(in_features=8, out_features=576, bias=False)
174
+ (kv_a_layernorm): LongcatFlashRMSNorm((384,), eps=1e-06)
175
+ (kv_b_proj): Linear(in_features=384, out_features=512, bias=False)
176
+ (o_proj): Linear(in_features=256, out_features=8, bias=False)
177
+ )
178
+ )
179
+ (mlps): ModuleList(
180
+ (0-1): 2 x LongcatFlashMLP(
181
+ (gate_proj): Linear(in_features=8, out_features=32, bias=False)
182
+ (up_proj): Linear(in_features=8, out_features=32, bias=False)
183
+ (down_proj): Linear(in_features=32, out_features=8, bias=False)
184
+ (act_fn): SiLUActivation()
185
+ )
186
+ )
187
+ (input_layernorm): ModuleList(
188
+ (0-1): 2 x LongcatFlashRMSNorm((8,), eps=1e-05)
189
+ )
190
+ (post_attention_layernorm): ModuleList(
191
+ (0-1): 2 x LongcatFlashRMSNorm((8,), eps=1e-05)
192
+ )
193
+ )
194
+ )
195
+ (norm): LongcatFlashRMSNorm((8,), eps=1e-05)
196
+ (rotary_emb): LongcatFlashRotaryEmbedding()
197
+ (ngram_embeddings): None
198
+ (mtp): ModuleDict(
199
+ (layers): ModuleList(
200
+ (0): ModuleDict(
201
+ (eh_proj): Linear(in_features=16, out_features=8, bias=False)
202
+ (enorm): ModuleDict(
203
+ (m): RMSNorm((8,), eps=None, elementwise_affine=True)
204
+ )
205
+ (hnorm): ModuleDict(
206
+ (m): RMSNorm((8,), eps=None, elementwise_affine=True)
207
+ )
208
+ (input_layernorm): RMSNorm((8,), eps=None, elementwise_affine=True)
209
+ (post_attention_layernorm): RMSNorm((8,), eps=None, elementwise_affine=True)
210
+ (self_attn): LongcatFlashMLA(
211
+ (q_a_proj): Linear(in_features=8, out_features=32, bias=False)
212
+ (q_a_layernorm): LongcatFlashRMSNorm((32,), eps=1e-06)
213
+ (q_b_proj): Linear(in_features=32, out_features=1024, bias=False)
214
+ (kv_a_proj_with_mqa): Linear(in_features=8, out_features=576, bias=False)
215
+ (kv_a_layernorm): LongcatFlashRMSNorm((384,), eps=1e-06)
216
+ (kv_b_proj): Linear(in_features=384, out_features=512, bias=False)
217
+ (o_proj): Linear(in_features=256, out_features=8, bias=False)
218
+ )
219
+ (transformer_layer): ModuleDict(
220
+ (mlp): LongcatFlashMLP(
221
+ (gate_proj): Linear(in_features=8, out_features=32, bias=False)
222
+ (up_proj): Linear(in_features=8, out_features=32, bias=False)
223
+ (down_proj): Linear(in_features=32, out_features=8, bias=False)
224
+ (act_fn): SiLUActivation()
225
+ )
226
+ )
227
+ )
228
+ )
229
+ (norm): RMSNorm((8,), eps=None, elementwise_affine=True)
230
+ (embed_tokens): Embedding(131072, 8)
231
+ )
232
+ )
233
+ (lm_head): Linear(in_features=8, out_features=131072, bias=False)
234
+ )
235
+ ```
236
+
237
+ </details>
chat_template.jinja ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{#- Chat template for LongCat-Flash. Every tag uses whitespace control ("-"), so only the quoted strings reach the rendered prompt. -#}
{%- set tool_choice = tool_choice | default('auto') %}
{%- set ns = namespace(tool_types = [], last_query_index = -1) %}

{#- Tool declaration block: one "namespace" section per tool type, then name/description/schema for each tool. The code_interpreter tool is replaced by a fixed sandboxed definition. -#}
{%- if tools and tool_choice != 'none' %}
{{- "<longcat_tool_declare>\n"-}}
{{- "# Tools\n" }}
{{- "You have access to the following tools:\n\n" }}
{%- for tool in tools %}
{%- if tool.type not in ns.tool_types %}
{%- set ns.tool_types = ns.tool_types + [tool.type] %}
{{- "## Tool namespace: " ~ tool.type ~ "\n\n" }}
{%- endif %}
{%- if tool.type == 'code_interpreter' %}
{%- set tool = {"type":"code_interpreter","function":{"name":"code_interpreter_preview","description":"The code will be executed in a stateful Jupyter notebook sandbox environment, only supports local computation, data processing, and file operations.\nCode sandbox environment (network isolated) Any external network requests or online API calls are prohibited.\nIf online functionality is needed, please use other permitted tools.\nCode will respond with the output of the execution or time out after 60.0 seconds. ","parameters":{"type":"object","properties":{"language":{"type":"string","description":"The programming language of the code to be executed. Available values: python (Default), java, go, js, ts, c, c++."},"code":{"type":"string","description":"Python code to be executed must not include the following:\n- Importing network libraries such as requests, httplib, etc.\n- Any form of HTTP requests.\n- External API calls.\n- Network port operations. Example: ```python\nimport pandas as pd\npd.DataFrame({'A':[1,2]})\n```"},"timeout":{"type":"number","description":"The maximum execution time of the code, in seconds. Default is 60.0."}}},"required":["code"]}} %}
{%- endif %}
{{- "### Tool name: " + tool.function.name + "\n" }}
{{- "Description: " + tool.function.description + "\n\n" }}
{{- "InputSchema: " + tool.function.parameters | tojson(ensure_ascii=False) + "\n\n" }}
{%- endfor %}
{{- '**Note**: For each function call, output the function name and arguments within the following XML format:\n<longcat_tool_call>{function-name}\n<longcat_arg_key>{arg-key-1}</longcat_arg_key>\n<longcat_arg_value>{arg-value-1}</longcat_arg_value>\n<longcat_arg_key>{arg-key-2}</longcat_arg_key>\n<longcat_arg_value>{arg-value-2}</longcat_arg_value>\n...\n</longcat_tool_call>\n' }}
{{- "</longcat_tool_declare>"-}}
{#- Record the index of the last assistant message without tool calls (final message excluded); reasoning content is only re-emitted for messages after that index. -#}
{%- for idx in range(messages|length - 1) %}
{%- set msg = messages[idx] %}
{%- if msg.role == 'assistant' and not msg.tool_calls %}
{%- set ns.last_query_index = idx %}
{%- endif %}
{%- endfor%}
{%- endif %}

{#- Render each message wrapped in its role tag; consecutive tool responses share one <longcat_user> wrapper. -#}
{%- for msg in messages %}
{%- if msg.role == "system" %}
{{- "<longcat_system>" + msg.content }}
{%- elif msg.role == "user" %}
{{- "<longcat_user>" }}
{%- if msg["files"] %}
{{- '<longcat_files>\n' ~ msg.files | tojson(indent=2) ~ '\n</longcat_files>' }}
{%- endif %}
{{- msg.content }}
{%- elif msg.role == "assistant" %}
{{- "<longcat_assistant>" }}
{%- if enable_thinking == true and msg.reasoning_content and ns.tool_types != [] and loop.index0 > ns.last_query_index %}
{{- "\n<longcat_think>\n" ~ msg.reasoning_content ~ "\n</longcat_think>\n" }}
{%- endif %}
{%- if msg.content%}
{{- msg.content }}
{%- endif %}
{%- if msg.tool_calls %}
{%- for tool_call in msg.tool_calls -%}
{{- "<longcat_tool_call>" ~ tool_call.function.name ~ "\n" -}}
{% set _args = tool_call.function.arguments %}
{% for k, v in _args.items() %}
{{- "<longcat_arg_key>" ~ k ~ "</longcat_arg_key>\n" -}}
{{- "<longcat_arg_value>" ~ (v if v is string else v | tojson(ensure_ascii=False)) ~ "</longcat_arg_value>\n" -}}
{% endfor %}
{{- "</longcat_tool_call>\n" }}
{%- endfor %}
{%- endif %}
{{- "</longcat_s>" -}}
{%- elif msg.role == "tool" %}
{%- if messages[loop.index0 - 1].role != "tool"%}
{{- "<longcat_user>" -}}
{%- endif %}
{{- "<longcat_tool_response>" ~ msg.content ~ "</longcat_tool_response>"-}}
{%- endif %}
{%- endfor %}
{#- Generation prompt: optional /think_on / /think_off switches and a minimum thinking budget of 1024. -#}
{%- if add_generation_prompt %}
{%- if enable_thinking == true %}
{{- " /think_on" }}
{%- if thinking_budget %}
{%- if thinking_budget < 1024 %}
{%- set thinking_budget = 1024 %}
{%- endif%}
{{- "\nthinking_budget: < " ~ thinking_budget ~ "."}}
{%- endif %}
{{- " <longcat_assistant><longcat_think>\n"}}
{%- elif enable_thinking == false %}
{{- " /think_off <longcat_assistant><longcat_think>\n\n</longcat_think>\n" }}
{%- else %}
{{- "<longcat_assistant>" }}
{%- endif %}
{%- endif %}
config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LongcatFlashNgramForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "configuration_longcat_ngram.LongcatFlashNgramConfig",
9
+ "AutoModel": "modeling_longcat_ngram.LongcatFlashNgramModel",
10
+ "AutoModelForCausalLM": "modeling_longcat_ngram.LongcatFlashNgramForCausalLM"
11
+ },
12
+ "bos_token_id": 1,
13
+ "dtype": "bfloat16",
14
+ "emb_neighbor_num": 2,
15
+ "emb_split_num": 2,
16
+ "eos_token_id": 2,
17
+ "expert_ffn_hidden_size": 32,
18
+ "ffn_hidden_size": 32,
19
+ "head_dim": 192,
20
+ "hidden_act": "silu",
21
+ "hidden_size": 8,
22
+ "initializer_range": 0.02,
23
+ "kv_lora_rank": 384,
24
+ "max_position_embeddings": 327680,
25
+ "mla_scale_kv_lora": true,
26
+ "mla_scale_q_lora": true,
27
+ "model_type": "longcat_flash_ngram",
28
+ "moe_topk": 12,
29
+ "n_routed_experts": 32,
30
+ "ngram_vocab_size_ratio": 4,
31
+ "num_attention_heads": 4,
32
+ "num_hidden_layers": 4,
33
+ "num_key_value_heads": 4,
34
+ "num_layers": 2,
35
+ "q_lora_rank": 32,
36
+ "qk_head_dim": 256,
37
+ "qk_nope_head_dim": 64,
38
+ "qk_rope_head_dim": 192,
39
+ "rms_norm_eps": 1e-05,
40
+ "rope_scaling": {
41
+ "beta_fast": 32.0,
42
+ "beta_slow": 1.0,
43
+ "factor": 10.0,
44
+ "mscale": 1,
45
+ "mscale_all_dim": 1,
46
+ "original_max_position_embeddings": 32768,
47
+ "rope_type": "yarn"
48
+ },
49
+ "rope_theta": 5000000.0,
50
+ "routed_scaling_factor": 6.0,
51
+ "tie_word_embeddings": false,
52
+ "transformers_version": "4.57.6",
53
+ "use_cache": true,
54
+ "v_head_dim": 64,
55
+ "vocab_size": 131072,
56
+ "zero_expert_num": 16,
57
+ "zero_expert_type": "identity"
58
+ }
configuration_longcat_ngram.py ADDED
@@ -0,0 +1,216 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers.models.longcat_flash import LongcatFlashConfig
2
+
3
+
4
class LongcatFlashNgramConfig(LongcatFlashConfig):
    r"""
    This is the configuration class to store the configuration of a [`LongcatFlashNgramModel`]. It is used
    to instantiate a LongCat Flash model with N-gram enhanced embeddings according to the specified
    arguments, defining the model architecture. Configuration objects inherit from [`PretrainedConfig`]
    and can be used to control the model outputs. Read the documentation from [`PretrainedConfig`] for
    more information.

    Every argument of [`LongcatFlashConfig`] (vocabulary and hidden sizes, MLA low-rank attention
    dimensions, MoE routing, RoPE settings, ...) is accepted here with the same meaning and is forwarded
    unchanged to the parent class; see [`LongcatFlashConfig`] for their documentation. Only the
    N-gram-specific arguments are documented below.

    Args:
        emb_neighbor_num (`int`, *optional*):
            Maximum N-gram length for N-gram embeddings. This parameter determines the context window
            size for N-gram computation. Higher values capture longer-range lexical patterns but
            increase memory usage.
        emb_split_num (`int`, *optional*):
            Number of hash functions (or splits) to use for N-gram embeddings. Multiple hash functions
            help improve the quality of N-gram representations.
        ngram_vocab_size_ratio (`float`, *optional*):
            Ratio multiplier for N-gram vocabulary size relative to the base vocabulary size. The N-gram
            vocabulary size is calculated as `vocab_size * ngram_vocab_size_ratio`.

    Example:
    ```python
    >>> from transformers import LongcatFlashNgramModel, LongcatFlashNgramConfig

    >>> # Initializing a LongCat Flash N-gram style configuration
    >>> configuration = LongcatFlashNgramConfig(
    ...     emb_neighbor_num=3,
    ...     emb_split_num=4,
    ...     ngram_vocab_size_ratio=1.5
    ... )

    >>> # Initializing a model from the configuration
    >>> model = LongcatFlashNgramModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "longcat_flash_ngram"
    keys_to_ignore_at_inference = ["past_key_values"]
    # Tensor-parallel plan: input-side projections are sharded column-wise and the matching
    # output-side projections row-wise, for both the dense `mlps` and the MoE experts.
    base_model_tp_plan = {
        "layers.*.self_attn.*.q_b_proj": "colwise",
        "layers.*.self_attn.*.kv_b_proj": "colwise",
        "layers.*.self_attn.*.o_proj": "rowwise",
        "layers.*.mlps.*.gate_proj": "colwise",
        "layers.*.mlps.*.up_proj": "colwise",
        "layers.*.mlps.*.down_proj": "rowwise",
        "layers.*.mlp.experts.*.gate_proj": "colwise",
        "layers.*.mlp.experts.*.up_proj": "colwise",
        "layers.*.mlp.experts.*.down_proj": "rowwise",
    }

    # Pipeline-parallel plan: (input names, output names) for each stage boundary.
    base_model_pp_plan = {
        "embed_tokens": (["input_ids"], ["inputs_embeds"]),
        "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
        "norm": (["hidden_states"], ["hidden_states"]),
    }

    def __init__(
        self,
        vocab_size=131072,
        hidden_size=6144,
        num_hidden_layers=56,
        num_layers=28,
        num_attention_heads=64,
        num_key_value_heads=None,
        hidden_act="silu",
        max_position_embeddings=131072,
        initializer_range=0.02,
        rms_norm_eps=1e-5,
        use_cache=True,
        pad_token_id=None,
        bos_token_id=1,
        eos_token_id=2,
        tie_word_embeddings=False,
        rope_theta=10000000.0,
        rope_scaling=None,
        attention_bias=False,
        attention_dropout=0.0,
        ffn_hidden_size=12288,
        q_lora_rank=1536,
        kv_lora_rank=512,
        qk_nope_head_dim=128,
        qk_rope_head_dim=64,
        head_dim=64,
        v_head_dim=128,
        qk_head_dim=None,
        moe_topk=12,
        n_routed_experts=512,
        zero_expert_num=256,
        expert_ffn_hidden_size=2048,
        routed_scaling_factor=6.0,
        emb_neighbor_num=None,
        emb_split_num=None,
        ngram_vocab_size_ratio=None,
        **kwargs,
    ):
        # N-gram embedding specific parameters. NOTE(review): assigned before
        # super().__init__() — presumably so they already exist on the instance while
        # the parent constructor processes the remaining kwargs; confirm before reordering.
        self.emb_neighbor_num = emb_neighbor_num
        self.emb_split_num = emb_split_num
        self.ngram_vocab_size_ratio = ngram_vocab_size_ratio

        # Everything else is delegated verbatim to LongcatFlashConfig.
        super().__init__(
            vocab_size=vocab_size,
            hidden_size=hidden_size,
            num_hidden_layers=num_hidden_layers,
            num_layers=num_layers,
            num_attention_heads=num_attention_heads,
            num_key_value_heads=num_key_value_heads,
            hidden_act=hidden_act,
            max_position_embeddings=max_position_embeddings,
            initializer_range=initializer_range,
            rms_norm_eps=rms_norm_eps,
            use_cache=use_cache,
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            rope_theta=rope_theta,
            rope_scaling=rope_scaling,
            attention_bias=attention_bias,
            attention_dropout=attention_dropout,
            ffn_hidden_size=ffn_hidden_size,
            q_lora_rank=q_lora_rank,
            kv_lora_rank=kv_lora_rank,
            qk_nope_head_dim=qk_nope_head_dim,
            qk_rope_head_dim=qk_rope_head_dim,
            head_dim=head_dim,
            v_head_dim=v_head_dim,
            qk_head_dim=qk_head_dim,
            moe_topk=moe_topk,
            n_routed_experts=n_routed_experts,
            zero_expert_num=zero_expert_num,
            expert_ffn_hidden_size=expert_ffn_hidden_size,
            routed_scaling_factor=routed_scaling_factor,
            **kwargs,
        )
+
215
+
216
+ __all__ = ["LongcatFlashNgramConfig"]
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "pad_token_id": 3,
6
+ "transformers_version": "4.57.6"
7
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b07278fd0e88b73482a5ad791bdc6f68d086050dae57289ad19f2f9b48539848
3
+ size 8798040
modeling_longcat_ngram.py ADDED
@@ -0,0 +1,338 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright (c) 2025 Meituan
3
+ # This code is licensed under the MIT License, for details, see the ./LICENSE file.
4
+
5
+ from typing import Optional, Tuple, Dict, List
6
+
7
+ import torch
8
+ from torch import nn
9
+
10
+ from transformers.cache_utils import Cache, DynamicCache
11
+ from transformers.masking_utils import create_causal_mask
12
+ from transformers.modeling_outputs import BaseModelOutputWithPast
13
+ from transformers.processing_utils import Unpack
14
+ from transformers.utils import auto_docstring, logging
15
+ from transformers.models.longcat_flash.modeling_longcat_flash import (
16
+ LongcatFlashForCausalLM,
17
+ LongcatFlashModel,
18
+ LongcatFlashRMSNorm,
19
+ LongcatFlashRotaryEmbedding,
20
+ LongcatFlashDecoderLayer,
21
+ LongcatFlashPreTrainedModel,
22
+ )
23
+ from .configuration_longcat_ngram import LongcatFlashNgramConfig
24
+
25
+ logger = logging.get_logger(__name__)
26
+
27
+
28
@auto_docstring
class LongcatFlashNgramPreTrainedModel(LongcatFlashPreTrainedModel):
    # Shared pretrained base for the N-gram model classes; all weight-init and
    # checkpoint-loading behavior is inherited unchanged from LongcatFlashPreTrainedModel.
    pass
31
+
32
+
33
class NgramCache(DynamicCache):
    """
    Extended DynamicCache that also keeps a sliding window of recent token ids,
    so the N-gram embedding layer can hash across the prompt/generation boundary
    during incremental decoding.
    """
    def __init__(self, config=None):
        """
        Args:
            config: Model config providing `emb_neighbor_num` (maximum N-gram order).
                Required; the `None` default only mirrors the `DynamicCache` signature.

        Raises:
            ValueError: If `config` is `None` — a clear message instead of the opaque
                `AttributeError` that `None.emb_neighbor_num` would otherwise raise.
        """
        super().__init__()
        self.ngram_context = None
        if config is None:
            raise ValueError(
                "NgramCache requires a config with `emb_neighbor_num` to size its token window"
            )
        # Keep only n-1 tokens (minimum needed for N-gram computation)
        self.max_context_len = config.emb_neighbor_num - 1

    def update_ngram_context(self, new_tokens: torch.Tensor) -> None:
        """
        Update N-gram context with window management.

        Args:
            new_tokens: New tokens to append, shape (batch_size, seq_len)
        """
        if self.ngram_context is None:
            self.ngram_context = new_tokens.clone()
        else:
            self.ngram_context = torch.cat([self.ngram_context, new_tokens], dim=-1)

        # Truncate to maintain constant memory footprint. Slice from the front rather
        # than with `[..., -self.max_context_len:]`: when `emb_neighbor_num == 1` the
        # window size is 0 and a `-0` start index would keep the WHOLE tensor,
        # letting the context grow without bound.
        if self.ngram_context.size(-1) > self.max_context_len:
            keep_from = self.ngram_context.size(-1) - self.max_context_len
            self.ngram_context = self.ngram_context[..., keep_from:]

    def reorder_cache(self, beam_idx: torch.LongTensor) -> "Cache":
        """Reorder both the KV cache and the N-gram context for beam search."""
        # Reorder parent's KV cache
        super().reorder_cache(beam_idx)

        # Reorder N-gram context along the batch dimension
        if self.ngram_context is not None:
            self.ngram_context = self.ngram_context.index_select(0, beam_idx.to(self.ngram_context.device))

        return self
69
+
70
+
71
+ class NgramEmbedding(nn.Module):
72
+ """
73
+ Computes embeddings enriched with N-gram features without maintaining internal state.
74
+ """
75
+ def __init__(self, config, base_embeddings):
76
+ super().__init__()
77
+ self.config = config
78
+ self.word_embeddings = base_embeddings
79
+
80
+ self.m = config.ngram_vocab_size_ratio * config.vocab_size
81
+ self.k = config.emb_split_num
82
+ self.n = config.emb_neighbor_num
83
+
84
+ self._init_ngram_embeddings()
85
+ self._vocab_mods_cache = None
86
+
87
+ def _init_ngram_embeddings(self) -> None:
88
+ """Initialize N-gram embedding and projection layers."""
89
+ num_embedders = self.k * (self.n - 1)
90
+ emb_dim = self.config.hidden_size // num_embedders
91
+
92
+ embedders = []
93
+ post_projs = []
94
+
95
+ for i in range(num_embedders):
96
+ vocab_size = int(self.m + i * 2 + 1)
97
+ emb = nn.Embedding(vocab_size, emb_dim, padding_idx=self.config.pad_token_id)
98
+ proj = nn.Linear(emb_dim, self.config.hidden_size, bias=False)
99
+ embedders.append(emb)
100
+ post_projs.append(proj)
101
+
102
+ self.embedders = nn.ModuleList(embedders)
103
+ self.post_projs = nn.ModuleList(post_projs)
104
+
105
+ def _shift_right_ignore_eos(self, tensor: torch.Tensor, n: int, eos_token_id: int = 2) -> torch.Tensor:
106
+ """Shift tensor right by n positions, resetting at EOS tokens."""
107
+ batch_size, seq_len = tensor.shape
108
+ result = torch.zeros_like(tensor)
109
+ eos_mask = (tensor == eos_token_id)
110
+
111
+ for i in range(batch_size):
112
+ eos_positions = eos_mask[i].nonzero(as_tuple=True)[0]
113
+ prev_idx = 0
114
+
115
+ for eos_idx in eos_positions:
116
+ end_idx = eos_idx.item() + 1
117
+ if end_idx - prev_idx > n:
118
+ result[i, prev_idx+n:end_idx] = tensor[i, prev_idx:end_idx-n]
119
+ prev_idx = end_idx
120
+
121
+ if prev_idx < seq_len and seq_len - prev_idx > n:
122
+ result[i, prev_idx+n:seq_len] = tensor[i, prev_idx:seq_len-n]
123
+
124
+ return result
125
+
126
+ def _precompute_vocab_mods(self) -> Dict[Tuple[int, int], List[int]]:
127
+ """Precompute modular arithmetic values for vocabulary."""
128
+ if self._vocab_mods_cache is not None:
129
+ return self._vocab_mods_cache
130
+
131
+ vocab_mods = {}
132
+ vocab_size = self.config.vocab_size
133
+
134
+ for i in range(2, self.n + 1):
135
+ for j in range(self.k):
136
+ index = (i - 2) * self.k + j
137
+ emb_vocab_dim = int(self.m + index * 2 + 1)
138
+
139
+ mods = []
140
+ power_mod = 1
141
+ for _ in range(i - 1):
142
+ power_mod = (power_mod * vocab_size) % emb_vocab_dim
143
+ mods.append(power_mod)
144
+
145
+ vocab_mods[(i, j)] = mods
146
+
147
+ self._vocab_mods_cache = vocab_mods
148
+ return vocab_mods
149
+
150
+ def _get_ngram_ids(
151
+ self,
152
+ input_ids: torch.Tensor,
153
+ shifted_ids: Dict[int, torch.Tensor],
154
+ vocab_mods: List[int],
155
+ ngram: int
156
+ ) -> torch.Tensor:
157
+ """Compute N-gram hash IDs using polynomial rolling hash."""
158
+ ngram_ids = input_ids.clone()
159
+ for k in range(2, ngram + 1):
160
+ ngram_ids = ngram_ids + shifted_ids[k] * vocab_mods[k - 2]
161
+ return ngram_ids
162
+
163
+ def forward(
164
+ self,
165
+ input_ids: torch.Tensor,
166
+ ngram_context: Optional[torch.Tensor] = None
167
+ ) -> torch.Tensor:
168
+ """
169
+ Stateless forward pass.
170
+
171
+ Args:
172
+ input_ids: Current input token IDs of shape (batch_size, seq_len)
173
+ ngram_context: Optional historical context of shape (batch_size, context_len)
174
+
175
+ Returns:
176
+ Embedding tensor of shape (batch_size, seq_len, hidden_size)
177
+ """
178
+ seq_len = input_ids.size(-1)
179
+
180
+ # Determine complete context
181
+ if ngram_context is not None:
182
+ context = torch.cat([ngram_context[..., -(self.n-1):], input_ids], dim=-1)
183
+ else:
184
+ context = input_ids
185
+
186
+ # Base word embeddings
187
+ device = self.word_embeddings.weight.device
188
+ x = self.word_embeddings(input_ids.to(device)).clone()
189
+
190
+ # Precompute modular values
191
+ vocab_mods = self._precompute_vocab_mods()
192
+
193
+ # Compute shifted IDs
194
+ shifted_ids = {}
195
+ for i in range(2, self.n + 1):
196
+ shifted_ids[i] = self._shift_right_ignore_eos(
197
+ context, i - 1, eos_token_id=self.config.eos_token_id
198
+ )
199
+
200
+ # Add N-gram embeddings
201
+ for i in range(2, self.n + 1):
202
+ for j in range(self.k):
203
+ index = (i - 2) * self.k + j
204
+ emb_vocab_dim = int(self.m + index * 2 + 1)
205
+
206
+ ngram_ids = self._get_ngram_ids(context, shifted_ids, vocab_mods[(i, j)], ngram=i)
207
+ new_ids = (ngram_ids % emb_vocab_dim)[..., -seq_len:]
208
+
209
+ embedder_device = self.embedders[index].weight.device
210
+ x_ngram = self.embedders[index](new_ids.to(embedder_device))
211
+
212
+ proj_device = self.post_projs[index].weight.device
213
+ x_proj = self.post_projs[index](x_ngram.to(proj_device))
214
+ x = x + x_proj.to(x.device)
215
+
216
+ # Normalize
217
+ x = x / (1 + self.k * (self.n - 1))
218
+
219
+ return x
220
+
221
+
222
+ class LongcatFlashNgramModel(LongcatFlashModel):
223
+ """LongcatFlash model with N-gram enhanced embeddings."""
224
+ _keys_to_ignore_on_load_unexpected = [r"model\.mtp.*"]
225
+ config_class = LongcatFlashNgramConfig
226
+
227
+ def __init__(self, config):
228
+ super().__init__(config)
229
+
230
+ self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
231
+ self.ngram_embeddings = NgramEmbedding(config, self.embed_tokens)
232
+
233
+ self.layers = nn.ModuleList(
234
+ [LongcatFlashDecoderLayer(config, layer_idx) for layer_idx in range(config.num_layers)]
235
+ )
236
+
237
+ self.head_dim = config.head_dim
238
+ self.config.num_hidden_layers = 2 * config.num_layers
239
+ self.norm = LongcatFlashRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
240
+ self.rotary_emb = LongcatFlashRotaryEmbedding(config=config)
241
+ self.gradient_checkpointing = False
242
+
243
+ self.post_init()
244
+
245
+ def forward(
246
+ self,
247
+ input_ids: Optional[torch.LongTensor] = None,
248
+ attention_mask: Optional[torch.Tensor] = None,
249
+ position_ids: Optional[torch.LongTensor] = None,
250
+ past_key_values: Optional[Cache] = None,
251
+ inputs_embeds: Optional[torch.FloatTensor] = None,
252
+ cache_position: Optional[torch.LongTensor] = None,
253
+ use_cache: Optional[bool] = None,
254
+ **kwargs
255
+ ) -> BaseModelOutputWithPast:
256
+ if (input_ids is None) ^ (inputs_embeds is not None):
257
+ raise ValueError("You must specify exactly one of input_ids or inputs_embeds")
258
+
259
+ # Extract N-gram context if available
260
+ ngram_context = None
261
+ if isinstance(past_key_values, NgramCache) and past_key_values.ngram_context is not None:
262
+ ngram_context = past_key_values.ngram_context
263
+
264
+ if inputs_embeds is None:
265
+ inputs_embeds = self.ngram_embeddings(input_ids, ngram_context=ngram_context)
266
+
267
+ # Initialize NgramCache if needed
268
+ if use_cache and past_key_values is None:
269
+ past_key_values = NgramCache(config=self.config)
270
+
271
+ # Update N-gram context
272
+ if use_cache and isinstance(past_key_values, NgramCache):
273
+ past_key_values.update_ngram_context(input_ids)
274
+
275
+ # Prepare cache position
276
+ if cache_position is None:
277
+ past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
278
+ cache_position = torch.arange(
279
+ inputs_embeds.shape[1], device=inputs_embeds.device
280
+ ) + past_seen_tokens
281
+
282
+ if position_ids is None:
283
+ position_ids = cache_position.unsqueeze(0)
284
+
285
+ # Create causal mask
286
+ causal_mask = create_causal_mask(
287
+ config=self.config,
288
+ input_embeds=inputs_embeds,
289
+ attention_mask=attention_mask,
290
+ cache_position=cache_position,
291
+ past_key_values=past_key_values,
292
+ position_ids=position_ids,
293
+ )
294
+
295
+ # Forward through decoder layers
296
+ hidden_states = inputs_embeds
297
+ position_embeddings = self.rotary_emb(hidden_states, position_ids)
298
+
299
+ for decoder_layer in self.layers[: self.config.num_layers]:
300
+ hidden_states = decoder_layer(
301
+ hidden_states,
302
+ attention_mask=causal_mask,
303
+ position_ids=position_ids,
304
+ past_key_values=past_key_values,
305
+ cache_position=cache_position,
306
+ position_embeddings=position_embeddings,
307
+ **kwargs,
308
+ )
309
+
310
+ hidden_states = self.norm(hidden_states)
311
+
312
+ return BaseModelOutputWithPast(
313
+ last_hidden_state=hidden_states,
314
+ past_key_values=past_key_values,
315
+ hidden_states=None,
316
+ attentions=None,
317
+ )
318
+
319
+
320
+ class LongcatFlashNgramForCausalLM(LongcatFlashForCausalLM):
321
+ """LongcatFlash model for causal language modeling with N-gram embeddings."""
322
+ _keys_to_ignore_on_load_unexpected = [r"model\.mtp.*"]
323
+ config_class = LongcatFlashNgramConfig
324
+
325
+ def __init__(self, config):
326
+ super().__init__(config)
327
+ self.model = LongcatFlashNgramModel(config)
328
+
329
+ @torch.no_grad()
330
+ def generate(self, inputs=None, generation_config=None, **kwargs):
331
+ """Override to ensure NgramCache is used."""
332
+
333
+ if "past_key_values" not in kwargs or kwargs["past_key_values"] is None:
334
+ kwargs["past_key_values"] = NgramCache(config=self.config)
335
+
336
+ return super().generate(inputs=inputs, generation_config=generation_config, **kwargs)
337
+
338
+ __all__ = ["LongcatFlashNgramPreTrainedModel", "LongcatFlashNgramModel", "LongcatFlashNgramForCausalLM"]
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<longcat_s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</longcat_s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<longcat_pad>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<longcat_unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,1810 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": true,
4
+ "add_prefix_space": false,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<longcat_unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<longcat_s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</longcat_s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ },
30
+ "3": {
31
+ "content": "<longcat_pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false,
36
+ "special": true
37
+ },
38
+ "4": {
39
+ "content": "<shift_unk>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false,
44
+ "special": true
45
+ },
46
+ "5": {
47
+ "content": "<shift_s>",
48
+ "lstrip": false,
49
+ "normalized": false,
50
+ "rstrip": false,
51
+ "single_word": false,
52
+ "special": true
53
+ },
54
+ "6": {
55
+ "content": "</shift_s>",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": false,
59
+ "single_word": false,
60
+ "special": true
61
+ },
62
+ "7": {
63
+ "content": "<shift_pad>",
64
+ "lstrip": false,
65
+ "normalized": false,
66
+ "rstrip": false,
67
+ "single_word": false,
68
+ "special": true
69
+ },
70
+ "8": {
71
+ "content": "<mask_0>",
72
+ "lstrip": false,
73
+ "normalized": false,
74
+ "rstrip": false,
75
+ "single_word": false,
76
+ "special": true
77
+ },
78
+ "9": {
79
+ "content": "<reponame>",
80
+ "lstrip": false,
81
+ "normalized": false,
82
+ "rstrip": false,
83
+ "single_word": false,
84
+ "special": true
85
+ },
86
+ "10": {
87
+ "content": "<filename>",
88
+ "lstrip": false,
89
+ "normalized": false,
90
+ "rstrip": false,
91
+ "single_word": false,
92
+ "special": true
93
+ },
94
+ "11": {
95
+ "content": "<gh_stars>",
96
+ "lstrip": false,
97
+ "normalized": false,
98
+ "rstrip": false,
99
+ "single_word": false,
100
+ "special": true
101
+ },
102
+ "12": {
103
+ "content": "<issue_start>",
104
+ "lstrip": false,
105
+ "normalized": false,
106
+ "rstrip": false,
107
+ "single_word": false,
108
+ "special": true
109
+ },
110
+ "13": {
111
+ "content": "<issue_comment>",
112
+ "lstrip": false,
113
+ "normalized": false,
114
+ "rstrip": false,
115
+ "single_word": false,
116
+ "special": true
117
+ },
118
+ "14": {
119
+ "content": "<issue_closed>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false,
124
+ "special": true
125
+ },
126
+ "15": {
127
+ "content": "<jupyter_start>",
128
+ "lstrip": false,
129
+ "normalized": false,
130
+ "rstrip": false,
131
+ "single_word": false,
132
+ "special": true
133
+ },
134
+ "16": {
135
+ "content": "<jupyter_text>",
136
+ "lstrip": false,
137
+ "normalized": false,
138
+ "rstrip": false,
139
+ "single_word": false,
140
+ "special": true
141
+ },
142
+ "17": {
143
+ "content": "<jupyter_code>",
144
+ "lstrip": false,
145
+ "normalized": false,
146
+ "rstrip": false,
147
+ "single_word": false,
148
+ "special": true
149
+ },
150
+ "18": {
151
+ "content": "<jupyter_output>",
152
+ "lstrip": false,
153
+ "normalized": false,
154
+ "rstrip": false,
155
+ "single_word": false,
156
+ "special": true
157
+ },
158
+ "19": {
159
+ "content": "<empty_output>",
160
+ "lstrip": false,
161
+ "normalized": false,
162
+ "rstrip": false,
163
+ "single_word": false,
164
+ "special": true
165
+ },
166
+ "20": {
167
+ "content": "<commit_before>",
168
+ "lstrip": false,
169
+ "normalized": false,
170
+ "rstrip": false,
171
+ "single_word": false,
172
+ "special": true
173
+ },
174
+ "21": {
175
+ "content": "<commit_msg>",
176
+ "lstrip": false,
177
+ "normalized": false,
178
+ "rstrip": false,
179
+ "single_word": false,
180
+ "special": true
181
+ },
182
+ "22": {
183
+ "content": "<commit_after>",
184
+ "lstrip": false,
185
+ "normalized": false,
186
+ "rstrip": false,
187
+ "single_word": false,
188
+ "special": true
189
+ },
190
+ "23": {
191
+ "content": "<program_lang>",
192
+ "lstrip": false,
193
+ "normalized": false,
194
+ "rstrip": false,
195
+ "single_word": false,
196
+ "special": true
197
+ },
198
+ "24": {
199
+ "content": "<|image_placeholder|>",
200
+ "lstrip": false,
201
+ "normalized": false,
202
+ "rstrip": false,
203
+ "single_word": false,
204
+ "special": true
205
+ },
206
+ "25": {
207
+ "content": "<|url_placeholder|>",
208
+ "lstrip": false,
209
+ "normalized": false,
210
+ "rstrip": false,
211
+ "single_word": false,
212
+ "special": true
213
+ },
214
+ "26": {
215
+ "content": "<|hyperlink_placeholder|>",
216
+ "lstrip": false,
217
+ "normalized": false,
218
+ "rstrip": false,
219
+ "single_word": false,
220
+ "special": true
221
+ },
222
+ "27": {
223
+ "content": "<|table_placeholder|>",
224
+ "lstrip": false,
225
+ "normalized": false,
226
+ "rstrip": false,
227
+ "single_word": false,
228
+ "special": true
229
+ },
230
+ "28": {
231
+ "content": "<|equation_placeholder|>",
232
+ "lstrip": false,
233
+ "normalized": false,
234
+ "rstrip": false,
235
+ "single_word": false,
236
+ "special": true
237
+ },
238
+ "29": {
239
+ "content": "<|code_placeholder|>",
240
+ "lstrip": false,
241
+ "normalized": false,
242
+ "rstrip": false,
243
+ "single_word": false,
244
+ "special": true
245
+ },
246
+ "30": {
247
+ "content": "<|reference_placeholder|>",
248
+ "lstrip": false,
249
+ "normalized": false,
250
+ "rstrip": false,
251
+ "single_word": false,
252
+ "special": true
253
+ },
254
+ "31": {
255
+ "content": "<|endoftext|>",
256
+ "lstrip": false,
257
+ "normalized": false,
258
+ "rstrip": false,
259
+ "single_word": false,
260
+ "special": true
261
+ },
262
+ "32": {
263
+ "content": "<fim_prefix>",
264
+ "lstrip": false,
265
+ "normalized": false,
266
+ "rstrip": false,
267
+ "single_word": false,
268
+ "special": true
269
+ },
270
+ "33": {
271
+ "content": "<fim_middle>",
272
+ "lstrip": false,
273
+ "normalized": false,
274
+ "rstrip": false,
275
+ "single_word": false,
276
+ "special": true
277
+ },
278
+ "34": {
279
+ "content": "<fim_suffix>",
280
+ "lstrip": false,
281
+ "normalized": false,
282
+ "rstrip": false,
283
+ "single_word": false,
284
+ "special": true
285
+ },
286
+ "35": {
287
+ "content": "<fim_pad>",
288
+ "lstrip": false,
289
+ "normalized": false,
290
+ "rstrip": false,
291
+ "single_word": false,
292
+ "special": true
293
+ },
294
+ "36": {
295
+ "content": "<longcat_think>",
296
+ "lstrip": false,
297
+ "normalized": false,
298
+ "rstrip": false,
299
+ "single_word": false,
300
+ "special": false
301
+ },
302
+ "37": {
303
+ "content": "</longcat_think>",
304
+ "lstrip": false,
305
+ "normalized": false,
306
+ "rstrip": false,
307
+ "single_word": false,
308
+ "special": false
309
+ },
310
+ "38": {
311
+ "content": "<longcat_answer>",
312
+ "lstrip": false,
313
+ "normalized": false,
314
+ "rstrip": false,
315
+ "single_word": false,
316
+ "special": false
317
+ },
318
+ "39": {
319
+ "content": "</longcat_answer>",
320
+ "lstrip": false,
321
+ "normalized": false,
322
+ "rstrip": false,
323
+ "single_word": false,
324
+ "special": false
325
+ },
326
+ "40": {
327
+ "content": "<longcat_files>",
328
+ "lstrip": false,
329
+ "normalized": false,
330
+ "rstrip": false,
331
+ "single_word": false,
332
+ "special": false
333
+ },
334
+ "41": {
335
+ "content": "</longcat_files>",
336
+ "lstrip": false,
337
+ "normalized": false,
338
+ "rstrip": false,
339
+ "single_word": false,
340
+ "special": false
341
+ },
342
+ "42": {
343
+ "content": "<longcat_tool_call>",
344
+ "lstrip": false,
345
+ "normalized": false,
346
+ "rstrip": false,
347
+ "single_word": false,
348
+ "special": false
349
+ },
350
+ "43": {
351
+ "content": "</longcat_tool_call>",
352
+ "lstrip": false,
353
+ "normalized": false,
354
+ "rstrip": false,
355
+ "single_word": false,
356
+ "special": false
357
+ },
358
+ "44": {
359
+ "content": "<longcat_tool_declare>",
360
+ "lstrip": false,
361
+ "normalized": false,
362
+ "rstrip": false,
363
+ "single_word": false,
364
+ "special": true
365
+ },
366
+ "45": {
367
+ "content": "</longcat_tool_declare>",
368
+ "lstrip": false,
369
+ "normalized": false,
370
+ "rstrip": false,
371
+ "single_word": false,
372
+ "special": true
373
+ },
374
+ "46": {
375
+ "content": "<longcat_system>",
376
+ "lstrip": false,
377
+ "normalized": false,
378
+ "rstrip": false,
379
+ "single_word": false,
380
+ "special": true
381
+ },
382
+ "47": {
383
+ "content": "<longcat_user>",
384
+ "lstrip": false,
385
+ "normalized": false,
386
+ "rstrip": false,
387
+ "single_word": false,
388
+ "special": true
389
+ },
390
+ "48": {
391
+ "content": "<longcat_assistant>",
392
+ "lstrip": false,
393
+ "normalized": false,
394
+ "rstrip": false,
395
+ "single_word": false,
396
+ "special": true
397
+ },
398
+ "49": {
399
+ "content": "<longcat_tool_response>",
400
+ "lstrip": false,
401
+ "normalized": false,
402
+ "rstrip": false,
403
+ "single_word": false,
404
+ "special": false
405
+ },
406
+ "50": {
407
+ "content": "</longcat_tool_response>",
408
+ "lstrip": false,
409
+ "normalized": false,
410
+ "rstrip": false,
411
+ "single_word": false,
412
+ "special": false
413
+ },
414
+ "51": {
415
+ "content": "<longcat_arg_key>",
416
+ "lstrip": false,
417
+ "normalized": false,
418
+ "rstrip": false,
419
+ "single_word": false,
420
+ "special": false
421
+ },
422
+ "52": {
423
+ "content": "</longcat_arg_key>",
424
+ "lstrip": false,
425
+ "normalized": false,
426
+ "rstrip": false,
427
+ "single_word": false,
428
+ "special": false
429
+ },
430
+ "53": {
431
+ "content": "<longcat_arg_value>",
432
+ "lstrip": false,
433
+ "normalized": false,
434
+ "rstrip": false,
435
+ "single_word": false,
436
+ "special": false
437
+ },
438
+ "54": {
439
+ "content": "</longcat_arg_value>",
440
+ "lstrip": false,
441
+ "normalized": false,
442
+ "rstrip": false,
443
+ "single_word": false,
444
+ "special": false
445
+ },
446
+ "55": {
447
+ "content": "<mask_31>",
448
+ "lstrip": false,
449
+ "normalized": false,
450
+ "rstrip": false,
451
+ "single_word": false,
452
+ "special": true
453
+ },
454
+ "56": {
455
+ "content": "<mask_32>",
456
+ "lstrip": false,
457
+ "normalized": false,
458
+ "rstrip": false,
459
+ "single_word": false,
460
+ "special": true
461
+ },
462
+ "57": {
463
+ "content": "<mask_33>",
464
+ "lstrip": false,
465
+ "normalized": false,
466
+ "rstrip": false,
467
+ "single_word": false,
468
+ "special": true
469
+ },
470
+ "58": {
471
+ "content": "<mask_34>",
472
+ "lstrip": false,
473
+ "normalized": false,
474
+ "rstrip": false,
475
+ "single_word": false,
476
+ "special": true
477
+ },
478
+ "59": {
479
+ "content": "<mask_35>",
480
+ "lstrip": false,
481
+ "normalized": false,
482
+ "rstrip": false,
483
+ "single_word": false,
484
+ "special": true
485
+ },
486
+ "60": {
487
+ "content": "<mask_36>",
488
+ "lstrip": false,
489
+ "normalized": false,
490
+ "rstrip": false,
491
+ "single_word": false,
492
+ "special": true
493
+ },
494
+ "61": {
495
+ "content": "<mask_37>",
496
+ "lstrip": false,
497
+ "normalized": false,
498
+ "rstrip": false,
499
+ "single_word": false,
500
+ "special": true
501
+ },
502
+ "62": {
503
+ "content": "<mask_38>",
504
+ "lstrip": false,
505
+ "normalized": false,
506
+ "rstrip": false,
507
+ "single_word": false,
508
+ "special": true
509
+ },
510
+ "63": {
511
+ "content": "<mask_39>",
512
+ "lstrip": false,
513
+ "normalized": false,
514
+ "rstrip": false,
515
+ "single_word": false,
516
+ "special": true
517
+ },
518
+ "64": {
519
+ "content": "<mask_40>",
520
+ "lstrip": false,
521
+ "normalized": false,
522
+ "rstrip": false,
523
+ "single_word": false,
524
+ "special": true
525
+ },
526
+ "65": {
527
+ "content": "<mask_41>",
528
+ "lstrip": false,
529
+ "normalized": false,
530
+ "rstrip": false,
531
+ "single_word": false,
532
+ "special": true
533
+ },
534
+ "66": {
535
+ "content": "<mask_42>",
536
+ "lstrip": false,
537
+ "normalized": false,
538
+ "rstrip": false,
539
+ "single_word": false,
540
+ "special": true
541
+ },
542
+ "67": {
543
+ "content": "<mask_43>",
544
+ "lstrip": false,
545
+ "normalized": false,
546
+ "rstrip": false,
547
+ "single_word": false,
548
+ "special": true
549
+ },
550
+ "68": {
551
+ "content": "<mask_44>",
552
+ "lstrip": false,
553
+ "normalized": false,
554
+ "rstrip": false,
555
+ "single_word": false,
556
+ "special": true
557
+ },
558
+ "69": {
559
+ "content": "<mask_45>",
560
+ "lstrip": false,
561
+ "normalized": false,
562
+ "rstrip": false,
563
+ "single_word": false,
564
+ "special": true
565
+ },
566
+ "70": {
567
+ "content": "<mask_46>",
568
+ "lstrip": false,
569
+ "normalized": false,
570
+ "rstrip": false,
571
+ "single_word": false,
572
+ "special": true
573
+ },
574
+ "71": {
575
+ "content": "<mask_47>",
576
+ "lstrip": false,
577
+ "normalized": false,
578
+ "rstrip": false,
579
+ "single_word": false,
580
+ "special": true
581
+ },
582
+ "72": {
583
+ "content": "<mask_48>",
584
+ "lstrip": false,
585
+ "normalized": false,
586
+ "rstrip": false,
587
+ "single_word": false,
588
+ "special": true
589
+ },
590
+ "73": {
591
+ "content": "<mask_49>",
592
+ "lstrip": false,
593
+ "normalized": false,
594
+ "rstrip": false,
595
+ "single_word": false,
596
+ "special": true
597
+ },
598
+ "74": {
599
+ "content": "<mask_50>",
600
+ "lstrip": false,
601
+ "normalized": false,
602
+ "rstrip": false,
603
+ "single_word": false,
604
+ "special": true
605
+ },
606
+ "75": {
607
+ "content": "<mask_51>",
608
+ "lstrip": false,
609
+ "normalized": false,
610
+ "rstrip": false,
611
+ "single_word": false,
612
+ "special": true
613
+ },
614
+ "76": {
615
+ "content": "<mask_52>",
616
+ "lstrip": false,
617
+ "normalized": false,
618
+ "rstrip": false,
619
+ "single_word": false,
620
+ "special": true
621
+ },
622
+ "77": {
623
+ "content": "<mask_53>",
624
+ "lstrip": false,
625
+ "normalized": false,
626
+ "rstrip": false,
627
+ "single_word": false,
628
+ "special": true
629
+ },
630
+ "78": {
631
+ "content": "<mask_54>",
632
+ "lstrip": false,
633
+ "normalized": false,
634
+ "rstrip": false,
635
+ "single_word": false,
636
+ "special": true
637
+ },
638
+ "79": {
639
+ "content": "<mask_55>",
640
+ "lstrip": false,
641
+ "normalized": false,
642
+ "rstrip": false,
643
+ "single_word": false,
644
+ "special": true
645
+ },
646
+ "80": {
647
+ "content": "<mask_56>",
648
+ "lstrip": false,
649
+ "normalized": false,
650
+ "rstrip": false,
651
+ "single_word": false,
652
+ "special": true
653
+ },
654
+ "81": {
655
+ "content": "<mask_57>",
656
+ "lstrip": false,
657
+ "normalized": false,
658
+ "rstrip": false,
659
+ "single_word": false,
660
+ "special": true
661
+ },
662
+ "82": {
663
+ "content": "<mask_58>",
664
+ "lstrip": false,
665
+ "normalized": false,
666
+ "rstrip": false,
667
+ "single_word": false,
668
+ "special": true
669
+ },
670
+ "83": {
671
+ "content": "<mask_59>",
672
+ "lstrip": false,
673
+ "normalized": false,
674
+ "rstrip": false,
675
+ "single_word": false,
676
+ "special": true
677
+ },
678
+ "84": {
679
+ "content": "<mask_60>",
680
+ "lstrip": false,
681
+ "normalized": false,
682
+ "rstrip": false,
683
+ "single_word": false,
684
+ "special": true
685
+ },
686
+ "85": {
687
+ "content": "<mask_61>",
688
+ "lstrip": false,
689
+ "normalized": false,
690
+ "rstrip": false,
691
+ "single_word": false,
692
+ "special": true
693
+ },
694
+ "86": {
695
+ "content": "<mask_62>",
696
+ "lstrip": false,
697
+ "normalized": false,
698
+ "rstrip": false,
699
+ "single_word": false,
700
+ "special": true
701
+ },
702
+ "87": {
703
+ "content": "<mask_63>",
704
+ "lstrip": false,
705
+ "normalized": false,
706
+ "rstrip": false,
707
+ "single_word": false,
708
+ "special": true
709
+ },
710
+ "88": {
711
+ "content": "<mask_64>",
712
+ "lstrip": false,
713
+ "normalized": false,
714
+ "rstrip": false,
715
+ "single_word": false,
716
+ "special": true
717
+ },
718
+ "89": {
719
+ "content": "<mask_65>",
720
+ "lstrip": false,
721
+ "normalized": false,
722
+ "rstrip": false,
723
+ "single_word": false,
724
+ "special": true
725
+ },
726
+ "90": {
727
+ "content": "<mask_66>",
728
+ "lstrip": false,
729
+ "normalized": false,
730
+ "rstrip": false,
731
+ "single_word": false,
732
+ "special": true
733
+ },
734
+ "91": {
735
+ "content": "<mask_67>",
736
+ "lstrip": false,
737
+ "normalized": false,
738
+ "rstrip": false,
739
+ "single_word": false,
740
+ "special": true
741
+ },
742
+ "92": {
743
+ "content": "<mask_68>",
744
+ "lstrip": false,
745
+ "normalized": false,
746
+ "rstrip": false,
747
+ "single_word": false,
748
+ "special": true
749
+ },
750
+ "93": {
751
+ "content": "<mask_69>",
752
+ "lstrip": false,
753
+ "normalized": false,
754
+ "rstrip": false,
755
+ "single_word": false,
756
+ "special": true
757
+ },
758
+ "94": {
759
+ "content": "<mask_70>",
760
+ "lstrip": false,
761
+ "normalized": false,
762
+ "rstrip": false,
763
+ "single_word": false,
764
+ "special": true
765
+ },
766
+ "95": {
767
+ "content": "<mask_71>",
768
+ "lstrip": false,
769
+ "normalized": false,
770
+ "rstrip": false,
771
+ "single_word": false,
772
+ "special": true
773
+ },
774
+ "96": {
775
+ "content": "<mask_72>",
776
+ "lstrip": false,
777
+ "normalized": false,
778
+ "rstrip": false,
779
+ "single_word": false,
780
+ "special": true
781
+ },
782
+ "97": {
783
+ "content": "<mask_73>",
784
+ "lstrip": false,
785
+ "normalized": false,
786
+ "rstrip": false,
787
+ "single_word": false,
788
+ "special": true
789
+ },
790
+ "98": {
791
+ "content": "<mask_74>",
792
+ "lstrip": false,
793
+ "normalized": false,
794
+ "rstrip": false,
795
+ "single_word": false,
796
+ "special": true
797
+ },
798
+ "99": {
799
+ "content": "<mask_75>",
800
+ "lstrip": false,
801
+ "normalized": false,
802
+ "rstrip": false,
803
+ "single_word": false,
804
+ "special": true
805
+ },
806
+ "100": {
807
+ "content": "<mask_76>",
808
+ "lstrip": false,
809
+ "normalized": false,
810
+ "rstrip": false,
811
+ "single_word": false,
812
+ "special": true
813
+ },
814
+ "101": {
815
+ "content": "<mask_77>",
816
+ "lstrip": false,
817
+ "normalized": false,
818
+ "rstrip": false,
819
+ "single_word": false,
820
+ "special": true
821
+ },
822
+ "102": {
823
+ "content": "<mask_78>",
824
+ "lstrip": false,
825
+ "normalized": false,
826
+ "rstrip": false,
827
+ "single_word": false,
828
+ "special": true
829
+ },
830
+ "103": {
831
+ "content": "<mask_79>",
832
+ "lstrip": false,
833
+ "normalized": false,
834
+ "rstrip": false,
835
+ "single_word": false,
836
+ "special": true
837
+ },
838
+ "104": {
839
+ "content": "<mask_80>",
840
+ "lstrip": false,
841
+ "normalized": false,
842
+ "rstrip": false,
843
+ "single_word": false,
844
+ "special": true
845
+ },
846
+ "105": {
847
+ "content": "<mask_81>",
848
+ "lstrip": false,
849
+ "normalized": false,
850
+ "rstrip": false,
851
+ "single_word": false,
852
+ "special": true
853
+ },
854
+ "106": {
855
+ "content": "<mask_82>",
856
+ "lstrip": false,
857
+ "normalized": false,
858
+ "rstrip": false,
859
+ "single_word": false,
860
+ "special": true
861
+ },
862
+ "107": {
863
+ "content": "<mask_83>",
864
+ "lstrip": false,
865
+ "normalized": false,
866
+ "rstrip": false,
867
+ "single_word": false,
868
+ "special": true
869
+ },
870
+ "108": {
871
+ "content": "<mask_84>",
872
+ "lstrip": false,
873
+ "normalized": false,
874
+ "rstrip": false,
875
+ "single_word": false,
876
+ "special": true
877
+ },
878
+ "109": {
879
+ "content": "<mask_85>",
880
+ "lstrip": false,
881
+ "normalized": false,
882
+ "rstrip": false,
883
+ "single_word": false,
884
+ "special": true
885
+ },
886
+ "110": {
887
+ "content": "<mask_86>",
888
+ "lstrip": false,
889
+ "normalized": false,
890
+ "rstrip": false,
891
+ "single_word": false,
892
+ "special": true
893
+ },
894
+ "111": {
895
+ "content": "<mask_87>",
896
+ "lstrip": false,
897
+ "normalized": false,
898
+ "rstrip": false,
899
+ "single_word": false,
900
+ "special": true
901
+ },
902
+ "112": {
903
+ "content": "<mask_88>",
904
+ "lstrip": false,
905
+ "normalized": false,
906
+ "rstrip": false,
907
+ "single_word": false,
908
+ "special": true
909
+ },
910
+ "113": {
911
+ "content": "<mask_89>",
912
+ "lstrip": false,
913
+ "normalized": false,
914
+ "rstrip": false,
915
+ "single_word": false,
916
+ "special": true
917
+ },
918
+ "114": {
919
+ "content": "<mask_90>",
920
+ "lstrip": false,
921
+ "normalized": false,
922
+ "rstrip": false,
923
+ "single_word": false,
924
+ "special": true
925
+ },
926
+ "115": {
927
+ "content": "<mask_91>",
928
+ "lstrip": false,
929
+ "normalized": false,
930
+ "rstrip": false,
931
+ "single_word": false,
932
+ "special": true
933
+ },
934
+ "116": {
935
+ "content": "<mask_92>",
936
+ "lstrip": false,
937
+ "normalized": false,
938
+ "rstrip": false,
939
+ "single_word": false,
940
+ "special": true
941
+ },
942
+ "117": {
943
+ "content": "<mask_93>",
944
+ "lstrip": false,
945
+ "normalized": false,
946
+ "rstrip": false,
947
+ "single_word": false,
948
+ "special": true
949
+ },
950
+ "118": {
951
+ "content": "<mask_94>",
952
+ "lstrip": false,
953
+ "normalized": false,
954
+ "rstrip": false,
955
+ "single_word": false,
956
+ "special": true
957
+ },
958
+ "119": {
959
+ "content": "<mask_95>",
960
+ "lstrip": false,
961
+ "normalized": false,
962
+ "rstrip": false,
963
+ "single_word": false,
964
+ "special": true
965
+ },
966
+ "120": {
967
+ "content": "<mask_96>",
968
+ "lstrip": false,
969
+ "normalized": false,
970
+ "rstrip": false,
971
+ "single_word": false,
972
+ "special": true
973
+ },
974
+ "121": {
975
+ "content": "<mask_97>",
976
+ "lstrip": false,
977
+ "normalized": false,
978
+ "rstrip": false,
979
+ "single_word": false,
980
+ "special": true
981
+ },
982
+ "122": {
983
+ "content": "<mask_98>",
984
+ "lstrip": false,
985
+ "normalized": false,
986
+ "rstrip": false,
987
+ "single_word": false,
988
+ "special": true
989
+ },
990
+ "123": {
991
+ "content": "<mask_99>",
992
+ "lstrip": false,
993
+ "normalized": false,
994
+ "rstrip": false,
995
+ "single_word": false,
996
+ "special": true
997
+ },
998
+ "124": {
999
+ "content": "<mask_100>",
1000
+ "lstrip": false,
1001
+ "normalized": false,
1002
+ "rstrip": false,
1003
+ "single_word": false,
1004
+ "special": true
1005
+ },
1006
+ "125": {
1007
+ "content": "<mask_101>",
1008
+ "lstrip": false,
1009
+ "normalized": false,
1010
+ "rstrip": false,
1011
+ "single_word": false,
1012
+ "special": true
1013
+ },
1014
+ "126": {
1015
+ "content": "<mask_102>",
1016
+ "lstrip": false,
1017
+ "normalized": false,
1018
+ "rstrip": false,
1019
+ "single_word": false,
1020
+ "special": true
1021
+ },
1022
+ "127": {
1023
+ "content": "<mask_103>",
1024
+ "lstrip": false,
1025
+ "normalized": false,
1026
+ "rstrip": false,
1027
+ "single_word": false,
1028
+ "special": true
1029
+ },
1030
+ "128": {
1031
+ "content": "<mask_104>",
1032
+ "lstrip": false,
1033
+ "normalized": false,
1034
+ "rstrip": false,
1035
+ "single_word": false,
1036
+ "special": true
1037
+ },
1038
+ "129": {
1039
+ "content": "<mask_105>",
1040
+ "lstrip": false,
1041
+ "normalized": false,
1042
+ "rstrip": false,
1043
+ "single_word": false,
1044
+ "special": true
1045
+ },
1046
+ "130": {
1047
+ "content": "<mask_106>",
1048
+ "lstrip": false,
1049
+ "normalized": false,
1050
+ "rstrip": false,
1051
+ "single_word": false,
1052
+ "special": true
1053
+ },
1054
+ "131": {
1055
+ "content": "<mask_107>",
1056
+ "lstrip": false,
1057
+ "normalized": false,
1058
+ "rstrip": false,
1059
+ "single_word": false,
1060
+ "special": true
1061
+ },
1062
+ "132": {
1063
+ "content": "<mask_108>",
1064
+ "lstrip": false,
1065
+ "normalized": false,
1066
+ "rstrip": false,
1067
+ "single_word": false,
1068
+ "special": true
1069
+ },
1070
+ "133": {
1071
+ "content": "<mask_109>",
1072
+ "lstrip": false,
1073
+ "normalized": false,
1074
+ "rstrip": false,
1075
+ "single_word": false,
1076
+ "special": true
1077
+ },
1078
+ "134": {
1079
+ "content": "<mask_110>",
1080
+ "lstrip": false,
1081
+ "normalized": false,
1082
+ "rstrip": false,
1083
+ "single_word": false,
1084
+ "special": true
1085
+ },
1086
+ "135": {
1087
+ "content": "<mask_111>",
1088
+ "lstrip": false,
1089
+ "normalized": false,
1090
+ "rstrip": false,
1091
+ "single_word": false,
1092
+ "special": true
1093
+ },
1094
+ "136": {
1095
+ "content": "<mask_112>",
1096
+ "lstrip": false,
1097
+ "normalized": false,
1098
+ "rstrip": false,
1099
+ "single_word": false,
1100
+ "special": true
1101
+ },
1102
+ "137": {
1103
+ "content": "<mask_113>",
1104
+ "lstrip": false,
1105
+ "normalized": false,
1106
+ "rstrip": false,
1107
+ "single_word": false,
1108
+ "special": true
1109
+ },
1110
+ "138": {
1111
+ "content": "<mask_114>",
1112
+ "lstrip": false,
1113
+ "normalized": false,
1114
+ "rstrip": false,
1115
+ "single_word": false,
1116
+ "special": true
1117
+ },
1118
+ "139": {
1119
+ "content": "<mask_115>",
1120
+ "lstrip": false,
1121
+ "normalized": false,
1122
+ "rstrip": false,
1123
+ "single_word": false,
1124
+ "special": true
1125
+ },
1126
+ "140": {
1127
+ "content": "<mask_116>",
1128
+ "lstrip": false,
1129
+ "normalized": false,
1130
+ "rstrip": false,
1131
+ "single_word": false,
1132
+ "special": true
1133
+ },
1134
+ "141": {
1135
+ "content": "<mask_117>",
1136
+ "lstrip": false,
1137
+ "normalized": false,
1138
+ "rstrip": false,
1139
+ "single_word": false,
1140
+ "special": true
1141
+ },
1142
+ "142": {
1143
+ "content": "<mask_118>",
1144
+ "lstrip": false,
1145
+ "normalized": false,
1146
+ "rstrip": false,
1147
+ "single_word": false,
1148
+ "special": true
1149
+ },
1150
+ "143": {
1151
+ "content": "<mask_119>",
1152
+ "lstrip": false,
1153
+ "normalized": false,
1154
+ "rstrip": false,
1155
+ "single_word": false,
1156
+ "special": true
1157
+ },
1158
+ "144": {
1159
+ "content": "<mask_120>",
1160
+ "lstrip": false,
1161
+ "normalized": false,
1162
+ "rstrip": false,
1163
+ "single_word": false,
1164
+ "special": true
1165
+ },
1166
+ "145": {
1167
+ "content": "<mask_121>",
1168
+ "lstrip": false,
1169
+ "normalized": false,
1170
+ "rstrip": false,
1171
+ "single_word": false,
1172
+ "special": true
1173
+ },
1174
+ "146": {
1175
+ "content": "<mask_122>",
1176
+ "lstrip": false,
1177
+ "normalized": false,
1178
+ "rstrip": false,
1179
+ "single_word": false,
1180
+ "special": true
1181
+ },
1182
+ "147": {
1183
+ "content": "<mask_123>",
1184
+ "lstrip": false,
1185
+ "normalized": false,
1186
+ "rstrip": false,
1187
+ "single_word": false,
1188
+ "special": true
1189
+ },
1190
+ "148": {
1191
+ "content": "<mask_124>",
1192
+ "lstrip": false,
1193
+ "normalized": false,
1194
+ "rstrip": false,
1195
+ "single_word": false,
1196
+ "special": true
1197
+ },
1198
+ "149": {
1199
+ "content": "<mask_125>",
1200
+ "lstrip": false,
1201
+ "normalized": false,
1202
+ "rstrip": false,
1203
+ "single_word": false,
1204
+ "special": true
1205
+ },
1206
+ "150": {
1207
+ "content": "<mask_126>",
1208
+ "lstrip": false,
1209
+ "normalized": false,
1210
+ "rstrip": false,
1211
+ "single_word": false,
1212
+ "special": true
1213
+ },
1214
+ "151": {
1215
+ "content": "<mask_127>",
1216
+ "lstrip": false,
1217
+ "normalized": false,
1218
+ "rstrip": false,
1219
+ "single_word": false,
1220
+ "special": true
1221
+ },
1222
+ "152": {
1223
+ "content": "<mask_128>",
1224
+ "lstrip": false,
1225
+ "normalized": false,
1226
+ "rstrip": false,
1227
+ "single_word": false,
1228
+ "special": true
1229
+ },
1230
+ "153": {
1231
+ "content": "<mask_129>",
1232
+ "lstrip": false,
1233
+ "normalized": false,
1234
+ "rstrip": false,
1235
+ "single_word": false,
1236
+ "special": true
1237
+ },
1238
+ "154": {
1239
+ "content": "<mask_130>",
1240
+ "lstrip": false,
1241
+ "normalized": false,
1242
+ "rstrip": false,
1243
+ "single_word": false,
1244
+ "special": true
1245
+ },
1246
+ "155": {
1247
+ "content": "<mask_131>",
1248
+ "lstrip": false,
1249
+ "normalized": false,
1250
+ "rstrip": false,
1251
+ "single_word": false,
1252
+ "special": true
1253
+ },
1254
+ "156": {
1255
+ "content": "<mask_132>",
1256
+ "lstrip": false,
1257
+ "normalized": false,
1258
+ "rstrip": false,
1259
+ "single_word": false,
1260
+ "special": true
1261
+ },
1262
+ "157": {
1263
+ "content": "<mask_133>",
1264
+ "lstrip": false,
1265
+ "normalized": false,
1266
+ "rstrip": false,
1267
+ "single_word": false,
1268
+ "special": true
1269
+ },
1270
+ "158": {
1271
+ "content": "<mask_134>",
1272
+ "lstrip": false,
1273
+ "normalized": false,
1274
+ "rstrip": false,
1275
+ "single_word": false,
1276
+ "special": true
1277
+ },
1278
+ "159": {
1279
+ "content": "<mask_135>",
1280
+ "lstrip": false,
1281
+ "normalized": false,
1282
+ "rstrip": false,
1283
+ "single_word": false,
1284
+ "special": true
1285
+ },
1286
+ "160": {
1287
+ "content": "<mask_136>",
1288
+ "lstrip": false,
1289
+ "normalized": false,
1290
+ "rstrip": false,
1291
+ "single_word": false,
1292
+ "special": true
1293
+ },
1294
+ "161": {
1295
+ "content": "<mask_137>",
1296
+ "lstrip": false,
1297
+ "normalized": false,
1298
+ "rstrip": false,
1299
+ "single_word": false,
1300
+ "special": true
1301
+ },
1302
+ "162": {
1303
+ "content": "<mask_138>",
1304
+ "lstrip": false,
1305
+ "normalized": false,
1306
+ "rstrip": false,
1307
+ "single_word": false,
1308
+ "special": true
1309
+ },
1310
+ "163": {
1311
+ "content": "<mask_139>",
1312
+ "lstrip": false,
1313
+ "normalized": false,
1314
+ "rstrip": false,
1315
+ "single_word": false,
1316
+ "special": true
1317
+ },
1318
+ "164": {
1319
+ "content": "<mask_140>",
1320
+ "lstrip": false,
1321
+ "normalized": false,
1322
+ "rstrip": false,
1323
+ "single_word": false,
1324
+ "special": true
1325
+ },
1326
+ "165": {
1327
+ "content": "<mask_141>",
1328
+ "lstrip": false,
1329
+ "normalized": false,
1330
+ "rstrip": false,
1331
+ "single_word": false,
1332
+ "special": true
1333
+ },
1334
+ "166": {
1335
+ "content": "<mask_142>",
1336
+ "lstrip": false,
1337
+ "normalized": false,
1338
+ "rstrip": false,
1339
+ "single_word": false,
1340
+ "special": true
1341
+ },
1342
+ "167": {
1343
+ "content": "<mask_143>",
1344
+ "lstrip": false,
1345
+ "normalized": false,
1346
+ "rstrip": false,
1347
+ "single_word": false,
1348
+ "special": true
1349
+ },
1350
+ "168": {
1351
+ "content": "<mask_144>",
1352
+ "lstrip": false,
1353
+ "normalized": false,
1354
+ "rstrip": false,
1355
+ "single_word": false,
1356
+ "special": true
1357
+ },
1358
+ "169": {
1359
+ "content": "<mask_145>",
1360
+ "lstrip": false,
1361
+ "normalized": false,
1362
+ "rstrip": false,
1363
+ "single_word": false,
1364
+ "special": true
1365
+ },
1366
+ "170": {
1367
+ "content": "<mask_146>",
1368
+ "lstrip": false,
1369
+ "normalized": false,
1370
+ "rstrip": false,
1371
+ "single_word": false,
1372
+ "special": true
1373
+ },
1374
+ "171": {
1375
+ "content": "<mask_147>",
1376
+ "lstrip": false,
1377
+ "normalized": false,
1378
+ "rstrip": false,
1379
+ "single_word": false,
1380
+ "special": true
1381
+ },
1382
+ "172": {
1383
+ "content": "<mask_148>",
1384
+ "lstrip": false,
1385
+ "normalized": false,
1386
+ "rstrip": false,
1387
+ "single_word": false,
1388
+ "special": true
1389
+ },
1390
+ "173": {
1391
+ "content": "<mask_149>",
1392
+ "lstrip": false,
1393
+ "normalized": false,
1394
+ "rstrip": false,
1395
+ "single_word": false,
1396
+ "special": true
1397
+ },
1398
+ "174": {
1399
+ "content": "<mask_150>",
1400
+ "lstrip": false,
1401
+ "normalized": false,
1402
+ "rstrip": false,
1403
+ "single_word": false,
1404
+ "special": true
1405
+ },
1406
+ "175": {
1407
+ "content": "<mask_151>",
1408
+ "lstrip": false,
1409
+ "normalized": false,
1410
+ "rstrip": false,
1411
+ "single_word": false,
1412
+ "special": true
1413
+ },
1414
+ "176": {
1415
+ "content": "<mask_152>",
1416
+ "lstrip": false,
1417
+ "normalized": false,
1418
+ "rstrip": false,
1419
+ "single_word": false,
1420
+ "special": true
1421
+ },
1422
+ "177": {
1423
+ "content": "<mask_153>",
1424
+ "lstrip": false,
1425
+ "normalized": false,
1426
+ "rstrip": false,
1427
+ "single_word": false,
1428
+ "special": true
1429
+ },
1430
+ "178": {
1431
+ "content": "<mask_154>",
1432
+ "lstrip": false,
1433
+ "normalized": false,
1434
+ "rstrip": false,
1435
+ "single_word": false,
1436
+ "special": true
1437
+ },
1438
+ "179": {
1439
+ "content": "<mask_155>",
1440
+ "lstrip": false,
1441
+ "normalized": false,
1442
+ "rstrip": false,
1443
+ "single_word": false,
1444
+ "special": true
1445
+ },
1446
+ "180": {
1447
+ "content": "<mask_156>",
1448
+ "lstrip": false,
1449
+ "normalized": false,
1450
+ "rstrip": false,
1451
+ "single_word": false,
1452
+ "special": true
1453
+ },
1454
+ "181": {
1455
+ "content": "<mask_157>",
1456
+ "lstrip": false,
1457
+ "normalized": false,
1458
+ "rstrip": false,
1459
+ "single_word": false,
1460
+ "special": true
1461
+ },
1462
+ "182": {
1463
+ "content": "<mask_158>",
1464
+ "lstrip": false,
1465
+ "normalized": false,
1466
+ "rstrip": false,
1467
+ "single_word": false,
1468
+ "special": true
1469
+ },
1470
+ "183": {
1471
+ "content": "<mask_159>",
1472
+ "lstrip": false,
1473
+ "normalized": false,
1474
+ "rstrip": false,
1475
+ "single_word": false,
1476
+ "special": true
1477
+ },
1478
+ "184": {
1479
+ "content": "<mask_160>",
1480
+ "lstrip": false,
1481
+ "normalized": false,
1482
+ "rstrip": false,
1483
+ "single_word": false,
1484
+ "special": true
1485
+ },
1486
+ "185": {
1487
+ "content": "<mask_161>",
1488
+ "lstrip": false,
1489
+ "normalized": false,
1490
+ "rstrip": false,
1491
+ "single_word": false,
1492
+ "special": true
1493
+ },
1494
+ "186": {
1495
+ "content": "<mask_162>",
1496
+ "lstrip": false,
1497
+ "normalized": false,
1498
+ "rstrip": false,
1499
+ "single_word": false,
1500
+ "special": true
1501
+ },
1502
+ "187": {
1503
+ "content": "<mask_163>",
1504
+ "lstrip": false,
1505
+ "normalized": false,
1506
+ "rstrip": false,
1507
+ "single_word": false,
1508
+ "special": true
1509
+ },
1510
+ "188": {
1511
+ "content": "<mask_164>",
1512
+ "lstrip": false,
1513
+ "normalized": false,
1514
+ "rstrip": false,
1515
+ "single_word": false,
1516
+ "special": true
1517
+ },
1518
+ "189": {
1519
+ "content": "<mask_165>",
1520
+ "lstrip": false,
1521
+ "normalized": false,
1522
+ "rstrip": false,
1523
+ "single_word": false,
1524
+ "special": true
1525
+ },
1526
+ "190": {
1527
+ "content": "<mask_166>",
1528
+ "lstrip": false,
1529
+ "normalized": false,
1530
+ "rstrip": false,
1531
+ "single_word": false,
1532
+ "special": true
1533
+ },
1534
+ "191": {
1535
+ "content": "<mask_167>",
1536
+ "lstrip": false,
1537
+ "normalized": false,
1538
+ "rstrip": false,
1539
+ "single_word": false,
1540
+ "special": true
1541
+ },
1542
+ "192": {
1543
+ "content": "<mask_168>",
1544
+ "lstrip": false,
1545
+ "normalized": false,
1546
+ "rstrip": false,
1547
+ "single_word": false,
1548
+ "special": true
1549
+ },
1550
+ "193": {
1551
+ "content": "<mask_169>",
1552
+ "lstrip": false,
1553
+ "normalized": false,
1554
+ "rstrip": false,
1555
+ "single_word": false,
1556
+ "special": true
1557
+ },
1558
+ "194": {
1559
+ "content": "<mask_170>",
1560
+ "lstrip": false,
1561
+ "normalized": false,
1562
+ "rstrip": false,
1563
+ "single_word": false,
1564
+ "special": true
1565
+ },
1566
+ "195": {
1567
+ "content": "<mask_171>",
1568
+ "lstrip": false,
1569
+ "normalized": false,
1570
+ "rstrip": false,
1571
+ "single_word": false,
1572
+ "special": true
1573
+ },
1574
+ "196": {
1575
+ "content": "<mask_172>",
1576
+ "lstrip": false,
1577
+ "normalized": false,
1578
+ "rstrip": false,
1579
+ "single_word": false,
1580
+ "special": true
1581
+ },
1582
+ "197": {
1583
+ "content": "<mask_173>",
1584
+ "lstrip": false,
1585
+ "normalized": false,
1586
+ "rstrip": false,
1587
+ "single_word": false,
1588
+ "special": true
1589
+ },
1590
+ "198": {
1591
+ "content": "<mask_174>",
1592
+ "lstrip": false,
1593
+ "normalized": false,
1594
+ "rstrip": false,
1595
+ "single_word": false,
1596
+ "special": true
1597
+ },
1598
+ "199": {
1599
+ "content": "<mask_175>",
1600
+ "lstrip": false,
1601
+ "normalized": false,
1602
+ "rstrip": false,
1603
+ "single_word": false,
1604
+ "special": true
1605
+ },
1606
+ "200": {
1607
+ "content": "<mask_176>",
1608
+ "lstrip": false,
1609
+ "normalized": false,
1610
+ "rstrip": false,
1611
+ "single_word": false,
1612
+ "special": true
1613
+ },
1614
+ "201": {
1615
+ "content": "<mask_177>",
1616
+ "lstrip": false,
1617
+ "normalized": false,
1618
+ "rstrip": false,
1619
+ "single_word": false,
1620
+ "special": true
1621
+ },
1622
+ "202": {
1623
+ "content": "<mask_178>",
1624
+ "lstrip": false,
1625
+ "normalized": false,
1626
+ "rstrip": false,
1627
+ "single_word": false,
1628
+ "special": true
1629
+ },
1630
+ "203": {
1631
+ "content": "<mask_179>",
1632
+ "lstrip": false,
1633
+ "normalized": false,
1634
+ "rstrip": false,
1635
+ "single_word": false,
1636
+ "special": true
1637
+ },
1638
+ "204": {
1639
+ "content": "<mask_180>",
1640
+ "lstrip": false,
1641
+ "normalized": false,
1642
+ "rstrip": false,
1643
+ "single_word": false,
1644
+ "special": true
1645
+ },
1646
+ "205": {
1647
+ "content": "<mask_181>",
1648
+ "lstrip": false,
1649
+ "normalized": false,
1650
+ "rstrip": false,
1651
+ "single_word": false,
1652
+ "special": true
1653
+ },
1654
+ "206": {
1655
+ "content": "<mask_182>",
1656
+ "lstrip": false,
1657
+ "normalized": false,
1658
+ "rstrip": false,
1659
+ "single_word": false,
1660
+ "special": true
1661
+ },
1662
+ "207": {
1663
+ "content": "<mask_183>",
1664
+ "lstrip": false,
1665
+ "normalized": false,
1666
+ "rstrip": false,
1667
+ "single_word": false,
1668
+ "special": true
1669
+ },
1670
+ "208": {
1671
+ "content": "<mask_184>",
1672
+ "lstrip": false,
1673
+ "normalized": false,
1674
+ "rstrip": false,
1675
+ "single_word": false,
1676
+ "special": true
1677
+ },
1678
+ "209": {
1679
+ "content": "<mask_185>",
1680
+ "lstrip": false,
1681
+ "normalized": false,
1682
+ "rstrip": false,
1683
+ "single_word": false,
1684
+ "special": true
1685
+ },
1686
+ "210": {
1687
+ "content": "<mask_186>",
1688
+ "lstrip": false,
1689
+ "normalized": false,
1690
+ "rstrip": false,
1691
+ "single_word": false,
1692
+ "special": true
1693
+ },
1694
+ "211": {
1695
+ "content": "<mask_187>",
1696
+ "lstrip": false,
1697
+ "normalized": false,
1698
+ "rstrip": false,
1699
+ "single_word": false,
1700
+ "special": true
1701
+ },
1702
+ "212": {
1703
+ "content": "<mask_188>",
1704
+ "lstrip": false,
1705
+ "normalized": false,
1706
+ "rstrip": false,
1707
+ "single_word": false,
1708
+ "special": true
1709
+ },
1710
+ "213": {
1711
+ "content": "<mask_189>",
1712
+ "lstrip": false,
1713
+ "normalized": false,
1714
+ "rstrip": false,
1715
+ "single_word": false,
1716
+ "special": true
1717
+ },
1718
+ "214": {
1719
+ "content": "<mask_190>",
1720
+ "lstrip": false,
1721
+ "normalized": false,
1722
+ "rstrip": false,
1723
+ "single_word": false,
1724
+ "special": true
1725
+ },
1726
+ "215": {
1727
+ "content": "<mask_191>",
1728
+ "lstrip": false,
1729
+ "normalized": false,
1730
+ "rstrip": false,
1731
+ "single_word": false,
1732
+ "special": true
1733
+ },
1734
+ "216": {
1735
+ "content": "<mask_192>",
1736
+ "lstrip": false,
1737
+ "normalized": false,
1738
+ "rstrip": false,
1739
+ "single_word": false,
1740
+ "special": true
1741
+ },
1742
+ "217": {
1743
+ "content": "<mask_193>",
1744
+ "lstrip": false,
1745
+ "normalized": false,
1746
+ "rstrip": false,
1747
+ "single_word": false,
1748
+ "special": true
1749
+ },
1750
+ "218": {
1751
+ "content": "<mask_194>",
1752
+ "lstrip": false,
1753
+ "normalized": false,
1754
+ "rstrip": false,
1755
+ "single_word": false,
1756
+ "special": true
1757
+ },
1758
+ "219": {
1759
+ "content": "<mask_195>",
1760
+ "lstrip": false,
1761
+ "normalized": false,
1762
+ "rstrip": false,
1763
+ "single_word": false,
1764
+ "special": true
1765
+ },
1766
+ "220": {
1767
+ "content": "<mask_196>",
1768
+ "lstrip": false,
1769
+ "normalized": false,
1770
+ "rstrip": false,
1771
+ "single_word": false,
1772
+ "special": true
1773
+ },
1774
+ "221": {
1775
+ "content": "<mask_197>",
1776
+ "lstrip": false,
1777
+ "normalized": false,
1778
+ "rstrip": false,
1779
+ "single_word": false,
1780
+ "special": true
1781
+ },
1782
+ "222": {
1783
+ "content": "<mask_198>",
1784
+ "lstrip": false,
1785
+ "normalized": false,
1786
+ "rstrip": false,
1787
+ "single_word": false,
1788
+ "special": true
1789
+ },
1790
+ "223": {
1791
+ "content": "<mask_199>",
1792
+ "lstrip": false,
1793
+ "normalized": false,
1794
+ "rstrip": false,
1795
+ "single_word": false,
1796
+ "special": true
1797
+ }
1798
+ },
1799
+ "bos_token": "<longcat_s>",
1800
+ "clean_up_tokenization_spaces": false,
1801
+ "eos_token": "</longcat_s>",
1802
+ "extra_special_tokens": {},
1803
+ "merges_file": null,
1804
+ "model_max_length": 131072,
1805
+ "pad_token": "<longcat_pad>",
1806
+ "sp_model_kwargs": {},
1807
+ "tokenizer_class": "BloomTokenizer",
1808
+ "unk_token": "<longcat_unk>",
1809
+ "vocab_file": null
1810
+ }