kernelpool committed
Commit 8be665a · verified · 1 parent: 296b643

Add files using upload-large-folder tool
README.md ADDED
@@ -0,0 +1,40 @@
---
license: other
license_name: iquestcoder
license_link: https://huggingface.co/IQuestLab/IQuest-Coder-V1-40B-Instruct/blob/main/LICENSE
language:
- en
library_name: mlx
pipeline_tag: text-generation
tags:
- mlx
base_model: IQuestLab/IQuest-Coder-V1-40B-Instruct
---

# mlx-community/IQuest-Coder-V1-40B-Instruct-8bit

This model [mlx-community/IQuest-Coder-V1-40B-Instruct-8bit](https://huggingface.co/mlx-community/IQuest-Coder-V1-40B-Instruct-8bit) was
converted to MLX format from [IQuestLab/IQuest-Coder-V1-40B-Instruct](https://huggingface.co/IQuestLab/IQuest-Coder-V1-40B-Instruct)
using mlx-lm version **0.30.0**.

## Use with mlx

```bash
pip install mlx-lm
```

```python
from mlx_lm import load, generate

model, tokenizer = load("mlx-community/IQuest-Coder-V1-40B-Instruct-8bit")

prompt = "hello"

if tokenizer.chat_template is not None:
    messages = [{"role": "user", "content": prompt}]
    prompt = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_dict=False,
    )

response = generate(model, tokenizer, prompt=prompt, verbose=True)
```
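For incremental output, recent mlx-lm versions also expose `stream_generate`; a minimal sketch, assuming the same model id and that your installed mlx-lm yields response chunks with a `.text` field (check your version's API):

```python
from mlx_lm import load, stream_generate

model, tokenizer = load("mlx-community/IQuest-Coder-V1-40B-Instruct-8bit")

messages = [{"role": "user", "content": "Write a binary search in Python."}]
prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

# Print tokens as they are generated instead of waiting for the full response
for chunk in stream_generate(model, tokenizer, prompt=prompt, max_tokens=512):
    print(chunk.text, end="", flush=True)
```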
__init__.py ADDED
@@ -0,0 +1,30 @@
"""IQuestCoder model package."""

from .configuration_iquestcoder import IQuestCoderConfig
from .modeling_iquestcoder import (
    IQuestCoderPreTrainedModel,
    IQuestCoderModel,
    IQuestCoderForCausalLM,
    IQuestCoderForSequenceClassification,
    IQuestCoderForTokenClassification,
    IQuestCoderForQuestionAnswering,
)
from .tokenization_iquestcoder import IQuestCoderTokenizer

try:
    from .tokenization_iquestcoder import IQuestCoderTokenizerFast
except ImportError:
    IQuestCoderTokenizerFast = None

__all__ = [
    "IQuestCoderConfig",
    "IQuestCoderPreTrainedModel",
    "IQuestCoderModel",
    "IQuestCoderForCausalLM",
    "IQuestCoderForSequenceClassification",
    "IQuestCoderForTokenClassification",
    "IQuestCoderForQuestionAnswering",
    "IQuestCoderTokenizer",
    "IQuestCoderTokenizerFast",
]
added_tokens.json ADDED
@@ -0,0 +1,29 @@
{
  "</think>": 75873,
  "</tool_call>": 75877,
  "</tool_response>": 75879,
  "</tools>": 75875,
  "<CLS>": 75858,
  "<EOD>": 75860,
  "<MASK>": 75861,
  "<PAD>": 75862,
  "<SEP>": 75859,
  "<think>": 75872,
  "<tool_call>": 75876,
  "<tool_response>": 75878,
  "<tools>": 75874,
  "<|CLS|>": 75880,
  "<|EOD|>": 75882,
  "<|MASK|>": 75883,
  "<|PAD|>": 75884,
  "<|SEP|>": 75881,
  "<|endoftext|>": 75869,
  "<|file_sep|>": 75871,
  "<|fim_middle|>": 75866,
  "<|fim_pad|>": 75868,
  "<|fim_prefix|>": 75865,
  "<|fim_suffix|>": 75867,
  "<|im_end|>": 75864,
  "<|im_start|>": 75863,
  "<|repo_name|>": 75870
}
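The ids above can be sanity-checked against the shipped tokenizer; a small sketch, assuming `AutoTokenizer` resolves the tokenizer bundled with this repo:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("mlx-community/IQuest-Coder-V1-40B-Instruct-8bit")

# Each added special token should round-trip to the id listed in added_tokens.json
assert tok.convert_tokens_to_ids("<think>") == 75872
assert tok.convert_tokens_to_ids("<|im_start|>") == 75863
assert tok.convert_tokens_to_ids("<|im_end|>") == 75864
```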
chat_template.jinja ADDED
@@ -0,0 +1,69 @@
{%- if tools %}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0].role == 'system' %}
        {{- messages[0].content + '\n\n' }}
    {%- else %}
        {{- 'You are LoopCoder, a helpful assistant developed by IQuest.' }}
    {%- endif %}
    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {%- if messages[0].role == 'system' %}
        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
    {%- else %}
        {{- '<|im_start|>system\nYou are LoopCoder, a helpful assistant developed by IQuest.<|im_end|>\n' }}
    {%- endif %}
{%- endif %}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
    {%- set index = (messages|length - 1) - loop.index0 %}
    {%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
        {%- set ns.multi_step_tool = false %}
        {%- set ns.last_query_index = index %}
    {%- endif %}
{%- endfor %}
{%- for message in messages %}
    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
    {%- elif message.role == "assistant" %}
        {%- set content = message.content %}
        {{- '<|im_start|>' + message.role + '\n' + content }}
        {%- if message.tool_calls %}
            {%- for tool_call in message.tool_calls %}
                {%- if (loop.first and content) or (not loop.first) %}
                    {{- '\n' }}
                {%- endif %}
                {%- if tool_call.function %}
                    {%- set tool_call = tool_call.function %}
                {%- endif %}
                {{- '<tool_call>\n{"name": "' }}
                {{- tool_call.name }}
                {{- '", "arguments": ' }}
                {%- if tool_call.arguments is string %}
                    {{- tool_call.arguments }}
                {%- else %}
                    {{- tool_call.arguments | tojson }}
                {%- endif %}
                {{- '}\n</tool_call>' }}
            {%- endfor %}
        {%- endif %}
        {{- '<|im_end|>\n' }}
    {%- elif message.role == "tool" %}
        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
            {{- '<|im_start|>user' }}
        {%- endif %}
        {{- '\n<tool_response>\n' }}
        {{- message.content }}
        {{- '\n</tool_response>' }}
        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
            {{- '<|im_end|>\n' }}
        {%- endif %}
    {%- endif %}
{%- endfor %}
{%- if add_generation_prompt %}
    {{- '<|im_start|>assistant\n' }}
{%- endif %}
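To see what this template produces, render it without tokenizing; for a single user turn with no tools, the template above deterministically emits the default system message, the user turn, and the generation prompt:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("mlx-community/IQuest-Coder-V1-40B-Instruct-8bit")
text = tok.apply_chat_template(
    [{"role": "user", "content": "hello"}],
    tokenize=False,
    add_generation_prompt=True,
)
print(text)
# <|im_start|>system
# You are LoopCoder, a helpful assistant developed by IQuest.<|im_end|>
# <|im_start|>user
# hello<|im_end|>
# <|im_start|>assistant
```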
config.json ADDED
@@ -0,0 +1,55 @@
{
  "architectures": [
    "IQuestCoderForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_iquestcoder.IQuestCoderConfig",
    "AutoModel": "modeling_iquestcoder.IQuestCoderModel",
    "AutoModelForCausalLM": "modeling_iquestcoder.IQuestCoderForCausalLM",
    "AutoModelForSequenceClassification": "modeling_iquestcoder.IQuestCoderForSequenceClassification",
    "AutoModelForTokenClassification": "modeling_iquestcoder.IQuestCoderForTokenClassification",
    "AutoModelForQuestionAnswering": "modeling_iquestcoder.IQuestCoderForQuestionAnswering"
  },
  "bos_token_id": 1,
  "clip_qkv": null,
  "eos_token_id": [
    2,
    75864,
    75869
  ],
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 5120,
  "initializer_range": 0.02,
  "intermediate_size": 27648,
  "max_position_embeddings": 131072,
  "max_window_layers": 0,
  "mlp_bias": false,
  "model_type": "iquestcoder",
  "num_attention_heads": 40,
  "num_hidden_layers": 80,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "quantization": {
    "group_size": 64,
    "bits": 8,
    "mode": "affine"
  },
  "quantization_config": {
    "group_size": 64,
    "bits": 8,
    "mode": "affine"
  },
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 500000.0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.55.4",
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 76800
}
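A few of these fields are mutually constraining: 40 attention heads × head_dim 128 = 5120 = hidden_size, and 40 query heads over 8 KV heads gives 5-way grouped-query attention. A quick check, assuming the file is saved locally as `config.json`:

```python
import json

with open("config.json") as f:
    cfg = json.load(f)

# Head geometry: heads * head_dim must equal the hidden size
assert cfg["num_attention_heads"] * cfg["head_dim"] == cfg["hidden_size"]  # 40 * 128 == 5120

# GQA: query heads must divide evenly over KV heads
assert cfg["num_attention_heads"] % cfg["num_key_value_heads"] == 0
print(cfg["num_attention_heads"] // cfg["num_key_value_heads"])  # 5 query heads per KV head
```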
configuration_iquestcoder.py ADDED
@@ -0,0 +1,182 @@
"""IQuestCoder model configuration."""

from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging


logger = logging.get_logger(__name__)


class IQuestCoderConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of an [`IQuestCoderModel`]. It is used to instantiate
    an IQuestCoder model according to the specified arguments, defining the model architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Args:
        vocab_size (`int`, *optional*, defaults to 76800):
            Vocabulary size of the IQuestCoder model. Defines the number of different tokens that can be represented
            by the `inputs_ids` passed when calling [`IQuestCoderModel`].
        hidden_size (`int`, *optional*, defaults to 5120):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 27648):
            Dimension of the MLP representations.
        num_hidden_layers (`int`, *optional*, defaults to 80):
            Number of hidden layers in the Transformer decoder.
        num_attention_heads (`int`, *optional*, defaults to 40):
            Number of attention heads for each attention layer in the Transformer decoder.
        num_key_value_heads (`int`, *optional*, defaults to 8):
            This is the number of key_value heads that should be used to implement Grouped Query Attention (GQA).
            If `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA).
            If `num_key_value_heads=1`, the model will use Multi Query Attention (MQA).
        head_dim (`int`, *optional*, defaults to 128):
            The dimension of each attention head. If not specified, defaults to `hidden_size // num_attention_heads`.
        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the decoder.
        max_position_embeddings (`int`, *optional*, defaults to 16384):
            The maximum sequence length that this model might ever be used with.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        rms_norm_eps (`float`, *optional*, defaults to 1e-05):
            The epsilon used by the rms normalization layers.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models).
        pad_token_id (`int`, *optional*):
            Padding token id.
        bos_token_id (`int`, *optional*, defaults to 1):
            Beginning of stream token id.
        eos_token_id (`int`, *optional*, defaults to 2):
            End of stream token id.
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings.
        rope_theta (`float`, *optional*, defaults to 500000.0):
            The base period of the RoPE embeddings.
        rope_scaling (`Dict`, *optional*):
            Dictionary containing the scaling configuration for the RoPE embeddings. Supports various RoPE scaling
            types including "linear", "dynamic", "yarn", "longrope", etc.
        attention_bias (`bool`, *optional*, defaults to `False`):
            Whether to use a bias in the query, key, value and output projection layers during self-attention.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        mlp_bias (`bool`, *optional*, defaults to `False`):
            Whether to use a bias in up_proj, down_proj and gate_proj layers in the MLP layers.
        clip_qkv (`float`, *optional*):
            If set, clip the query, key, and value tensors to this value. Borrowed from OLMo for training stability.
        use_sliding_window (`bool`, *optional*, defaults to `False`):
            Whether to use sliding window attention. Borrowed from Qwen2.
        sliding_window (`int`, *optional*):
            The sliding window size. Only effective when `use_sliding_window=True`.
        max_window_layers (`int`, *optional*, defaults to 0):
            The number of layers that don't use sliding window attention. Borrowed from Qwen2.

    Example:
    ```python
    >>> from configuration_iquestcoder import IQuestCoderConfig
    >>> from modeling_iquestcoder import IQuestCoderModel

    >>> # Initializing an IQuestCoder configuration
    >>> configuration = IQuestCoderConfig()

    >>> # Initializing a model from the configuration
    >>> model = IQuestCoderModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```
    """

    model_type = "iquestcoder"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=76800,
        hidden_size=5120,
        intermediate_size=27648,
        num_hidden_layers=80,
        num_attention_heads=40,
        num_key_value_heads=8,
        head_dim=128,
        hidden_act="silu",
        max_position_embeddings=16384,
        initializer_range=0.02,
        rms_norm_eps=1e-5,
        use_cache=True,
        pad_token_id=None,
        bos_token_id=1,
        eos_token_id=2,
        tie_word_embeddings=False,
        rope_theta=500000.0,
        rope_scaling=None,
        attention_bias=False,
        attention_dropout=0.0,
        mlp_bias=False,
        # IQuestCoder specific (borrowed from OLMo)
        clip_qkv=None,
        # IQuestCoder specific (borrowed from Qwen2)
        use_sliding_window=False,
        sliding_window=None,
        max_window_layers=0,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads
        self.head_dim = head_dim
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout
        self.mlp_bias = mlp_bias
        # IQuestCoder specific
        self.clip_qkv = clip_qkv
        self.use_sliding_window = use_sliding_window
        self.sliding_window = sliding_window
        self.max_window_layers = max_window_layers

        # Validate rope_scaling configuration
        self._rope_scaling_validation()

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )

    def _rope_scaling_validation(self):
        """Validate the `rope_scaling` configuration."""
        if self.rope_scaling is None:
            return

        if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) < 1:
            raise ValueError(
                "`rope_scaling` must be a dictionary with a minimum of one field, `type` or `rope_type`."
            )

        rope_scaling_type = self.rope_scaling.get("type", None) or self.rope_scaling.get("rope_type", None)
        if rope_scaling_type is None:
            raise ValueError(
                "`rope_scaling` must have a `type` or `rope_type` field."
            )

        valid_rope_types = ["linear", "dynamic", "yarn", "longrope", "llama3"]
        if rope_scaling_type not in valid_rope_types:
            raise ValueError(
                f"`rope_scaling`'s type field must be one of {valid_rope_types}, got {rope_scaling_type}"
            )


__all__ = ["IQuestCoderConfig"]
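The `_rope_scaling_validation` hook can be exercised directly; a sketch, run from a checkout of this repo, with illustrative scaling values:

```python
from configuration_iquestcoder import IQuestCoderConfig

# Accepted: a recognized scaling type plus whatever extra fields that method uses
cfg = IQuestCoderConfig(rope_scaling={"rope_type": "yarn", "factor": 4.0})

# Rejected: no `type`/`rope_type` field, so validation raises ValueError
try:
    IQuestCoderConfig(rope_scaling={"factor": 4.0})
except ValueError as err:
    print(err)
```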
generation_config.json ADDED
@@ -0,0 +1,6 @@
{
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": [2, 75864, 75869],
  "transformers_version": "4.55.4"
}
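Note that, per added_tokens.json above, the extra eos ids map to `<|im_end|>` (75864) and `<|endoftext|>` (75869), so generation stops at chat-turn boundaries as well as at the base eos id 2.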
model-00001-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:dfdc0a6d6c6c42c1d0cf32f28108b582c25716b5214fa38ccb4a7fea99074e4e
size 5297812520

model-00002-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3632dd49583033e84a6d1d45856eec3fbf627dceb1bcec5e3c0a85050838891c
size 5331257520

model-00003-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:225d3131c7a9bee9789857535db93fa6493d1fbc1ebe1a3bc81abcce54336baf
size 5364702244

model-00004-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f4505389e4a5fae88e3db63f0f41092f58a7b436623e4041348d1ba0df7b215b
size 5364681559

model-00005-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6402fb9a1d4784987de8401c5d0ec1c58d6c9ea77612d639112d57f8004685bd
size 5331257502

model-00006-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4e1308d7684a32bbb68c832de460f4fdba9ef6d8ef28f8f7a3657524c660e110
size 5364702242

model-00007-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0f4c1cc3a2549a6619e2f8a3360c0d86403b81ecf820447879c4e5d46d6cfaa0
size 5364681485

model-00008-of-00008.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3132851e51b497db022558c6df28810939d296cb5fdecbf0e06514bee6779835
size 4863315824
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
modeling_iquestcoder.py ADDED
@@ -0,0 +1,1068 @@
"""
Modified MIT License

Software Copyright© 2025 IQuest Research

Our only modification is that, if the Software (or any derivative works
thereof) is used for any of your commercial products or services, you shall
prominently display "IQuest Coder" on the user interface of such product or
service.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
"""

from typing import Callable, List, Optional, Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F

from transformers.activations import ACT2FN
from transformers.cache_utils import Cache, DynamicCache, SlidingWindowCache, StaticCache
from transformers.generation import GenerationMixin
from transformers.modeling_attn_mask_utils import AttentionMaskConverter
from transformers.modeling_flash_attention_utils import FlashAttentionKwargs
from transformers.modeling_layers import GradientCheckpointingLayer
from transformers.modeling_outputs import (
    BaseModelOutputWithPast,
    CausalLMOutputWithPast,
    QuestionAnsweringModelOutput,
    SequenceClassifierOutputWithPast,
    TokenClassifierOutput,
)
from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
from transformers.processing_utils import Unpack
from transformers.utils import (
    LossKwargs,
    auto_docstring,
    can_return_tuple,
    is_torch_flex_attn_available,
    logging,
)

from .configuration_iquestcoder import IQuestCoderConfig


if is_torch_flex_attn_available():
    from torch.nn.attention.flex_attention import BlockMask
    from transformers.integrations.flex_attention import make_flex_block_causal_mask


logger = logging.get_logger(__name__)


# =============================================================================
# Helper Functions
# =============================================================================

def rotate_half(x: torch.Tensor) -> torch.Tensor:
    """Rotates half the hidden dims of the input."""
    x1 = x[..., : x.shape[-1] // 2]
    x2 = x[..., x.shape[-1] // 2 :]
    return torch.cat((-x2, x1), dim=-1)


def apply_rotary_pos_emb(
    q: torch.Tensor,
    k: torch.Tensor,
    cos: torch.Tensor,
    sin: torch.Tensor,
    position_ids: Optional[torch.Tensor] = None,
    unsqueeze_dim: int = 1,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q: The query tensor.
        k: The key tensor.
        cos: The cosine part of the rotary embedding.
        sin: The sine part of the rotary embedding.
        position_ids: Deprecated and unused.
        unsqueeze_dim: The dimension along which to unsqueeze cos and sin.

    Returns:
        Tuple of query and key tensors rotated using the Rotary Position Embedding.
    """
    # Borrowed from OLMo: preserve original dtypes for numerical stability
    q_dtype, k_dtype = q.dtype, k.dtype
    cos = cos.unsqueeze(unsqueeze_dim)
    sin = sin.unsqueeze(unsqueeze_dim)
    q_embed = (q * cos) + (rotate_half(q) * sin)
    k_embed = (k * cos) + (rotate_half(k) * sin)
    return q_embed.to(q_dtype), k_embed.to(k_dtype)


def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
    """
    Expands key/value heads for Grouped Query Attention.

    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep).
    The hidden states go from (batch, num_key_value_heads, seqlen, head_dim) to
    (batch, num_attention_heads, seqlen, head_dim).
    """
    batch, num_key_value_heads, slen, head_dim = hidden_states.shape
    if n_rep == 1:
        return hidden_states
    hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads, n_rep, slen, head_dim)
    return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)


def eager_attention_forward(
    module: nn.Module,
    query: torch.Tensor,
    key: torch.Tensor,
    value: torch.Tensor,
    attention_mask: Optional[torch.Tensor],
    scaling: float,
    dropout: float = 0.0,
    **kwargs,
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Standard eager attention implementation."""
    key_states = repeat_kv(key, module.num_key_value_groups)
    value_states = repeat_kv(value, module.num_key_value_groups)

    attn_weights = torch.matmul(query, key_states.transpose(2, 3)) * scaling
    if attention_mask is not None:
        causal_mask = attention_mask[:, :, :, : key_states.shape[-2]]
        attn_weights = attn_weights + causal_mask

    attn_weights = F.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype)
    attn_weights = F.dropout(attn_weights, p=dropout, training=module.training)
    attn_output = torch.matmul(attn_weights, value_states)
    attn_output = attn_output.transpose(1, 2).contiguous()

    return attn_output, attn_weights


# =============================================================================
# Model Components
# =============================================================================

class IQuestCoderRMSNorm(nn.Module):
    """Root Mean Square Layer Normalization.

    RMSNorm is computationally simpler than LayerNorm while achieving similar
    performance. It normalizes the input by its RMS value.
    """

    def __init__(self, hidden_size: int, eps: float = 1e-6):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.variance_epsilon = eps

    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        input_dtype = hidden_states.dtype
        hidden_states = hidden_states.to(torch.float32)
        variance = hidden_states.pow(2).mean(-1, keepdim=True)
        hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
        return self.weight * hidden_states.to(input_dtype)

    def extra_repr(self) -> str:
        return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}"


class IQuestCoderRotaryEmbedding(nn.Module):
    """Rotary Position Embedding (RoPE).

    Implements rotary positional embeddings as described in the RoFormer paper.
    Supports various RoPE scaling methods for extended context lengths.
    """

    def __init__(self, config: IQuestCoderConfig, device=None):
        super().__init__()
        # BC: "rope_type" was originally "type"
        if hasattr(config, "rope_scaling") and config.rope_scaling is not None:
            self.rope_type = config.rope_scaling.get("rope_type", config.rope_scaling.get("type"))
        else:
            self.rope_type = "default"
        self.max_seq_len_cached = config.max_position_embeddings
        self.original_max_seq_len = config.max_position_embeddings

        self.config = config
        self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type]

        inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device)
        self.register_buffer("inv_freq", inv_freq, persistent=False)
        self.original_inv_freq = self.inv_freq

    @torch.no_grad()
    @dynamic_rope_update
    def forward(self, x: torch.Tensor, position_ids: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1).to(x.device)
        position_ids_expanded = position_ids[:, None, :].float()

        device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu"
        with torch.autocast(device_type=device_type, enabled=False):
            freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2)
            emb = torch.cat((freqs, freqs), dim=-1)
            cos = emb.cos() * self.attention_scaling
            sin = emb.sin() * self.attention_scaling

        return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)


class IQuestCoderMLP(nn.Module):
    """Feed-forward network with SwiGLU activation.

    Uses the gated linear unit variant with SiLU activation for improved
    performance compared to standard FFN.
    """

    def __init__(self, config: IQuestCoderConfig):
        super().__init__()
        self.config = config
        self.hidden_size = config.hidden_size
        self.intermediate_size = config.intermediate_size
        self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=config.mlp_bias)
        self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=config.mlp_bias)
        self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=config.mlp_bias)
        self.act_fn = ACT2FN[config.hidden_act]

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # SwiGLU: down_proj(act_fn(gate_proj(x)) * up_proj(x))
        return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x))


class IQuestCoderAttention(nn.Module):
    """Multi-headed attention with support for Grouped Query Attention (GQA).

    Features:
    - Grouped Query Attention for memory efficiency
    - Optional QKV clipping for training stability (from OLMo)
    - Optional sliding window attention (from Qwen2)
    - Rotary Position Embeddings
    """

    def __init__(self, config: IQuestCoderConfig, layer_idx: int):
        super().__init__()
        self.config = config
        self.layer_idx = layer_idx
        self.head_dim = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
        self.num_key_value_groups = config.num_attention_heads // config.num_key_value_heads
        self.scaling = self.head_dim ** -0.5
        self.attention_dropout = config.attention_dropout
        self.is_causal = True

        # Projection layers
        self.q_proj = nn.Linear(
            config.hidden_size, config.num_attention_heads * self.head_dim, bias=config.attention_bias
        )
        self.k_proj = nn.Linear(
            config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.attention_bias
        )
        self.v_proj = nn.Linear(
            config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.attention_bias
        )
        self.o_proj = nn.Linear(
            config.num_attention_heads * self.head_dim, config.hidden_size, bias=config.attention_bias
        )

    def forward(
        self,
        hidden_states: torch.Tensor,
        position_embeddings: Tuple[torch.Tensor, torch.Tensor],
        attention_mask: Optional[torch.Tensor],
        past_key_value: Optional[Cache] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        input_shape = hidden_states.shape[:-1]
        hidden_shape = (*input_shape, -1, self.head_dim)

        # Compute Q, K, V projections
        query_states = self.q_proj(hidden_states)
        key_states = self.k_proj(hidden_states)
        value_states = self.v_proj(hidden_states)

        # [OLMo Feature] Optional QKV clipping for training stability
        if self.config.clip_qkv is not None:
            query_states = query_states.clamp(min=-self.config.clip_qkv, max=self.config.clip_qkv)
            key_states = key_states.clamp(min=-self.config.clip_qkv, max=self.config.clip_qkv)
            value_states = value_states.clamp(min=-self.config.clip_qkv, max=self.config.clip_qkv)

        # Reshape to (batch, heads, seq_len, head_dim)
        query_states = query_states.view(hidden_shape).transpose(1, 2)
        key_states = key_states.view(hidden_shape).transpose(1, 2)
        value_states = value_states.view(hidden_shape).transpose(1, 2)

        # Apply rotary position embeddings
        cos, sin = position_embeddings
        query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin)

        # Update KV cache if provided
        if past_key_value is not None:
            cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position}
            key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs)

        # [Qwen2 Feature] Sliding window attention
        sliding_window = None
        if (
            self.config.use_sliding_window
            and getattr(self.config, "sliding_window", None) is not None
            and self.layer_idx >= self.config.max_window_layers
        ):
            sliding_window = self.config.sliding_window

        # Select attention implementation
        attention_interface: Callable = eager_attention_forward
        if self.config._attn_implementation != "eager":
            if self.config._attn_implementation == "sdpa" and kwargs.get("output_attentions", False):
                logger.warning_once(
                    "`torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True`. "
                    'Falling back to eager attention. This warning can be removed using the argument '
                    '`attn_implementation="eager"` when loading the model.'
                )
            else:
                attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

        # Compute attention
        attn_output, attn_weights = attention_interface(
            self,
            query_states,
            key_states,
            value_states,
            attention_mask,
            dropout=0.0 if not self.training else self.attention_dropout,
            scaling=self.scaling,
            sliding_window=sliding_window,
            **kwargs,
        )

        # Reshape and project output
        attn_output = attn_output.reshape(*input_shape, -1).contiguous()
        attn_output = self.o_proj(attn_output)

        return attn_output, attn_weights


class IQuestCoderDecoderLayer(GradientCheckpointingLayer):
    """Transformer decoder layer with pre-normalization.

    Architecture: Pre-RMSNorm -> Attention -> Residual -> Pre-RMSNorm -> MLP -> Residual
    """

    def __init__(self, config: IQuestCoderConfig, layer_idx: int):
        super().__init__()
        self.hidden_size = config.hidden_size
        self.self_attn = IQuestCoderAttention(config=config, layer_idx=layer_idx)
        self.mlp = IQuestCoderMLP(config)
        self.input_layernorm = IQuestCoderRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.post_attention_layernorm = IQuestCoderRMSNorm(config.hidden_size, eps=config.rms_norm_eps)

        # Warn if sliding window is enabled but not properly supported
        if config.use_sliding_window and config._attn_implementation != "flash_attention_2":
            logger.warning_once(
                f"Sliding Window Attention is enabled but not implemented for `{config._attn_implementation}`; "
                "unexpected results may be encountered."
            )

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_value: Optional[Cache] = None,
        output_attentions: Optional[bool] = False,
        use_cache: Optional[bool] = False,
        cache_position: Optional[torch.LongTensor] = None,
        position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
        **kwargs: Unpack[FlashAttentionKwargs],
    ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor, torch.FloatTensor]]]:
        # Pre-norm + Self Attention
        residual = hidden_states
        hidden_states = self.input_layernorm(hidden_states)

        hidden_states, self_attn_weights = self.self_attn(
            hidden_states=hidden_states,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_value=past_key_value,
            output_attentions=output_attentions,
            use_cache=use_cache,
            cache_position=cache_position,
            position_embeddings=position_embeddings,
            **kwargs,
        )
        hidden_states = residual + hidden_states

        # Pre-norm + MLP
        residual = hidden_states
        hidden_states = self.post_attention_layernorm(hidden_states)
        hidden_states = self.mlp(hidden_states)
        hidden_states = residual + hidden_states

        outputs = (hidden_states,)
        if output_attentions:
            outputs += (self_attn_weights,)

        return outputs


# =============================================================================
# Base Model
# =============================================================================

@auto_docstring
class IQuestCoderPreTrainedModel(PreTrainedModel):
    """Base class for IQuestCoder models."""

    config_class = IQuestCoderConfig
    base_model_prefix = "model"
    supports_gradient_checkpointing = True
    _no_split_modules = ["IQuestCoderDecoderLayer"]
    _skip_keys_device_placement = ["past_key_values"]
    _supports_flash_attn_2 = True
    _supports_sdpa = True
    _supports_flex_attn = True
    _supports_cache_class = True
    _supports_quantized_cache = True
    _supports_static_cache = True
    _supports_attention_backend = True

    def _init_weights(self, module: nn.Module):
        std = self.config.initializer_range
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=std)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=std)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, IQuestCoderRMSNorm):
            module.weight.data.fill_(1.0)


@auto_docstring
class IQuestCoderModel(IQuestCoderPreTrainedModel):
    """
    IQuestCoder Model outputting raw hidden-states without any specific head on top.

    This model is compatible with LLaMA weights while incorporating features from OLMo and Qwen2.
    """

    def __init__(self, config: IQuestCoderConfig):
        super().__init__(config)
        self.padding_idx = config.pad_token_id
        self.vocab_size = config.vocab_size

        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx)
        self.layers = nn.ModuleList(
            [IQuestCoderDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
        )
        self.norm = IQuestCoderRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
        self.rotary_emb = IQuestCoderRotaryEmbedding(config=config)
        self.gradient_checkpointing = False

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self) -> nn.Embedding:
        return self.embed_tokens

    def set_input_embeddings(self, value: nn.Embedding):
        self.embed_tokens = value

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[Cache] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        cache_position: Optional[torch.LongTensor] = None,
        **flash_attn_kwargs: Unpack[FlashAttentionKwargs],
    ) -> BaseModelOutputWithPast:
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        use_cache = use_cache if use_cache is not None else self.config.use_cache

        if (input_ids is None) ^ (inputs_embeds is not None):
            raise ValueError("You must specify exactly one of input_ids or inputs_embeds")

        if self.gradient_checkpointing and self.training and use_cache:
            logger.warning_once(
                "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`."
            )
            use_cache = False

        if not isinstance(past_key_values, (type(None), Cache)):
            raise ValueError("The `past_key_values` should be either a `Cache` object or `None`.")

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)

        if use_cache and past_key_values is None:
            past_key_values = DynamicCache()

        if cache_position is None:
            past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
            cache_position = torch.arange(
                past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
            )

        if position_ids is None:
            position_ids = cache_position.unsqueeze(0)

        causal_mask = self._update_causal_mask(
            attention_mask, inputs_embeds, cache_position, past_key_values, output_attentions
        )

        hidden_states = inputs_embeds

        # Create position embeddings to be shared across the decoder layers
        position_embeddings = self.rotary_emb(hidden_states, position_ids)

        # Decoder layers
        all_hidden_states = () if output_hidden_states else None
        all_self_attns = () if output_attentions else None

        for decoder_layer in self.layers[: self.config.num_hidden_layers]:
            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            layer_outputs = decoder_layer(
                hidden_states,
                attention_mask=causal_mask,
                position_ids=position_ids,
                past_key_value=past_key_values,
                output_attentions=output_attentions,
                use_cache=use_cache,
                cache_position=cache_position,
                position_embeddings=position_embeddings,
                **flash_attn_kwargs,
            )

            hidden_states = layer_outputs[0]

            if output_attentions:
                all_self_attns += (layer_outputs[1],)

        hidden_states = self.norm(hidden_states)

        # Add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=past_key_values if use_cache else None,
            hidden_states=all_hidden_states,
            attentions=all_self_attns,
        )

    def _update_causal_mask(
        self,
        attention_mask: Union[torch.Tensor, "BlockMask"],
        input_tensor: torch.Tensor,
        cache_position: torch.Tensor,
        past_key_values: Cache,
        output_attentions: bool = False,
    ):
        if self.config._attn_implementation == "flash_attention_2":
            if attention_mask is not None and past_key_values is not None:
                is_padding_right = attention_mask[:, -1].sum().item() != input_tensor.size()[0]
                if is_padding_right:
                    raise ValueError(
                        "You are attempting to perform batched generation with padding_side='right'. "
                        "This may lead to unexpected behaviour for Flash Attention version of IQuestCoder. "
                        "Make sure to call `tokenizer.padding_side = 'left'` before tokenizing the input."
                    )
            if attention_mask is not None and 0.0 in attention_mask:
                return attention_mask
            return None

        if self.config._attn_implementation == "flex_attention":
            if isinstance(attention_mask, torch.Tensor):
                attention_mask = make_flex_block_causal_mask(attention_mask)
            return attention_mask

        past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
        using_static_cache = isinstance(past_key_values, StaticCache)
        using_sliding_window_cache = isinstance(past_key_values, SlidingWindowCache)

        if (
            self.config._attn_implementation == "sdpa"
            and not (using_static_cache or using_sliding_window_cache)
            and not output_attentions
        ):
            if AttentionMaskConverter._ignore_causal_mask_sdpa(
                attention_mask,
                inputs_embeds=input_tensor,
                past_key_values_length=past_seen_tokens,
                sliding_window=self.config.sliding_window if self.config.use_sliding_window else None,
                is_training=self.training,
            ):
                return None

        dtype = input_tensor.dtype
        min_dtype = torch.finfo(dtype).min
        sequence_length = input_tensor.shape[1]

        if using_sliding_window_cache or using_static_cache:
            target_length = past_key_values.get_max_cache_shape()
        else:
            target_length = (
                attention_mask.shape[-1]
                if isinstance(attention_mask, torch.Tensor)
                else past_seen_tokens + sequence_length + 1
            )

        causal_mask = self._prepare_4d_causal_attention_mask_with_cache_position(
            attention_mask,
            sequence_length=sequence_length,
            target_length=target_length,
            dtype=dtype,
            cache_position=cache_position,
            batch_size=input_tensor.shape[0],
            config=self.config,
            past_key_values=past_key_values,
        )

        if (
            self.config._attn_implementation == "sdpa"
            and attention_mask is not None
            and attention_mask.device.type in ["cuda", "xpu", "npu"]
            and not output_attentions
        ):
            causal_mask = AttentionMaskConverter._unmask_unattended(causal_mask, min_dtype)

        return causal_mask

    @staticmethod
    def _prepare_4d_causal_attention_mask_with_cache_position(
        attention_mask: torch.Tensor,
        sequence_length: int,
        target_length: int,
        dtype: torch.dtype,
        cache_position: torch.Tensor,
        batch_size: int,
        config: IQuestCoderConfig,
        past_key_values: Cache,
    ):
        """Creates a causal 4D mask from a 2D mask, or returns the 4D mask if already provided."""
        if attention_mask is not None and attention_mask.dim() == 4:
            causal_mask = attention_mask
        else:
            min_dtype = torch.finfo(dtype).min
            causal_mask = torch.full(
                (sequence_length, target_length), fill_value=min_dtype, dtype=dtype, device=cache_position.device
            )
            diagonal_attend_mask = torch.arange(target_length, device=cache_position.device) > cache_position.reshape(
                -1, 1
            )

            # [Qwen2 Feature] Handle sliding window mask
            if getattr(config, "use_sliding_window", False) and config.sliding_window is not None:
                if not isinstance(past_key_values, SlidingWindowCache) or sequence_length > target_length:
                    sliding_attend_mask = torch.arange(target_length, device=cache_position.device) <= (
                        cache_position.reshape(-1, 1) - config.sliding_window
                    )
                    diagonal_attend_mask.bitwise_or_(sliding_attend_mask)

            causal_mask *= diagonal_attend_mask
            causal_mask = causal_mask[None, None, :, :].expand(batch_size, 1, -1, -1)

            if attention_mask is not None:
                causal_mask = causal_mask.clone()
                if attention_mask.shape[-1] > target_length:
                    attention_mask = attention_mask[:, :target_length]
                mask_length = attention_mask.shape[-1]
                padding_mask = causal_mask[:, :, :, :mask_length] + attention_mask[:, None, None, :].to(
                    causal_mask.device
                )
                padding_mask = padding_mask == 0
                causal_mask[:, :, :, :mask_length] = causal_mask[:, :, :, :mask_length].masked_fill(
                    padding_mask, min_dtype
                )

        return causal_mask


# =============================================================================
# Model Heads
# =============================================================================

class KwargsForCausalLM(FlashAttentionKwargs, LossKwargs):
    ...


@auto_docstring
class IQuestCoderForCausalLM(IQuestCoderPreTrainedModel, GenerationMixin):
    """IQuestCoder Model with a language modeling head on top for causal LM."""

    _tied_weights_keys = ["lm_head.weight"]
    _tp_plan = {"lm_head": "colwise_rep"}
    _pp_plan = {"lm_head": (["hidden_states"], ["logits"])}

    def __init__(self, config: IQuestCoderConfig):
        super().__init__(config)
        self.model = IQuestCoderModel(config)
        self.vocab_size = config.vocab_size
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self) -> nn.Embedding:
        return self.model.embed_tokens

    def set_input_embeddings(self, value: nn.Embedding):
        self.model.embed_tokens = value

    def get_output_embeddings(self) -> nn.Linear:
        return self.lm_head

    def set_output_embeddings(self, new_embeddings: nn.Linear):
        self.lm_head = new_embeddings

    def set_decoder(self, decoder: IQuestCoderModel):
        self.model = decoder

    def get_decoder(self) -> IQuestCoderModel:
        return self.model

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[Cache] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        cache_position: Optional[torch.LongTensor] = None,
        logits_to_keep: Union[int, torch.Tensor] = 0,
        **kwargs: Unpack[KwargsForCausalLM],
    ) -> CausalLMOutputWithPast:
        r"""
        Args:
            labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
                Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
                config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
                (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.

        Example:
        ```python
        >>> from transformers import AutoTokenizer
        >>> from modeling_iquestcoder import IQuestCoderForCausalLM

        >>> model = IQuestCoderForCausalLM.from_pretrained("path/to/IQuestCoder")
        >>> tokenizer = AutoTokenizer.from_pretrained("path/to/IQuestCoder")

        >>> prompt = "Hey, are you conscious? Can you talk to me?"
        >>> inputs = tokenizer(prompt, return_tensors="pt")

        >>> # Generate
        >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
        >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
        "Hey, are you conscious? Can you talk to me?\\nI'm not conscious, but I can talk to you."
        ```
        """
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )

        # Decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
        outputs: BaseModelOutputWithPast = self.model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            cache_position=cache_position,
            **kwargs,
        )

        hidden_states = outputs.last_hidden_state
        # Only compute necessary logits, and do not upcast them to float if we are not computing the loss
        slice_indices = slice(-logits_to_keep, None) if isinstance(logits_to_keep, int) else logits_to_keep
        logits = self.lm_head(hidden_states[:, slice_indices, :])

        loss = None
        if labels is not None:
            loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.config.vocab_size, **kwargs)

        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=outputs.past_key_values,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


@auto_docstring(
    custom_intro="""
    The IQuestCoder Model transformer with a sequence classification head on top (linear layer).

    [`IQuestCoderForSequenceClassification`] uses the last token in order to do the classification, as other causal
    models (e.g. GPT-2) do.

    Since it does classification on the last token, it requires to know the position of the last token. If a
    `pad_token_id` is defined in the configuration, it finds the last token that is not a padding token in each row.
    If no `pad_token_id` is defined, it simply takes the last value in each row of the batch.
    """
)
class IQuestCoderForSequenceClassification(IQuestCoderPreTrainedModel):
    """IQuestCoder Model with a sequence classification head."""

    def __init__(self, config: IQuestCoderConfig):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.model = IQuestCoderModel(config)
        self.score = nn.Linear(config.hidden_size, self.num_labels, bias=False)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self) -> nn.Embedding:
        return self.model.embed_tokens

    def set_input_embeddings(self, value: nn.Embedding):
        self.model.embed_tokens = value

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[Cache] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
    ) -> SequenceClassifierOutputWithPast:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss),
            If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        """
        transformer_outputs: BaseModelOutputWithPast = self.model(
            input_ids,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_values=past_key_values,
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
        )
        hidden_states = transformer_outputs.last_hidden_state
        logits = self.score(hidden_states)

        if input_ids is not None:
            batch_size = input_ids.shape[0]
        else:
            batch_size = inputs_embeds.shape[0]

        if self.config.pad_token_id is None and batch_size != 1:
            raise ValueError("Cannot handle batch sizes > 1 if no padding token is defined.")
        if self.config.pad_token_id is None:
            last_non_pad_token = -1
        elif input_ids is not None:
            non_pad_mask = (input_ids != self.config.pad_token_id).to(logits.device, torch.int32)
            token_indices = torch.arange(input_ids.shape[-1], device=logits.device, dtype=torch.int32)
            last_non_pad_token = (token_indices * non_pad_mask).argmax(-1)
        else:
            last_non_pad_token = -1
            logger.warning_once(
                f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
                "unexpected if using padding tokens in conjunction with `inputs_embeds.`"
            )

        pooled_logits = logits[torch.arange(batch_size, device=logits.device), last_non_pad_token]

        loss = None
        if labels is not None:
            loss = self.loss_function(logits=logits, labels=labels, pooled_logits=pooled_logits, config=self.config)

        return SequenceClassifierOutputWithPast(
            loss=loss,
            logits=pooled_logits,
            past_key_values=transformer_outputs.past_key_values,
            hidden_states=transformer_outputs.hidden_states,
            attentions=transformer_outputs.attentions,
        )


@auto_docstring
class IQuestCoderForTokenClassification(IQuestCoderPreTrainedModel):
    """IQuestCoder Model with a token classification head."""

    def __init__(self, config: IQuestCoderConfig):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.model = IQuestCoderModel(config)
        if getattr(config, "classifier_dropout", None) is not None:
            classifier_dropout = config.classifier_dropout
        elif getattr(config, "hidden_dropout", None) is not None:
            classifier_dropout = config.hidden_dropout
        else:
            classifier_dropout = 0.1
        self.dropout = nn.Dropout(classifier_dropout)
        self.score = nn.Linear(config.hidden_size, config.num_labels)

        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self) -> nn.Embedding:
        return self.model.embed_tokens

    def set_input_embeddings(self, value: nn.Embedding):
        self.model.embed_tokens = value

    @can_return_tuple
    @auto_docstring
    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[Cache] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
    ) -> TokenClassifierOutput:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ...,
            config.num_labels - 1]`.
        """
        outputs: BaseModelOutputWithPast = self.model(
            input_ids,
            attention_mask=attention_mask,
972
+ position_ids=position_ids,
973
+ past_key_values=past_key_values,
974
+ inputs_embeds=inputs_embeds,
975
+ use_cache=use_cache,
976
+ output_attentions=output_attentions,
977
+ output_hidden_states=output_hidden_states,
978
+ )
979
+ sequence_output = outputs.last_hidden_state
980
+ sequence_output = self.dropout(sequence_output)
981
+ logits = self.score(sequence_output)
982
+
983
+ loss = None
984
+ if labels is not None:
985
+ loss = self.loss_function(logits, labels, self.config)
986
+
987
+ return TokenClassifierOutput(
988
+ loss=loss,
989
+ logits=logits,
990
+ hidden_states=outputs.hidden_states,
991
+ attentions=outputs.attentions,
992
+ )
993
+
994
+
995
+ @auto_docstring
996
+ class IQuestCoderForQuestionAnswering(IQuestCoderPreTrainedModel):
997
+ """IQuestCoder Model with a span classification head for extractive question-answering."""
998
+
999
+ base_model_prefix = "transformer"
1000
+
1001
+ def __init__(self, config: IQuestCoderConfig):
1002
+ super().__init__(config)
1003
+ self.transformer = IQuestCoderModel(config)
1004
+ self.qa_outputs = nn.Linear(config.hidden_size, 2)
1005
+
1006
+ # Initialize weights and apply final processing
1007
+ self.post_init()
1008
+
1009
+ def get_input_embeddings(self) -> nn.Embedding:
1010
+ return self.transformer.embed_tokens
1011
+
1012
+ def set_input_embeddings(self, value: nn.Embedding):
1013
+ self.transformer.embed_tokens = value
1014
+
1015
+ @can_return_tuple
1016
+ @auto_docstring
1017
+ def forward(
1018
+ self,
1019
+ input_ids: Optional[torch.LongTensor] = None,
1020
+ attention_mask: Optional[torch.Tensor] = None,
1021
+ position_ids: Optional[torch.LongTensor] = None,
1022
+ past_key_values: Optional[Cache] = None,
1023
+ inputs_embeds: Optional[torch.FloatTensor] = None,
1024
+ start_positions: Optional[torch.LongTensor] = None,
1025
+ end_positions: Optional[torch.LongTensor] = None,
1026
+ output_attentions: Optional[bool] = None,
1027
+ output_hidden_states: Optional[bool] = None,
1028
+ **kwargs,
1029
+ ) -> QuestionAnsweringModelOutput:
1030
+ outputs: BaseModelOutputWithPast = self.transformer(
1031
+ input_ids,
1032
+ attention_mask=attention_mask,
1033
+ position_ids=position_ids,
1034
+ past_key_values=past_key_values,
1035
+ inputs_embeds=inputs_embeds,
1036
+ output_attentions=output_attentions,
1037
+ output_hidden_states=output_hidden_states,
1038
+ )
1039
+
1040
+ sequence_output = outputs.last_hidden_state
1041
+
1042
+ logits = self.qa_outputs(sequence_output)
1043
+ start_logits, end_logits = logits.split(1, dim=-1)
1044
+ start_logits = start_logits.squeeze(-1).contiguous()
1045
+ end_logits = end_logits.squeeze(-1).contiguous()
1046
+
1047
+ loss = None
1048
+ if start_positions is not None and end_positions is not None:
1049
+ loss = self.loss_function(start_logits, end_logits, start_positions, end_positions, **kwargs)
1050
+
1051
+ return QuestionAnsweringModelOutput(
1052
+ loss=loss,
1053
+ start_logits=start_logits,
1054
+ end_logits=end_logits,
1055
+ hidden_states=outputs.hidden_states,
1056
+ attentions=outputs.attentions,
1057
+ )
1058
+
1059
+
1060
+ __all__ = [
1061
+ "IQuestCoderPreTrainedModel",
1062
+ "IQuestCoderModel",
1063
+ "IQuestCoderForCausalLM",
1064
+ "IQuestCoderForSequenceClassification",
1065
+ "IQuestCoderForTokenClassification",
1066
+ "IQuestCoderForQuestionAnswering",
1067
+ ]
1068
+
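Note on the sequence-classification head above: the `last_non_pad_token` arithmetic is the only non-obvious step, and it can be checked outside the model. A minimal sketch with plain PyTorch and illustrative values (not part of the committed file):

```python
# Sketch of the last-non-pad-token pooling used by
# IQuestCoderForSequenceClassification.forward (illustrative only).
import torch

pad_token_id = 0
input_ids = torch.tensor([[5, 6, 7, 0, 0],
                          [8, 9, 0, 0, 0]])
logits = torch.randn(2, 5, 3)  # (batch_size, seq_len, num_labels)

non_pad_mask = (input_ids != pad_token_id).to(torch.int32)
token_indices = torch.arange(input_ids.shape[-1], dtype=torch.int32)
last_non_pad_token = (token_indices * non_pad_mask).argmax(-1)  # tensor([2, 1])

# One pooled logit vector per sequence, taken at its last real token.
pooled_logits = logits[torch.arange(input_ids.shape[0]), last_non_pad_token]
print(pooled_logits.shape)  # torch.Size([2, 3])
```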
special_tokens_map.json ADDED
@@ -0,0 +1,48 @@
+{
+  "additional_special_tokens": [
+    "<|CLS|>",
+    "<|SEP|>",
+    "<|EOD|>",
+    "<|MASK|>",
+    "<|PAD|>",
+    "<|fim_prefix|>",
+    "<|fim_middle|>",
+    "<|fim_suffix|>",
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|fim_pad|>",
+    "<|endoftext|>",
+    "<|repo_name|>",
+    "<|file_sep|>",
+    "<think>",
+    "</think>"
+  ],
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": true
+  }
+}
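A quick way to confirm the map above is what the tokenizer actually loads (hypothetical local checkout; `trust_remote_code=True` is needed because `auto_map` in tokenizer_config.json points at the custom class):

```python
# Hypothetical sanity check against special_tokens_map.json
# (assumes the repo files sit in the current directory).
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(".", trust_remote_code=True, use_fast=False)
assert tok.bos_token == "<s>"
assert tok.eos_token == "<|im_end|>"
assert tok.pad_token == "<|endoftext|>"
assert tok.unk_token == "<unk>"
```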
tokenization_iquestcoder.py ADDED
@@ -0,0 +1,552 @@
+"""Tokenization classes for IQuestCoder."""
+
+import os
+from shutil import copyfile
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import sentencepiece as spm
+
+from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer
+from transformers.utils import logging
+
+
+logger = logging.get_logger(__name__)
+
+VOCAB_FILES_NAMES = {"vocab_file": "tokenizer.model"}
+
+PRETRAINED_VOCAB_FILES_MAP = {
+    "vocab_file": {},
+    "tokenizer_file": {},
+}
+PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {}
+
+# Fallback ChatML-style template used when no chat template is provided.
+# The repo ships its full template separately in chat_template.jinja.
+DEFAULT_CHAT_TEMPLATE = (
+    "{% for message in messages %}"
+    "{{ '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n' }}"
+    "{% endfor %}"
+    "{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
+)
+
+
+class IQuestCoderTokenizer(PreTrainedTokenizer):
+    """SentencePiece-based tokenizer for IQuestCoder."""
+
+    vocab_files_names = VOCAB_FILES_NAMES
+    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
+    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
+    model_input_names = ["input_ids", "attention_mask"]
+
+    def __init__(
+        self,
+        vocab_file,
+        unk_token="<unk>",
+        bos_token="<s>",
+        eos_token="</s>",
+        pad_token=None,
+        sp_model_kwargs: Optional[Dict[str, Any]] = None,
+        add_bos_token=True,
+        add_eos_token=False,
+        clean_up_tokenization_spaces=False,
+        add_prefix_space=False,
+        legacy=None,
+        use_default_system_prompt=False,
+        chat_template=None,
+        **kwargs,
+    ):
+        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
+        bos_token = AddedToken(bos_token, lstrip=False, rstrip=False) if isinstance(bos_token, str) else bos_token
+        eos_token = AddedToken(eos_token, lstrip=False, rstrip=False) if isinstance(eos_token, str) else eos_token
+        unk_token = AddedToken(unk_token, lstrip=False, rstrip=False) if isinstance(unk_token, str) else unk_token
+        pad_token = AddedToken(pad_token, lstrip=False, rstrip=False) if isinstance(pad_token, str) else pad_token
+
+        # Legacy behavior handling
+        if legacy is None:
+            logger.warning_once(
+                f"You are using the default legacy behaviour of the {self.__class__.__name__}. This is"
+                " expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you."
+                " If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it"
+                " means, and thoroughly read the reason why this was added as explained in"
+                " https://github.com/huggingface/transformers/pull/24565"
+            )
+            legacy = True
+
+        self.legacy = legacy
+        self.vocab_file = vocab_file
+        self.add_bos_token = add_bos_token
+        self.add_eos_token = add_eos_token
+        self.add_prefix_space = add_prefix_space
+        self.use_default_system_prompt = use_default_system_prompt
+        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
+        self.sp_model.Load(vocab_file)
+
+        super().__init__(
+            bos_token=bos_token,
+            eos_token=eos_token,
+            unk_token=unk_token,
+            pad_token=pad_token,
+            add_bos_token=add_bos_token,
+            add_eos_token=add_eos_token,
+            sp_model_kwargs=self.sp_model_kwargs,
+            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
+            add_prefix_space=add_prefix_space,
+            legacy=legacy,
+            use_default_system_prompt=use_default_system_prompt,
+            chat_template=chat_template,
+            **kwargs,
+        )
+
+    def __getstate__(self):
+        # Drop the unpicklable SentencePiece processor; it is rebuilt in __setstate__.
+        state = self.__dict__.copy()
+        state["sp_model"] = None
+        return state
+
+    def __setstate__(self, d):
+        self.__dict__ = d
+        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
+        self.sp_model.Load(self.vocab_file)
+
+    @property
+    def vocab_size(self) -> int:
+        """Returns the vocabulary size."""
+        return self.sp_model.get_piece_size()
+
+    def get_vocab(self) -> Dict[str, int]:
+        """Returns the vocabulary as a dictionary of token to index."""
+        vocab = {self.convert_ids_to_tokens(i): i for i in range(self.vocab_size)}
+        vocab.update(self.added_tokens_encoder)
+        return vocab
+
+    def _tokenize(self, text: str) -> List[str]:
+        """
+        Tokenize a string.
+
+        Args:
+            text (`str`): The text to tokenize.
+
+        Returns:
+            `List[str]`: The list of tokens.
+        """
+        if self.add_prefix_space:
+            text = " " + text
+
+        if self.legacy:
+            return self.sp_model.encode(text, out_type=str)
+
+        # Non-legacy behavior currently falls back to the same SentencePiece encoding.
+        return self.sp_model.encode(text, out_type=str)
+
+    def _convert_token_to_id(self, token: str) -> int:
+        """Converts a token (str) to an id using the vocab."""
+        return self.sp_model.piece_to_id(token)
+
+    def _convert_id_to_token(self, index: int) -> str:
+        """Converts an index (integer) to a token (str) using the vocab."""
+        token = self.sp_model.IdToPiece(index)
+        return token
+
+    def convert_tokens_to_string(self, tokens: List[str]) -> str:
+        """
+        Converts a sequence of tokens (strings) to a single string.
+
+        This method handles special tokens separately to ensure they are not
+        decoded using the SentencePiece model.
+
+        Args:
+            tokens (`List[str]`): The list of tokens to convert.
+
+        Returns:
+            `str`: The decoded string.
+        """
+        current_sub_tokens = []
+        out_string = ""
+        prev_is_special = False
+        for i, token in enumerate(tokens):
+            # make sure that special tokens are not decoded using sentencepiece model
+            if token in self.all_special_tokens:
+                if not prev_is_special and i != 0:
+                    out_string += " "
+                out_string += self.sp_model.decode(current_sub_tokens) + token
+                prev_is_special = True
+                current_sub_tokens = []
+            else:
+                current_sub_tokens.append(token)
+                prev_is_special = False
+        out_string += self.sp_model.decode(current_sub_tokens)
+        return out_string
+
+    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
+        """
+        Save the vocabulary and special tokens file to a directory.
+
+        Args:
+            save_directory (`str`):
+                The directory in which to save the vocabulary.
+            filename_prefix (`str`, *optional*):
+                An optional prefix to add to the names of the saved files.
+
+        Returns:
+            `Tuple[str]`: Paths to the files saved.
+        """
+        if not os.path.isdir(save_directory):
+            logger.error(f"Vocabulary path ({save_directory}) should be a directory")
+            return
+        out_vocab_file = os.path.join(
+            save_directory, (filename_prefix + "-" if filename_prefix else "") + VOCAB_FILES_NAMES["vocab_file"]
+        )
+
+        if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
+            copyfile(self.vocab_file, out_vocab_file)
+        elif not os.path.isfile(self.vocab_file):
+            with open(out_vocab_file, "wb") as fi:
+                content_spiece_model = self.sp_model.serialized_model_proto()
+                fi.write(content_spiece_model)
+
+        return (out_vocab_file,)
+
+    def build_inputs_with_special_tokens(
+        self,
+        token_ids_0: List[int],
+        token_ids_1: Optional[List[int]] = None
+    ) -> List[int]:
+        """
+        Build model inputs from a sequence or a pair of sequences for sequence classification tasks by concatenating
+        and adding special tokens.
+
+        An IQuestCoder sequence has the following format:
+
+        - single sequence: `<s> X </s>` (if add_eos_token is True) or `<s> X` (default)
+        - pair of sequences: `<s> A </s> <s> B </s>` (if add_eos_token is True) or `<s> A <s> B` (default)
+
+        Args:
+            token_ids_0 (`List[int]`):
+                List of IDs to which the special tokens will be added.
+            token_ids_1 (`List[int]`, *optional*):
+                Optional second list of IDs for sequence pairs.
+
+        Returns:
+            `List[int]`: List of input IDs with the appropriate special tokens.
+        """
+        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
+        eos_token_id = [self.eos_token_id] if self.add_eos_token else []
+
+        output = bos_token_id + token_ids_0 + eos_token_id
+
+        if token_ids_1 is not None:
+            output = output + bos_token_id + token_ids_1 + eos_token_id
+
+        return output
+
+    def get_special_tokens_mask(
+        self,
+        token_ids_0: List[int],
+        token_ids_1: Optional[List[int]] = None,
+        already_has_special_tokens: bool = False
+    ) -> List[int]:
+        """
+        Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
+        special tokens using the tokenizer `prepare_for_model` method.
+
+        Args:
+            token_ids_0 (`List[int]`):
+                List of IDs.
+            token_ids_1 (`List[int]`, *optional*):
+                Optional second list of IDs for sequence pairs.
+            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
+                Whether or not the token list is already formatted with special tokens for the model.
+
+        Returns:
+            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
+        """
+        if already_has_special_tokens:
+            return super().get_special_tokens_mask(
+                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
+            )
+
+        bos_token_id = [1] if self.add_bos_token else []
+        eos_token_id = [1] if self.add_eos_token else []
+
+        if token_ids_1 is None:
+            return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
+        return (
+            bos_token_id
+            + ([0] * len(token_ids_0))
+            + eos_token_id
+            + bos_token_id
+            + ([0] * len(token_ids_1))
+            + eos_token_id
+        )
+
+    def create_token_type_ids_from_sequences(
+        self,
+        token_ids_0: List[int],
+        token_ids_1: Optional[List[int]] = None
+    ) -> List[int]:
+        """
+        Create a mask from the two sequences passed to be used in a sequence-pair classification task.
+
+        An IQuestCoder sequence pair mask has the following format:
+
+        ```
+        0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
+        | first sequence    | second sequence |
+        ```
+
+        If `token_ids_1` is `None`, this method only returns the first portion of the mask (0s).
+
+        Args:
+            token_ids_0 (`List[int]`):
+                List of IDs.
+            token_ids_1 (`List[int]`, *optional*):
+                Optional second list of IDs for sequence pairs.
+
+        Returns:
+            `List[int]`: List of token type IDs according to the given sequence(s).
+        """
+        bos_token_id = [self.bos_token_id] if self.add_bos_token else []
+        eos_token_id = [self.eos_token_id] if self.add_eos_token else []
+
+        output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)
+
+        if token_ids_1 is not None:
+            output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)
+
+        return output
+
+    @property
+    def default_chat_template(self) -> str:
+        """
+        Returns the default chat template for IQuestCoder.
+
+        This template formats conversations with system, user, and assistant roles.
+        """
+        return DEFAULT_CHAT_TEMPLATE
+
+    def apply_chat_template(
+        self,
+        conversation: Union[List[Dict[str, str]], "Conversation"],
+        chat_template: Optional[str] = None,
+        add_generation_prompt: bool = False,
+        tokenize: bool = True,
+        padding: bool = False,
+        truncation: bool = False,
+        max_length: Optional[int] = None,
+        return_tensors: Optional[str] = None,
+        return_dict: bool = False,
+        **tokenizer_kwargs,
+    ):
+        """
+        Apply a chat template to format a conversation.
+
+        Args:
+            conversation (`List[Dict[str, str]]` or `Conversation`):
+                A list of dicts with "role" and "content" keys, representing the conversation history.
+            chat_template (`str`, *optional*):
+                A Jinja template to use for formatting. If not provided, the tokenizer's default will be used.
+            add_generation_prompt (`bool`, *optional*, defaults to `False`):
+                Whether to add a generation prompt at the end for the assistant to continue.
+            tokenize (`bool`, *optional*, defaults to `True`):
+                Whether to tokenize the output. If `False`, returns a string.
+            padding (`bool`, *optional*, defaults to `False`):
+                Whether to pad sequences.
+            truncation (`bool`, *optional*, defaults to `False`):
+                Whether to truncate sequences.
+            max_length (`int`, *optional*):
+                Maximum length of the output.
+            return_tensors (`str`, *optional*):
+                The type of tensors to return ("pt", "tf", "np", or None).
+            return_dict (`bool`, *optional*, defaults to `False`):
+                Whether to return a dictionary with additional information.
+            **tokenizer_kwargs:
+                Additional keyword arguments passed to the tokenizer.
+
+        Returns:
+            `Union[str, List[int], BatchEncoding]`: The formatted (and optionally tokenized) conversation.
+
+        Example:
+            ```python
+            >>> tokenizer = IQuestCoderTokenizer.from_pretrained("path/to/model")
+            >>> conversation = [
+            ...     {"role": "system", "content": "You are a helpful assistant."},
+            ...     {"role": "user", "content": "Hello!"},
+            ...     {"role": "assistant", "content": "Hi there! How can I help you today?"},
+            ...     {"role": "user", "content": "What's the weather like?"},
+            ... ]
+            >>> tokenizer.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
+            '<|im_start|>system\\nYou are a helpful assistant.<|im_end|>\\n<|im_start|>user\\nHello!<|im_end|>...'
+            ```
+        """
+        # Use parent class implementation with our template
+        return super().apply_chat_template(
+            conversation,
+            chat_template=chat_template,
+            add_generation_prompt=add_generation_prompt,
+            tokenize=tokenize,
+            padding=padding,
+            truncation=truncation,
+            max_length=max_length,
+            return_tensors=return_tensors,
+            return_dict=return_dict,
+            **tokenizer_kwargs,
+        )
+
+
+# Try to import and create Fast tokenizer version
+try:
+    from transformers import PreTrainedTokenizerFast
+    from tokenizers import Tokenizer, decoders, models, normalizers, pre_tokenizers, processors  # noqa: F401 (availability check)
+
+    class IQuestCoderTokenizerFast(PreTrainedTokenizerFast):
+        """
+        Construct a "fast" IQuestCoder tokenizer (backed by HuggingFace's *tokenizers* library).
+
+        This is a fast implementation of [`IQuestCoderTokenizer`] using the 🤗 Tokenizers library.
+
+        Args:
+            vocab_file (`str`, *optional*):
+                Path to the vocabulary file (SentencePiece model).
+            tokenizer_file (`str`, *optional*):
+                Path to a tokenizer JSON file.
+            unk_token (`str`, *optional*, defaults to `"<unk>"`):
+                The unknown token.
+            bos_token (`str`, *optional*, defaults to `"<s>"`):
+                The beginning of sequence token.
+            eos_token (`str`, *optional*, defaults to `"</s>"`):
+                The end of sequence token.
+            pad_token (`str`, *optional*):
+                The token used for padding.
+            add_bos_token (`bool`, *optional*, defaults to `True`):
+                Whether to add a BOS token at the start of sequences.
+            add_eos_token (`bool`, *optional*, defaults to `False`):
+                Whether to add an EOS token at the end of sequences.
+            add_prefix_space (`bool`, *optional*, defaults to `False`):
+                Whether to add an initial space to the input.
+            use_default_system_prompt (`bool`, *optional*, defaults to `False`):
+                Whether to use the default system prompt.
+            chat_template (`str`, *optional*):
+                A Jinja template for formatting conversations.
+
+        Example:
+            ```python
+            >>> from tokenization_iquestcoder import IQuestCoderTokenizerFast
+
+            >>> tokenizer = IQuestCoderTokenizerFast.from_pretrained("path/to/model")
+            >>> tokenizer.encode("Hello, world!")
+            [1, 15043, 29892, 3186, 29991]
+            ```
+        """
+
+        vocab_files_names = VOCAB_FILES_NAMES
+        pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
+        max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
+        model_input_names = ["input_ids", "attention_mask"]
+        slow_tokenizer_class = IQuestCoderTokenizer
+
+        def __init__(
+            self,
+            vocab_file=None,
+            tokenizer_file=None,
+            unk_token="<unk>",
+            bos_token="<s>",
+            eos_token="</s>",
+            pad_token=None,
+            add_bos_token=True,
+            add_eos_token=False,
+            add_prefix_space=False,
+            use_default_system_prompt=False,
+            chat_template=None,
+            **kwargs,
+        ):
+            self.vocab_file = vocab_file  # kept so can_save_slow_tokenizer can check it
+            self.add_bos_token = add_bos_token
+            self.add_eos_token = add_eos_token
+            self.add_prefix_space = add_prefix_space
+            self.use_default_system_prompt = use_default_system_prompt
+
+            if chat_template is None:
+                chat_template = DEFAULT_CHAT_TEMPLATE
+
+            super().__init__(
+                vocab_file=vocab_file,
+                tokenizer_file=tokenizer_file,
+                unk_token=unk_token,
+                bos_token=bos_token,
+                eos_token=eos_token,
+                pad_token=pad_token,
+                add_bos_token=add_bos_token,
+                add_eos_token=add_eos_token,
+                add_prefix_space=add_prefix_space,
+                use_default_system_prompt=use_default_system_prompt,
+                chat_template=chat_template,
+                **kwargs,
+            )
+
+        @property
+        def can_save_slow_tokenizer(self) -> bool:
+            return os.path.isfile(self.vocab_file) if self.vocab_file else False
+
+        @property
+        def default_chat_template(self) -> str:
+            """Returns the default chat template."""
+            return DEFAULT_CHAT_TEMPLATE
+
+        def build_inputs_with_special_tokens(
+            self,
+            token_ids_0: List[int],
+            token_ids_1: Optional[List[int]] = None
+        ) -> List[int]:
+            """Build model inputs with special tokens."""
+            bos_token_id = [self.bos_token_id] if self.add_bos_token else []
+            eos_token_id = [self.eos_token_id] if self.add_eos_token else []
+
+            output = bos_token_id + token_ids_0 + eos_token_id
+
+            if token_ids_1 is not None:
+                output = output + bos_token_id + token_ids_1 + eos_token_id
+
+            return output
+
+        def get_special_tokens_mask(
+            self,
+            token_ids_0: List[int],
+            token_ids_1: Optional[List[int]] = None,
+            already_has_special_tokens: bool = False
+        ) -> List[int]:
+            """Retrieve special tokens mask."""
+            if already_has_special_tokens:
+                return super().get_special_tokens_mask(
+                    token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
+                )
+
+            bos_token_id = [1] if self.add_bos_token else []
+            eos_token_id = [1] if self.add_eos_token else []
+
+            if token_ids_1 is None:
+                return bos_token_id + ([0] * len(token_ids_0)) + eos_token_id
+            return (
+                bos_token_id
+                + ([0] * len(token_ids_0))
+                + eos_token_id
+                + bos_token_id
+                + ([0] * len(token_ids_1))
+                + eos_token_id
+            )
+
+        def create_token_type_ids_from_sequences(
+            self,
+            token_ids_0: List[int],
+            token_ids_1: Optional[List[int]] = None
+        ) -> List[int]:
+            """Create token type IDs from sequences."""
+            bos_token_id = [self.bos_token_id] if self.add_bos_token else []
+            eos_token_id = [self.eos_token_id] if self.add_eos_token else []
+
+            output = [0] * len(bos_token_id + token_ids_0 + eos_token_id)
+
+            if token_ids_1 is not None:
+                output += [1] * len(bos_token_id + token_ids_1 + eos_token_id)
+
+            return output
+
+except ImportError:
+    # tokenizers library not available, Fast tokenizer not supported
+    IQuestCoderTokenizerFast = None
+    logger.info(
+        "The `tokenizers` library is not installed. "
+        "IQuestCoderTokenizerFast will not be available. "
+        "Install it with `pip install tokenizers`."
+    )
+
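The special-token insertion rules in `build_inputs_with_special_tokens` above are easy to verify by hand; a detached sketch with made-up ids (assume 1 = `<s>`, 2 = `</s>`), mirroring the method without the class:

```python
# Detached sketch of build_inputs_with_special_tokens with made-up ids
# (1 = <s>, 2 = </s>); illustrative only, not part of the committed file.
from typing import List, Optional

def build_inputs(token_ids_0: List[int],
                 token_ids_1: Optional[List[int]] = None,
                 add_bos: bool = True, add_eos: bool = False) -> List[int]:
    bos = [1] if add_bos else []
    eos = [2] if add_eos else []
    output = bos + token_ids_0 + eos
    if token_ids_1 is not None:
        output += bos + token_ids_1 + eos
    return output

print(build_inputs([10, 11]))                      # [1, 10, 11]
print(build_inputs([10, 11], [12], add_eos=True))  # [1, 10, 11, 2, 1, 12, 2]
```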
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d3be68e090a927f31e0e378d7599b15c206dd47e4a73933775a746cc9c1cd91
+size 1345108
tokenizer_config.json ADDED
@@ -0,0 +1,285 @@
+{
+  "add_bos_token": false,
+  "add_eos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": true,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": true,
+      "special": true
+    },
+    "75858": {
+      "content": "<CLS>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75859": {
+      "content": "<SEP>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75860": {
+      "content": "<EOD>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75861": {
+      "content": "<MASK>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75862": {
+      "content": "<PAD>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75863": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75864": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75865": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75866": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75867": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75868": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75869": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75870": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75871": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75872": {
+      "content": "<think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "75873": {
+      "content": "</think>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "75874": {
+      "content": "<tools>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "75875": {
+      "content": "</tools>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "75876": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "75877": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "75878": {
+      "content": "<tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "75879": {
+      "content": "</tool_response>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "75880": {
+      "content": "<|CLS|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75881": {
+      "content": "<|SEP|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75882": {
+      "content": "<|EOD|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75883": {
+      "content": "<|MASK|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "75884": {
+      "content": "<|PAD|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|CLS|>",
+    "<|SEP|>",
+    "<|EOD|>",
+    "<|MASK|>",
+    "<|PAD|>",
+    "<|fim_prefix|>",
+    "<|fim_middle|>",
+    "<|fim_suffix|>",
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|fim_pad|>",
+    "<|endoftext|>",
+    "<|repo_name|>",
+    "<|file_sep|>",
+    "<think>",
+    "</think>"
+  ],
+  "auto_map": {
+    "AutoTokenizer": [
+      "tokenization_iquestcoder.IQuestCoderTokenizer",
+      null
+    ]
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "extra_special_tokens": {},
+  "legacy": true,
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "split_special_tokens": false,
+  "tokenizer_class": "IQuestCoderTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false,
+  "use_fast": false
+}
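Since `auto_map` registers only the slow class (the second slot is `null`) and the config pins `"use_fast": false`, loading should go through `IQuestCoderTokenizer`. A typical invocation (the path below is a placeholder) would look like:

```python
# Loading via auto_map requires trust_remote_code, because the tokenizer
# class ships with the repo rather than with transformers itself.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "path/to/IQuest-Coder-V1-40B-Instruct",  # placeholder local path or repo id
    trust_remote_code=True,
    use_fast=False,  # tokenizer_config.json sets "use_fast": false
)
print(type(tok).__name__)    # expected: IQuestCoderTokenizer
print(tok.model_max_length)  # 131072 per tokenizer_config.json
```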