fred-baseten committed on
Commit
467fc63
·
verified ·
1 Parent(s): 200bef3

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ base_model:
6
+ - Qwen/Qwen3-1.7B
7
+ pipeline_tag: text-ranking
8
+ tags:
9
+ - finance
10
+ - legal
11
+ - code
12
+ - stem
13
+ - medical
14
+ library_name: sentence-transformers
15
+ ---
16
+
17
+ <img src="https://i.imgur.com/oxvhvQu.png"/>
18
+
19
+ # Releasing zeroentropy/zerank-1-small
20
+
21
+ In search engines, [rerankers are crucial](https://www.zeroentropy.dev/blog/what-is-a-reranker-and-do-i-need-one) for improving the accuracy of your retrieval system.
22
+
23
+ This 1.7B reranker is the smaller version of our flagship model [zeroentropy/zerank-1](https://huggingface.co/zeroentropy/zerank-1). Though the model is over 2x smaller, it maintains nearly the same standard of performance, continuing to outperform other popular rerankers, and displaying massive accuracy gains over traditional vector search.
24
+
25
+ We release this model under the open-source Apache 2.0 license, in order to support the open-source community and push the frontier of what's possible with open-source models.
26
+
27
+ ## How to Use
28
+
29
+ ```python
30
+ from sentence_transformers import CrossEncoder
31
+
32
+ model = CrossEncoder("zeroentropy/zerank-1-small", trust_remote_code=True)
33
+
34
+ query_documents = [
35
+ ("What is 2+2?", "4"),
36
+ ("What is 2+2?", "The answer is definitely 1 million"),
37
+ ]
38
+
39
+ scores = model.predict(query_documents)
40
+
41
+ print(scores)
42
+ ```
43
+
44
+ The model can also be run using ZeroEntropy's [/models/rerank](https://docs.zeroentropy.dev/api-reference/models/rerank) endpoint.
45
+
46
+ ## Evaluations
47
+
48
+ NDCG@10 scores between `zerank-1-small` and competing closed-source proprietary rerankers. Since we are evaluating rerankers, OpenAI's `text-embedding-3-small` is used as an initial retriever for the Top 100 candidate documents.
49
+
50
+ | Task | Embedding | cohere-rerank-v3.5 | Salesforce/Llama-rank-v1 | **zerank-1-small** | zerank-1 |
51
+ |----------------|-----------|--------------------|--------------------------|----------------|----------|
52
+ | Code | 0.678 | 0.724 | 0.694 | **0.730** | 0.754 |
53
+ | Conversational | 0.250 | 0.571 | 0.484 | **0.556** | 0.596 |
54
+ | Finance | 0.839 | 0.824 | 0.828 | **0.861** | 0.894 |
55
+ | Legal | 0.703 | 0.804 | 0.767 | **0.817** | 0.821 |
56
+ | Medical | 0.619 | 0.750 | 0.719 | **0.773** | 0.796 |
57
+ | STEM | 0.401 | 0.510 | 0.595 | **0.680** | 0.694 |
58
+
59
+ Comparing BM25 and Hybrid Search without and with `zerank-1-small`:
60
+
61
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/67776f9dcd9c9435499eafc8/2GPVHFrI39FspnSNklhsM.png" alt="Description" width="400"/> <img src="https://cdn-uploads.huggingface.co/production/uploads/67776f9dcd9c9435499eafc8/dwYo2D7hoL8QiE8u3yqr9.png" alt="Description" width="400"/>
added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3ForSequenceClassification"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "modeling_zeranker.ZEConfig"
9
+ },
10
+ "bos_token_id": 151643,
11
+ "eos_token_id": 151645,
12
+ "head_dim": 128,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 2048,
15
+ "id2label": {
16
+ "0": "Yes"
17
+ },
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 6144,
20
+ "label2id": {
21
+ "Yes": 0
22
+ },
23
+ "layer_types": [
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention",
49
+ "full_attention",
50
+ "full_attention",
51
+ "full_attention"
52
+ ],
53
+ "max_position_embeddings": 40960,
54
+ "max_window_layers": 28,
55
+ "model_type": "qwen3",
56
+ "num_attention_heads": 16,
57
+ "num_hidden_layers": 28,
58
+ "num_key_value_heads": 8,
59
+ "rms_norm_eps": 1e-06,
60
+ "rope_scaling": null,
61
+ "rope_theta": 1000000,
62
+ "sliding_window": null,
63
+ "tie_word_embeddings": true,
64
+ "torch_dtype": "float16",
65
+ "transformers_version": "4.53.2",
66
+ "use_cache": true,
67
+ "use_sliding_window": false,
68
+ "vocab_size": 151936
69
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "temperature": 0.6,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "4.51.3"
13
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82054da4aaac67d0fa861c3a015661de695d64f37914e859e81c04b36bb5dab9
3
+ size 3441189480
modeling_zeranker.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import CrossEncoder as _CE
2
+
3
+ import math
4
+ from typing import cast, Any
5
+ import types
6
+
7
+ import torch
8
+ from transformers.configuration_utils import PretrainedConfig
9
+ from transformers.models.auto.configuration_auto import AutoConfig
10
+ from transformers.models.auto.modeling_auto import AutoModelForCausalLM
11
+ from transformers.models.auto.tokenization_auto import AutoTokenizer
12
+ from transformers.models.gemma3.modeling_gemma3 import (
13
+ Gemma3ForCausalLM,
14
+ Gemma3ForConditionalGeneration,
15
+ )
16
+ from transformers.models.llama.modeling_llama import LlamaForCausalLM
17
+ from transformers.models.qwen3.modeling_qwen3 import Qwen3ForCausalLM
18
+ from transformers.tokenization_utils_base import BatchEncoding
19
+ from transformers.tokenization_utils_fast import PreTrainedTokenizerFast
20
+
21
+ # pyright: reportUnknownMemberType=false
22
+ # pyright: reportUnknownVariableType=false
23
+
24
+ MODEL_PATH = "zeroentropy/zerank-1-small"
25
+ PER_DEVICE_BATCH_SIZE_TOKENS = 15_000
26
+ global_device = (
27
+ torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
28
+ )
29
+
30
+
31
def format_pointwise_datapoints(
    tokenizer: PreTrainedTokenizerFast,
    query_documents: list[tuple[str, str]],
) -> BatchEncoding:
    """Render (query, document) pairs through the chat template and tokenize.

    Each query becomes the system message and its document the user message,
    mirroring the pointwise-reranking prompt format. Returns one padded
    PyTorch batch covering all pairs.
    """
    prompts: list[str] = []
    for query, document in query_documents:
        chat = [
            {"role": "system", "content": query.strip()},
            {"role": "user", "content": document.strip()},
        ]
        rendered = tokenizer.apply_chat_template(
            chat,
            tokenize=False,
            add_generation_prompt=True,
        )
        assert isinstance(rendered, str)
        prompts.append(rendered)

    # Pad to the longest prompt so the whole batch runs in one forward pass.
    return tokenizer(
        prompts,
        padding=True,
        return_tensors="pt",
    )
61
+
62
+
63
def load_model(
    device: torch.device | None = None,
) -> tuple[
    PreTrainedTokenizerFast,
    LlamaForCausalLM
    | Gemma3ForConditionalGeneration
    | Gemma3ForCausalLM
    | Qwen3ForCausalLM,
]:
    """Load the reranker checkpoint and its tokenizer onto a device.

    Falls back to the module-level `global_device` when `device` is None.
    Returns (tokenizer, model); the model is placed entirely on the chosen
    device via `device_map`.
    """
    target = global_device if device is None else device

    config = AutoConfig.from_pretrained(MODEL_PATH)
    assert isinstance(config, PretrainedConfig)

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        torch_dtype="auto",
        quantization_config=None,
        device_map={"": target},
    )
    # NOTE(review): this mutates the config field after loading; presumably
    # intended to enable FlashAttention-2 for llama checkpoints — confirm the
    # flag actually takes effect post-load.
    if config.model_type == "llama":
        model.config.attn_implementation = "flash_attention_2"
    assert isinstance(
        model,
        LlamaForCausalLM
        | Gemma3ForConditionalGeneration
        | Gemma3ForCausalLM
        | Qwen3ForCausalLM,
    )

    tokenizer = cast(
        AutoTokenizer,
        AutoTokenizer.from_pretrained(
            MODEL_PATH,
            padding_side="right",
        ),
    )
    assert isinstance(tokenizer, PreTrainedTokenizerFast)

    # Some checkpoints ship without an explicit pad token; reuse EOS.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    return tokenizer, model
107
+
108
+
109
def predict(
    self,
    query_documents: list[tuple[str, str]] | None = None,
    *,
    sentences: Any = None,
    batch_size: Any = None,
    show_progress_bar: Any = None,
    activation_fn: Any = None,
    apply_softmax: Any = None,
    convert_to_numpy: Any = None,
    convert_to_tensor: Any = None,
) -> list[float]:
    """Score (query, document) pairs with the pointwise reranker.

    Returns one relevance score in [0, 1] per input pair, in input order.
    The keyword arguments mirror sentence-transformers' CrossEncoder.predict
    signature; apart from `sentences` (an alias for `query_documents`) they
    are accepted for compatibility and ignored.

    Raises:
        ValueError: if neither `query_documents` nor `sentences` is given.
    """
    if query_documents is None:
        if sentences is None:
            raise ValueError("query_documents or sentences must be provided")
        query_documents = [(sentence[0], sentence[1]) for sentence in sentences]

    # Lazily load the underlying CausalLM + tokenizer on first call.
    if not hasattr(self, "inner_model"):
        self.inner_tokenizer, self.inner_model = load_model(global_device)
        self.inner_model.gradient_checkpointing_enable()
        self.inner_model.eval()
        # Token id whose final-position logit encodes "relevant".
        self.inner_yes_token_id = self.inner_tokenizer.encode(
            "Yes", add_special_tokens=False
        )[0]

    model = self.inner_model
    tokenizer = self.inner_tokenizer

    # Truncate pathological inputs before tokenization.
    query_documents = [
        (query[:2_000], document[:10_000]) for query, document in query_documents
    ]
    # Sort longest-first so each batch holds similarly sized prompts,
    # keeping padding waste (and peak memory) predictable.
    permutation = list(range(len(query_documents)))
    permutation.sort(
        key=lambda i: -len(query_documents[i][0]) - len(query_documents[i][1])
    )
    query_documents = [query_documents[i] for i in permutation]

    # Greedily pack pairs into batches bounded by a character-count proxy
    # for tokens: (#rows) * (longest row) <= PER_DEVICE_BATCH_SIZE_TOKENS.
    max_length = 0
    batches: list[list[tuple[str, str]]] = []
    for query, document in query_documents:
        if (
            len(batches) == 0
            or (len(batches[-1]) + 1) * max(max_length, len(query) + len(document))
            > PER_DEVICE_BATCH_SIZE_TOKENS
        ):
            batches.append([])
            max_length = 0

        batches[-1].append((query, document))
        max_length = max(max_length, 20 + len(query) + len(document))

    # Run every batch and collect the scaled "Yes" logits.
    all_logits: list[float] = []
    for batch in batches:
        batch_inputs = format_pointwise_datapoints(tokenizer, batch)
        batch_inputs = batch_inputs.to(global_device)

        # Inference only: without no_grad() each forward pass would build an
        # autograd graph and retain all activations, inflating memory use.
        with torch.no_grad():
            try:
                outputs = model(**batch_inputs, use_cache=False)
            except torch.OutOfMemoryError:
                # Best-effort retry after releasing cached GPU memory.
                print(f"GPU OOM! {torch.cuda.memory_reserved()}")
                torch.cuda.empty_cache()
                print(f"GPU After OOM Cache Clear: {torch.cuda.memory_reserved()}")
                outputs = model(**batch_inputs, use_cache=False)

        # Take the logits at each sequence's final non-padding position.
        logits = cast(torch.Tensor, outputs.logits)
        attention_mask = cast(torch.Tensor, batch_inputs.attention_mask)
        last_positions = attention_mask.sum(dim=1) - 1

        row_indices = torch.arange(logits.shape[0], device=global_device)
        last_logits = logits[row_indices, last_positions]

        # Temperature-style rescaling (/ 5.0) before the sigmoid below.
        yes_logits = last_logits[:, self.inner_yes_token_id]
        all_logits.extend([float(logit) / 5.0 for logit in yes_logits])

    def sigmoid(x: float) -> float:
        return 1 / (1 + math.exp(-x))

    scores = [sigmoid(logit) for logit in all_logits]

    # Undo the length sort so scores line up with the caller's input order.
    scores = [score for _, score in sorted(zip(permutation, scores, strict=True))]

    return scores
201
+
202
+
203
def to_device(self, new_device: torch.device):
    """Route CrossEncoder.to(...) to this module's device bookkeeping.

    Records the requested device in the module-level `global_device`, which
    `predict` uses for lazy model loading and batch placement. Returns
    `self` so the standard `model = model.to(device)` chaining idiom keeps
    working (matching torch.nn.Module.to semantics); the original returned
    None, which broke that idiom.
    """
    global global_device
    global_device = new_device
    return self
206
+
207
+
208
# Monkey-patch sentence-transformers' CrossEncoder so that loading this
# repository with trust_remote_code=True swaps in the pointwise reranker
# implementation defined above.
_CE.predict = predict
_CE.to = to_device

from transformers import Qwen3Config

# The checkpoint's auto_map points AutoConfig at modeling_zeranker.ZEConfig;
# the model is a stock Qwen3, so its config class is reused unchanged.
ZEConfig = Qwen3Config
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
tokenizer_config.json ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = 
content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
231
+ "clean_up_tokenization_spaces": false,
232
+ "eos_token": "<|im_end|>",
233
+ "errors": "replace",
234
+ "extra_special_tokens": {},
235
+ "model_max_length": 131072,
236
+ "pad_token": "<|endoftext|>",
237
+ "padding_side": "right",
238
+ "split_special_tokens": false,
239
+ "tokenizer_class": "Qwen2Tokenizer",
240
+ "unk_token": null
241
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff