Bittoby1040 committed on
Commit
18f2d16
·
verified ·
1 Parent(s): aedf7b8

Clean repository before upload

Browse files
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chat_template.jinja DELETED
@@ -1,112 +0,0 @@
1
- {%- macro render_content(msg) -%}
2
- {%- set c = msg.get('content') -%}
3
- {%- if c is string -%}
4
- {{ c }}
5
- {%- elif c is not none -%}
6
- {% for content in c -%}
7
- {% if content['type'] == 'image' or content['type'] == 'image_url' -%}
8
- <|media_begin|>image<|media_content|><|media_pad|><|media_end|>
9
- {% elif content['type'] == 'video' or content['type']== 'video_url'-%}
10
- <|kimi_k25_video_placeholder|>
11
- {% else -%}
12
- {{ content['text'] }}
13
- {%- endif -%}
14
- {%- endfor -%}
15
- {%- endif -%}
16
- {%- endmacro -%}
17
-
18
- {% macro set_roles(message) -%}
19
- {%- set role_name = message.get('name') or message['role'] -%}
20
- {%- if message['role'] == 'user' -%}
21
- <|im_user|>{{role_name}}<|im_middle|>
22
- {%- elif message['role'] == 'assistant' -%}
23
- <|im_assistant|>{{role_name}}<|im_middle|>
24
- {%- else -%}
25
- <|im_system|>{{role_name}}<|im_middle|>
26
- {%- endif -%}
27
- {%- endmacro -%}
28
-
29
-
30
- {%- macro render_toolcalls(message) -%}
31
- <|tool_calls_section_begin|>
32
- {%- for tool_call in message['tool_calls'] -%}
33
- {%- set formatted_id = tool_call['id'] -%}
34
- <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>
35
- {%- endfor -%}
36
- <|tool_calls_section_end|>
37
- {%- endmacro -%}
38
-
39
-
40
- {%- set preserve_thinking = preserve_thinking | default(false) -%}
41
- {# Find last non-tool-call assistant message. If preserve_thinking, keep -1 so hist is empty and all msgs use suffix (retain reasoning). #}
42
- {%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}
43
- {%- if not preserve_thinking -%}
44
- {%- for idx in range(messages|length-1, -1, -1) -%}
45
- {%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%}
46
- {%- set ns.last_non_tool_call_assistant_msg = idx -%}
47
- {%- break -%}
48
- {%- endif -%}
49
- {%- endfor -%}
50
- {%- endif -%}
51
-
52
- {# split all messages into history & suffix, reasoning_content in suffix should be reserved.#}
53
- {%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}
54
- {%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}
55
-
56
- {%- if tools -%}
57
- {%- if tools_ts_str -%}
58
- <|im_system|>tool_declare<|im_middle|>{{ tools_ts_str }}<|im_end|>
59
- {%- else -%}
60
- <|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>
61
- {%- endif -%}
62
- {%- endif -%}
63
-
64
-
65
- {%- for message in hist_msgs -%}
66
- {{set_roles(message)}}
67
- {%- if message['role'] == 'assistant' -%}
68
- <think></think>{{render_content(message)}}
69
- {%- if message.get('tool_calls') -%}
70
- {{render_toolcalls(message)}}
71
- {%- endif -%}
72
- {%- elif message['role'] == 'tool' -%}
73
- {%- set tool_call_id = message.tool_call_id -%}
74
- ## Return of {{ tool_call_id }}
75
- {{render_content(message)}}
76
- {%- elif message['content'] is not none -%}
77
- {{render_content(message)}}
78
- {%- endif -%}
79
- <|im_end|>
80
- {%- endfor -%}
81
-
82
- {%- for message in suffix_msgs -%}
83
- {{set_roles(message)}}
84
- {%- if message['role'] == 'assistant' -%}
85
- {%- if thinking is defined and thinking is false and preserve_thinking is false -%}
86
- <think></think>{{render_content(message)}}
87
- {%- else -%}
88
- {%- set rc = message.get('reasoning', message.get('reasoning_content', '')) -%}
89
- <think>{{rc}}</think>{{render_content(message)}}
90
- {%- endif -%}
91
- {%- if message.get('tool_calls') -%}
92
- {{render_toolcalls(message)}}
93
- {%- endif -%}
94
- {%- elif message['role'] == 'tool' -%}
95
- {%- set tool_call_id = message.tool_call_id -%}
96
- ## Return of {{ tool_call_id }}
97
- {{render_content(message)}}
98
- {%- elif message['content'] is not none -%}
99
- {{render_content(message)}}
100
- {%- endif -%}
101
- <|im_end|>
102
- {%- endfor -%}
103
-
104
-
105
- {%- if add_generation_prompt -%}
106
- <|im_assistant|>assistant<|im_middle|>
107
- {%- if thinking is defined and thinking is false -%}
108
- <think></think>
109
- {%- else -%}
110
- <think>
111
- {%- endif -%}
112
- {%- endif -%}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.json DELETED
@@ -1,55 +0,0 @@
1
- {
2
- "architectures": [
3
- "DeepseekV3ForCausalLM"
4
- ],
5
- "attention_bias": false,
6
- "attention_dropout": 0.0,
7
- "aux_loss_alpha": 0.001,
8
- "bos_token_id": 163584,
9
- "dtype": "float16",
10
- "eos_token_id": 163585,
11
- "ep_size": 1,
12
- "first_k_dense_replace": 1,
13
- "head_dim": 64,
14
- "hidden_act": "silu",
15
- "hidden_size": 2048,
16
- "initializer_range": 0.02,
17
- "intermediate_size": 11264,
18
- "kv_lora_rank": 512,
19
- "max_position_embeddings": 131072,
20
- "model_type": "deepseek_v3",
21
- "moe_intermediate_size": 1408,
22
- "moe_layer_freq": 1,
23
- "n_group": 1,
24
- "n_routed_experts": 64,
25
- "n_shared_experts": 2,
26
- "norm_topk_prob": true,
27
- "num_attention_heads": 16,
28
- "num_experts_per_tok": 6,
29
- "num_hidden_layers": 27,
30
- "num_key_value_heads": 16,
31
- "num_nextn_predict_layers": 1,
32
- "num_shared_experts": 2,
33
- "pad_token_id": 163839,
34
- "pretraining_tp": 1,
35
- "q_lora_rank": null,
36
- "qk_head_dim": 192,
37
- "qk_nope_head_dim": 128,
38
- "qk_rope_head_dim": 64,
39
- "rms_norm_eps": 1e-05,
40
- "rope_interleave": true,
41
- "rope_parameters": {
42
- "rope_theta": 800000.0,
43
- "rope_type": "default"
44
- },
45
- "routed_scaling_factor": 2.446,
46
- "scoring_func": "sigmoid",
47
- "seq_aux": true,
48
- "tie_word_embeddings": false,
49
- "topk_group": 1,
50
- "topk_method": "noaux_tc",
51
- "transformers_version": "5.8.1",
52
- "use_cache": false,
53
- "v_head_dim": 128,
54
- "vocab_size": 163840
55
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
generation_config.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "bos_token_id": 163584,
4
- "eos_token_id": 163585,
5
- "pad_token_id": 163839,
6
- "transformers_version": "5.8.1"
7
- }
 
 
 
 
 
 
 
 
model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0e0b6791300386330613a41b9fe4632ec4c99c7cd1ee51e9d9ec9a3523fa64c
3
- size 31920888072
 
 
 
 
tiktoken.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6c497a7469b33ced9c38afb1ad6e47f03f5e5dc05f15930799210ec050c5103
3
- size 2795286
 
 
 
 
tokenization_kimi.py DELETED
@@ -1,353 +0,0 @@
1
- import os
2
- from collections import OrderedDict
3
- from logging import getLogger
4
- from pathlib import Path
5
- from shutil import copyfile
6
- from typing import Any, Dict, Iterator, List, Optional, Tuple, Union, cast
7
-
8
- import tiktoken
9
- from tiktoken.load import load_tiktoken_bpe
10
- from tokenizers import AddedToken
11
- from transformers.convert_slow_tokenizer import bytes_to_unicode
12
- from transformers.tokenization_utils import PreTrainedTokenizer
13
-
14
- from .tool_declaration_ts import encode_tools_to_typescript_style
15
-
16
- logger = getLogger(__name__)
17
- VOCAB_FILES_NAMES = {"vocab_file": "tiktoken.model"}
18
-
19
-
20
- class TikTokenTokenizer(PreTrainedTokenizer):
21
- """
22
- Tokenizing and encoding/decoding text using the Tiktoken tokenizer. See megatron/tokenizer/tiktoken_tokenizer.py.
23
-
24
- This tokenizer inherits from [`PreTrainedTokenizer`] which contains most of the main methods. Users should refer to
25
- this superclass for more information regarding those methods.
26
-
27
- Args:
28
- vocab_file (`str`):
29
- The path to the Tiktoken model file.
30
- bos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<|begin_of_text|>",`):
31
- The beginning of sequence token that was used during pretraining. Can be used a sequence classifier token.
32
- eos_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<|end_of_text|>"`):
33
- The end of sequence token.
34
- unk_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<|reserved_special_token_249|>"`):
35
- The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
36
- token instead. The second to last item in special_tokens.
37
- pad_token (`str` or `tokenizers.AddedToken`, *optional*, defaults to `"<|reserved_special_token_250|>"`):
38
- The token used for padding, for example when batching sequences of different lengths.
39
- additional_special_tokens (list of `str`, *optional*):
40
- A tuple or a list of additional tokens, which will be marked as `special`, meaning that they will be
41
- skipped when decoding if `skip_special_tokens` is set to `True`.
42
- """
43
-
44
- vocab_files_names = VOCAB_FILES_NAMES
45
-
46
- model_input_names = ["input_ids", "attention_mask"]
47
-
48
- special_tokens: Dict[str, int]
49
-
50
- num_reserved_special_tokens = 256
51
-
52
- pat_str = "|".join([
53
- r"""[\p{Han}]+""",
54
- r"""[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?""",
55
- r"""[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?""",
56
- r"""\p{N}{1,3}""",
57
- r""" ?[^\s\p{L}\p{N}]+[\r\n]*""",
58
- r"""\s*[\r\n]+""",
59
- r"""\s+(?!\S)""",
60
- r"""\s+""",
61
- ])
62
-
63
- def __init__(
64
- self,
65
- vocab_file,
66
- bos_token: Union[str, AddedToken] = "[BOS]",
67
- eos_token: Union[str, AddedToken] = "[EOS]",
68
- unk_token: Union[str, AddedToken, None] = None,
69
- pad_token: Union[str, AddedToken, None] = None,
70
- additional_special_tokens: List[str] = None,
71
- added_tokens_decoder: Optional[dict] = None,
72
- **kwargs,
73
- ):
74
- assert os.path.isfile(vocab_file), vocab_file
75
-
76
- if additional_special_tokens is None:
77
- additional_special_tokens = [
78
- "<|im_end|>",
79
- "<|im_user|>",
80
- "<|im_assistant|>",
81
- "<|start_header_id|>",
82
- "<|end_header_id|>",
83
- "[EOT]",
84
- "<|im_system|>",
85
- "<|im_middle|>",
86
- ]
87
-
88
- if added_tokens_decoder:
89
- special_tokens_mapping = {
90
- i: added_tokens_decoder[i].content
91
- for i in added_tokens_decoder
92
- }
93
- else:
94
- special_tokens_mapping = {}
95
-
96
- self.vocab_file = vocab_file
97
- mergeable_ranks = load_tiktoken_bpe(vocab_file)
98
- num_base_tokens = len(mergeable_ranks)
99
- self.special_tokens = {
100
- special_tokens_mapping.get(i, f"<|reserved_token_{i}|>"): i
101
- for i in range(num_base_tokens, num_base_tokens +
102
- self.num_reserved_special_tokens)
103
- }
104
-
105
- self.model = tiktoken.Encoding(
106
- name=Path(vocab_file).name,
107
- pat_str=self.pat_str,
108
- mergeable_ranks=mergeable_ranks,
109
- special_tokens=self.special_tokens,
110
- )
111
- logger.info(f"Reloaded tiktoken model from {vocab_file}")
112
-
113
- self.n_words: int = self.model.n_vocab
114
- # BOS / EOS token IDs
115
- self.bos_id: int = self.special_tokens[str(bos_token)]
116
- self.eos_id: int = self.special_tokens[str(eos_token)]
117
- logger.info(
118
- f"#words: {self.n_words} - BOS ID: {self.bos_id} - EOS ID: {self.eos_id}"
119
- )
120
-
121
- self.pad_id: int = self.special_tokens[str(pad_token)]
122
- self.unk_id: int = self.special_tokens[str(unk_token)]
123
-
124
- self.byte_encoder = bytes_to_unicode()
125
- self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
126
-
127
- self.decoder = {}
128
- for i in range(self.n_words):
129
- # Taken from https://gist.github.com/xenova/a452a6474428de0182b17605a98631ee
130
- decoding = ''.join([
131
- self.byte_encoder[ord(char)] for char in
132
- self.model.decode_single_token_bytes(i).decode('latin-1')
133
- ])
134
- self.decoder[i] = decoding
135
-
136
- self.encoder = {}
137
- for i in range(self.n_words):
138
- if i in self.decoder:
139
- self.encoder[self.decoder[i]] = i
140
-
141
- self._token_config_cache = OrderedDict()
142
- self._cache_max_size = 128
143
-
144
- super().__init__(
145
- bos_token=bos_token,
146
- eos_token=eos_token,
147
- unk_token=unk_token,
148
- pad_token=pad_token,
149
- additional_special_tokens=additional_special_tokens,
150
- added_tokens_decoder=added_tokens_decoder,
151
- **kwargs,
152
- )
153
- self.all_special_ids_set = set(self.all_special_ids)
154
-
155
- def encode(self,
156
- text: str,
157
- allow_special_tokens: bool = True,
158
- **kwargs) -> List[int]:
159
- """
160
- Encodes a string into a list of token IDs.
161
-
162
- Args:
163
- text (str): The input string to be encoded.
164
-
165
- Returns:
166
- list[int]: A list of token IDs.
167
- """
168
- # If there are other args, we should call super().encode because there are a lot of code
169
- # to handle those args. supper().encode finally will call _tokenize and _convert_token_to_id.
170
- # NOTE: our encode method is not compatible with the super().encode method,
171
- # e.g. split_special_tokens' default is True in our encode method.
172
- if len(kwargs) > 0:
173
- logger.warning(f"Calling super().encode with {kwargs}")
174
- return super().encode(text, **kwargs)
175
-
176
- assert type(text) is str
177
-
178
- # The tiktoken tokenizer can handle <=400k chars without
179
- # pyo3_runtime.PanicException.
180
- TIKTOKEN_MAX_ENCODE_CHARS = 400_000
181
-
182
- # https://github.com/openai/tiktoken/issues/195
183
- # Here we iterate over subsequences and split if we exceed the limit
184
- # of max consecutive non-whitespace or whitespace characters.
185
- MAX_NO_WHITESPACES_CHARS = 25_000
186
-
187
- texts = self.pre_tokenizer_process(text)
188
-
189
- all_substrs = []
190
- for text in texts:
191
- substrs = (
192
- substr for i in range(0, len(text), TIKTOKEN_MAX_ENCODE_CHARS)
193
- for substr in self._split_whitespaces_or_nonwhitespaces(
194
- text[i:i +
195
- TIKTOKEN_MAX_ENCODE_CHARS], MAX_NO_WHITESPACES_CHARS))
196
- all_substrs.extend(substrs)
197
-
198
- t: List[int] = []
199
- for substr in all_substrs:
200
- if allow_special_tokens:
201
- t.extend(
202
- # we should consider special token as a common token
203
- self.model.encode(
204
- substr,
205
- allowed_special="all",
206
- ))
207
- else:
208
- t.extend(
209
- # we should consider special token as a common token
210
- self.model.encode(
211
- substr,
212
- disallowed_special=(),
213
- ))
214
-
215
- return t
216
-
217
- def decode(self, token_ids: Union[int, List[int]], **kwargs) -> str:
218
- """
219
- Decodes a list of token IDs into a string.
220
-
221
- Args:
222
- token_ids (List[int]): The list of token IDs to be decoded.
223
-
224
- Returns:
225
- str: The decoded string.
226
- """
227
- # If there are other args, we should call super().decode because there are a lot of code
228
- # to handle those args. supper().encode finally will call convert_tokens_to_string and _convert_id_to_token.
229
- if len(kwargs) > 0:
230
- return super().decode(token_ids, **kwargs)
231
-
232
- if type(token_ids) is int:
233
- token_ids = [token_ids]
234
-
235
- return self.model.decode(cast(List[int], token_ids))
236
-
237
- @staticmethod
238
- def _split_whitespaces_or_nonwhitespaces(
239
- s: str, max_consecutive_slice_len: int) -> Iterator[str]:
240
- """
241
- Splits the string `s` so that each substring contains no more than `max_consecutive_slice_len`
242
- consecutive whitespaces or consecutive non-whitespaces.
243
- """
244
- current_slice_len = 0
245
- current_slice_is_space = s[0].isspace() if len(s) > 0 else False
246
- slice_start = 0
247
-
248
- for i in range(len(s)):
249
- is_now_space = s[i].isspace()
250
-
251
- if current_slice_is_space ^ is_now_space:
252
- current_slice_len = 1
253
- current_slice_is_space = is_now_space
254
- else:
255
- current_slice_len += 1
256
- if current_slice_len > max_consecutive_slice_len:
257
- yield s[slice_start:i]
258
- slice_start = i
259
- current_slice_len = 1
260
- yield s[slice_start:]
261
-
262
- def pre_tokenizer_process(self, text: str) -> List[str]:
263
- """
264
- pre-tokenizes the input text into a list of tokens.
265
- This method is used to split the input text into smaller chunks for internal processing.
266
- """
267
- return [text]
268
-
269
- """ ----- Below are the abstract methods required by PreTrainedTokenizer ----- """
270
-
271
- @property
272
- def vocab_size(self) -> int:
273
- return self.n_words
274
-
275
- def get_vocab(self) -> Dict[str, int]:
276
- return self.encoder
277
-
278
- def _tokenize(self, text: str, **kwargs) -> List[str]:
279
- return [self.decoder[t] for t in self.encode(text)]
280
-
281
- def _convert_token_to_id(self, token: str) -> int:
282
- return self.encoder.get(token, self.unk_id)
283
-
284
- def _convert_id_to_token(self, index: int) -> str:
285
- return self.decoder.get(index)
286
-
287
- @staticmethod
288
- def clean_up_tokenization(out_string: str) -> str:
289
- return out_string
290
-
291
- def convert_tokens_to_string(self, tokens: List[str]) -> str:
292
- text = ''.join(tokens)
293
- text = bytearray([self.byte_decoder[c]
294
- for c in text]).decode('utf-8', 'replace')
295
- return text
296
-
297
- def save_vocabulary(self,
298
- save_directory: str,
299
- filename_prefix: Optional[str] = None) -> Tuple[str]:
300
- if not os.path.isdir(save_directory):
301
- raise ValueError(
302
- f"vocabulary path ({save_directory}) should be a directory")
303
- out_vocab_file = os.path.join(
304
- save_directory,
305
- (filename_prefix + "-" if filename_prefix else "") +
306
- VOCAB_FILES_NAMES["vocab_file"])
307
-
308
- if os.path.abspath(self.vocab_file) != os.path.abspath(
309
- out_vocab_file) and os.path.isfile(self.vocab_file):
310
- copyfile(self.vocab_file, out_vocab_file)
311
-
312
- return (out_vocab_file, )
313
-
314
- def apply_chat_template(self,
315
- conversation,
316
- tools: Optional[list[dict]] = None,
317
- tokenize: bool = False,
318
- add_generation_prompt: bool = True,
319
- thinking: bool = True,
320
- preserve_thinking: bool = False,
321
- **kwargs):
322
-
323
- tools = deep_sort_dict(tools)
324
-
325
- # Convert tools to TypeScript style string if tools are provided
326
- tools_ts_str = None
327
- if tools:
328
- try:
329
- tools_ts_str = encode_tools_to_typescript_style(tools)
330
-
331
- except Exception as e:
332
- print(f"Failed to convert tools to TypeScript style: {e}")
333
- tools_ts_str = None
334
-
335
- # Store the TypeScript string in kwargs so it can be accessed by the template
336
- if tools_ts_str is not None:
337
- kwargs['tools_ts_str'] = tools_ts_str
338
- return super().apply_chat_template(
339
- conversation,
340
- tools=tools,
341
- tokenize=tokenize,
342
- add_generation_prompt=add_generation_prompt,
343
- thinking=thinking,
344
- preserve_thinking=preserve_thinking,
345
- **kwargs)
346
-
347
-
348
- def deep_sort_dict(obj: Any) -> Any:
349
- if isinstance(obj, dict):
350
- return {k: deep_sort_dict(v) for k, v in sorted(obj.items())}
351
- if isinstance(obj, list):
352
- return [deep_sort_dict(item) for item in obj]
353
- return obj
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer_config.json DELETED
@@ -1,214 +0,0 @@
1
- {
2
- "added_tokens_decoder": {
3
- "163584": {
4
- "content": "[BOS]",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false,
9
- "special": true
10
- },
11
- "163585": {
12
- "content": "[EOS]",
13
- "lstrip": false,
14
- "normalized": false,
15
- "rstrip": false,
16
- "single_word": false,
17
- "special": true
18
- },
19
- "163586": {
20
- "content": "<|im_end|>",
21
- "lstrip": false,
22
- "normalized": false,
23
- "rstrip": false,
24
- "single_word": false,
25
- "special": true
26
- },
27
- "163587": {
28
- "content": "<|im_user|>",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "163588": {
36
- "content": "<|im_assistant|>",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
- },
43
- "163590": {
44
- "content": "<|start_header_id|>",
45
- "lstrip": false,
46
- "normalized": false,
47
- "rstrip": false,
48
- "single_word": false,
49
- "special": true
50
- },
51
- "163591": {
52
- "content": "<|end_header_id|>",
53
- "lstrip": false,
54
- "normalized": false,
55
- "rstrip": false,
56
- "single_word": false,
57
- "special": true
58
- },
59
- "163593": {
60
- "content": "[EOT]",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false,
65
- "special": true
66
- },
67
- "163594": {
68
- "content": "<|im_system|>",
69
- "lstrip": false,
70
- "normalized": false,
71
- "rstrip": false,
72
- "single_word": false,
73
- "special": true
74
- },
75
- "163595": {
76
- "content": "<|tool_calls_section_begin|>",
77
- "lstrip": false,
78
- "normalized": false,
79
- "rstrip": false,
80
- "single_word": false,
81
- "special": false
82
- },
83
- "163596": {
84
- "content": "<|tool_calls_section_end|>",
85
- "lstrip": false,
86
- "normalized": false,
87
- "rstrip": false,
88
- "single_word": false,
89
- "special": false
90
- },
91
- "163597": {
92
- "content": "<|tool_call_begin|>",
93
- "lstrip": false,
94
- "normalized": false,
95
- "rstrip": false,
96
- "single_word": false,
97
- "special": false
98
- },
99
- "163598": {
100
- "content": "<|tool_call_argument_begin|>",
101
- "lstrip": false,
102
- "normalized": false,
103
- "rstrip": false,
104
- "single_word": false,
105
- "special": false
106
- },
107
- "163599": {
108
- "content": "<|tool_call_end|>",
109
- "lstrip": false,
110
- "normalized": false,
111
- "rstrip": false,
112
- "single_word": false,
113
- "special": false
114
- },
115
- "163601": {
116
- "content": "<|im_middle|>",
117
- "lstrip": false,
118
- "normalized": false,
119
- "rstrip": false,
120
- "single_word": false,
121
- "special": true
122
- },
123
- "163602": {
124
- "content": "<|media_begin|>",
125
- "lstrip": false,
126
- "normalized": false,
127
- "rstrip": false,
128
- "single_word": false,
129
- "special": true
130
- },
131
- "163603": {
132
- "content": "<|media_content|>",
133
- "lstrip": false,
134
- "normalized": false,
135
- "rstrip": false,
136
- "single_word": false,
137
- "special": true
138
- },
139
- "163604": {
140
- "content": "<|media_end|>",
141
- "lstrip": false,
142
- "normalized": false,
143
- "rstrip": false,
144
- "single_word": false,
145
- "special": true
146
- },
147
- "163605": {
148
- "content": "<|media_pad|>",
149
- "lstrip": false,
150
- "normalized": false,
151
- "rstrip": false,
152
- "single_word": false,
153
- "special": true
154
- },
155
- "163606": {
156
- "content": "<think>",
157
- "lstrip": false,
158
- "normalized": false,
159
- "rstrip": false,
160
- "single_word": false,
161
- "special": false
162
- },
163
- "163607": {
164
- "content": "</think>",
165
- "lstrip": false,
166
- "normalized": false,
167
- "rstrip": false,
168
- "single_word": false,
169
- "special": false
170
- },
171
- "163838": {
172
- "content": "[UNK]",
173
- "lstrip": false,
174
- "normalized": false,
175
- "rstrip": false,
176
- "single_word": false,
177
- "special": true
178
- },
179
- "163839": {
180
- "content": "[PAD]",
181
- "lstrip": false,
182
- "normalized": false,
183
- "rstrip": false,
184
- "single_word": false,
185
- "special": true
186
- }
187
- },
188
- "auto_map": {
189
- "AutoTokenizer": [
190
- "tokenization_kimi.TikTokenTokenizer",
191
- null
192
- ]
193
- },
194
- "backend": "custom",
195
- "bos_token": "[BOS]",
196
- "clean_up_tokenization_spaces": false,
197
- "eos_token": "[EOS]",
198
- "extra_special_tokens": [
199
- "<|im_end|>",
200
- "<|im_user|>",
201
- "<|im_assistant|>",
202
- "<|start_header_id|>",
203
- "<|end_header_id|>",
204
- "[EOT]",
205
- "<|im_system|>",
206
- "<|im_middle|>"
207
- ],
208
- "is_local": false,
209
- "local_files_only": false,
210
- "model_max_length": 1000000000000000019884624838656,
211
- "pad_token": "[PAD]",
212
- "tokenizer_class": "TikTokenTokenizer",
213
- "unk_token": "[UNK]"
214
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tool_declaration_ts.py DELETED
@@ -1,479 +0,0 @@
1
- """
2
- Encode structured tool declaration to typescript style string.
3
- """
4
- import dataclasses
5
- import json
6
- import logging
7
- from collections.abc import Sequence
8
- from typing import Any
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
- _TS_INDENT = " "
13
- _TS_FIELD_DELIMITER = ",\n"
14
-
15
-
16
- class _SchemaRegistry:
17
- """Registry for schema definitions to handle $ref resolution"""
18
-
19
- def __init__(self):
20
- self.definitions = {}
21
- self.has_self_ref = False
22
-
23
- def register_definitions(self, defs: dict[str, Any]):
24
- """Register schema definitions from $defs section"""
25
- if not defs:
26
- return
27
- for def_name, def_schema in defs.items():
28
- self.definitions[def_name] = def_schema
29
-
30
- def resolve_ref(self, ref: str) -> dict[str, Any]:
31
- """Resolve a reference to its schema definition"""
32
- if ref == "#":
33
- self.has_self_ref = True
34
- return {"$self_ref": True}
35
- elif ref.startswith("#/$defs/"):
36
- def_name = ref.split("/")[-1]
37
- if def_name not in self.definitions:
38
- raise ValueError(f"Reference not found: {ref}")
39
- return self.definitions[def_name]
40
- else:
41
- raise ValueError(f"Unsupported reference format: {ref}")
42
-
43
-
44
- def _format_description(description: str, indent: str = "") -> str:
45
- return "\n".join([
46
- f"{indent}// {line}" if line else ""
47
- for line in description.split("\n")
48
- ])
49
-
50
-
51
- class _BaseType:
52
- description: str
53
- constraints: dict[str, Any]
54
-
55
- def __init__(
56
- self,
57
- extra_props: dict[str, Any],
58
- *,
59
- allowed_constraint_keys: Sequence[str] = (),
60
- ):
61
- self.description = extra_props.get("description", "")
62
- self.constraints = {
63
- k: v
64
- for k, v in extra_props.items() if k in allowed_constraint_keys
65
- }
66
-
67
- def to_typescript_style(self, indent: str = "") -> str:
68
- raise NotImplementedError
69
-
70
- def format_docstring(self, indent: str) -> str:
71
- lines = []
72
- if self.description:
73
- lines.append(_format_description(self.description, indent))
74
- if self.constraints:
75
- constraints_str = ", ".join(f"{k}: {v}" for k, v in sorted(
76
- self.constraints.items(), key=lambda kv: kv[0]))
77
- lines.append(f"{indent}// {constraints_str}")
78
-
79
- return "".join(x + "\n" for x in lines)
80
-
81
-
82
class _ParameterTypeScalar(_BaseType):
    """A schema node whose type is a single plain name (``string``, ``number``, ...)."""

    type: str

    def __init__(self, type: str, extra_props: dict[str, Any] | None = None):
        """Remember the type name and keep the constraints relevant to it.

        Strings keep ``maxLength``/``minLength``/``pattern``; numbers and
        integers keep ``maximum``/``minimum``; any other type keeps none.
        """
        self.type = type

        if self.type == "string":
            constraint_keys = ["maxLength", "minLength", "pattern"]
        elif self.type in ("number", "integer"):
            constraint_keys = ["maximum", "minimum"]
        else:
            constraint_keys = []

        super().__init__(extra_props or {},
                         allowed_constraint_keys=constraint_keys)

    def to_typescript_style(self, indent: str = "") -> str:
        """Return the TypeScript name; ``integer`` is rendered as ``number``."""
        return "number" if self.type == "integer" else self.type
102
-
103
-
104
class _ParameterTypeObject(_BaseType):
    """A JSON Schema ``object`` rendered as a TypeScript object literal type."""

    properties: list["_Parameter"]
    additional_properties: Any | None = None

    def __init__(self,
                 json_schema_object: dict[str, Any],
                 registry: _SchemaRegistry | None = None):
        """Parse an object schema into fields and an optional index signature.

        Args:
            json_schema_object: the object schema; a falsy value yields an
                object with no fields.
            registry: when given, receives any ``$defs`` found on this schema
                and is passed on to nested type parsing.
        """
        super().__init__(json_schema_object)

        self.properties = []
        self.additional_properties = None

        if not json_schema_object:
            return

        if "$defs" in json_schema_object and registry:
            registry.register_definitions(json_schema_object["$defs"])

        # A dict-valued additionalProperties is itself a schema and is parsed;
        # booleans (and anything else) are stored unchanged.
        extra = json_schema_object.get("additionalProperties")
        if isinstance(extra, dict):
            extra = _parse_parameter_type(extra, registry)
        self.additional_properties = extra

        if "properties" in json_schema_object:
            schema_props = json_schema_object["properties"]
            required_names = json_schema_object.get("required", [])
            # Every declared property that is not listed as required.
            optional_names = set(schema_props.keys()) - set(required_names)

            fields = []
            for field_name, field_schema in schema_props.items():
                fields.append(
                    _Parameter(
                        name=field_name,
                        type=_parse_parameter_type(field_schema, registry),
                        optional=field_name in optional_names,
                        default=field_schema.get("default") if isinstance(
                            field_schema, dict) else None,
                    ))
            self.properties = fields

    def to_typescript_style(self, indent: str = "") -> str:
        """Render the object as ``{ ... }`` with one field per line.

        Required fields come first, then optional ones; each group is sorted
        by name. If ``additionalProperties`` was present, a trailing
        ``[k: string]: <type>`` entry is added (``any`` for ``True``,
        ``never`` for ``False``). An object with no entries renders as ``{}``.

        Raises:
            ValueError: if ``additional_properties`` is neither a boolean nor
                a parsed type.
        """
        inner_indent = indent + _TS_INDENT

        # Stable sort on (is-optional, name): required fields first, then by name.
        ordered = sorted(self.properties,
                         key=lambda field: (bool(field.optional), field.name))
        rendered = [
            field.to_typescript_style(indent=inner_indent) for field in ordered
        ]

        extra = self.additional_properties
        if extra is not None:
            if extra is True:
                extra_ts = "any"
            elif extra is False:
                extra_ts = "never"
            elif isinstance(extra, _ParameterType):
                extra_ts = extra.to_typescript_style(indent=inner_indent)
            else:
                raise ValueError(f"Unknown additionalProperties: {extra}")
            rendered.append(f"{inner_indent}[k: string]: {extra_ts}")

        if not rendered:
            return "{}"

        joined = _TS_FIELD_DELIMITER.join(rendered)
        if joined:
            # Put the field list on its own lines between the braces.
            joined = f"\n{joined}\n"
        return "{" + joined + indent + "}"
183
-
184
-
185
class _ParameterTypeArray(_BaseType):
    """A JSON Schema ``array`` rendered as ``Array<item>``."""

    item: "_ParameterType"

    def __init__(self,
                 json_schema_object: dict[str, Any],
                 registry: _SchemaRegistry | None = None):
        """Parse the element schema; a missing or falsy ``items`` means ``any``.

        Only ``minItems`` and ``maxItems`` are kept as constraints.
        """
        super().__init__(json_schema_object,
                         allowed_constraint_keys=("minItems", "maxItems"))
        if not json_schema_object.get("items"):
            self.item = _ParameterTypeScalar(type="any")
        else:
            self.item = _parse_parameter_type(json_schema_object["items"],
                                              registry)

    def to_typescript_style(self, indent: str = "") -> str:
        """Render the array type.

        When the element type has comment lines of its own, the element is
        placed on separate, further-indented lines so the comments can sit
        directly above it; otherwise the compact ``Array<T>`` form is used.
        """
        inner_indent = indent + _TS_INDENT
        item_comments = self.item.format_docstring(inner_indent)

        if not item_comments:
            return f"Array<{self.item.to_typescript_style(indent=indent)}>"

        item_ts = self.item.to_typescript_style(indent=inner_indent)
        return f"Array<\n{item_comments}{inner_indent}{item_ts}\n{indent}>"
207
-
208
-
209
class _ParameterTypeEnum(_BaseType):
    """A JSON Schema ``enum`` rendered as a union of literal values."""

    # support scalar types only
    enum: list[str | int | float | bool | None]

    def __init__(self, json_schema_object: dict[str, Any]):
        """Store the enum members and check them against ``type`` if declared.

        ``type`` may be a single name, a one-element list, or a two-element
        list that contains ``"null"`` (the non-null entry is then used).
        ``None`` members are always accepted.

        Raises:
            ValueError: for any other list-valued ``type``, or when a member
                does not match the declared string/number/integer/boolean type.
        """
        super().__init__(json_schema_object)
        self.enum = json_schema_object["enum"]

        if "type" not in json_schema_object:
            return

        declared = json_schema_object["type"]
        if isinstance(declared, list):
            if len(declared) == 1:
                declared = declared[0]
            elif len(declared) == 2 and "null" in declared:
                # Nullable type: validate against the non-null entry.
                declared = declared[1] if declared[0] == "null" else declared[0]
            else:
                raise ValueError(f"Enum type {declared} is not supported")

        # (schema type name, accepted Python types, wording for the error)
        expectations = (
            ("string", str, "a string"),
            ("number", (int, float), "a number"),
            ("integer", int, "an integer"),
            ("boolean", bool, "a boolean"),
        )
        for member in self.enum:
            if member is None:
                continue
            for type_name, accepted, label in expectations:
                if declared == type_name:
                    if not isinstance(member, accepted):
                        raise ValueError(
                            f"Enum value {member} is not {label}")
                    break

    def to_typescript_style(self, indent: str = "") -> str:
        """Join the members with ``" | "``; strings are wrapped in double quotes.

        NOTE(review): non-string members go through ``str()``, so booleans and
        ``None`` come out as ``True``/``False``/``None``, and quotes inside
        string members are not escaped. Left as-is because the exact rendered
        text may be relied upon by consumers - confirm before changing.
        """
        rendered = []
        for member in self.enum:
            rendered.append(
                f'"{member}"' if isinstance(member, str) else str(member))
        return " | ".join(rendered)
245
-
246
-
247
class _ParameterTypeAnyOf(_BaseType):
    """A JSON Schema ``anyOf`` rendered as a union of its alternatives."""

    types: list["_ParameterType"]

    def __init__(
        self,
        json_schema_object: dict[str, Any],
        registry: _SchemaRegistry | None = None,
    ):
        """Parse each ``anyOf`` alternative, in order, with the same registry."""
        super().__init__(json_schema_object)
        alternatives = []
        for sub_schema in json_schema_object["anyOf"]:
            alternatives.append(_parse_parameter_type(sub_schema, registry))
        self.types = alternatives

    def to_typescript_style(self, indent: str = "") -> str:
        """Render every alternative at the same indent and join with ``" | "``."""
        rendered = (alt.to_typescript_style(indent=indent)
                    for alt in self.types)
        return " | ".join(rendered)
264
-
265
-
266
class _ParameterTypeUnion(_BaseType):
    """A schema whose ``type`` is a list of names, rendered as a flat union."""

    types: list[str]

    def __init__(self, json_schema_object: dict[str, Any]):
        """Translate each listed JSON Schema type name to its TypeScript spelling.

        Objects and arrays are rendered generically (``{}`` and
        ``Array<any>``); a name outside the table raises ``KeyError``.
        """
        super().__init__(json_schema_object)

        ts_names = {
            "string": "string",
            "number": "number",
            "integer": "number",
            "boolean": "boolean",
            "null": "null",
            "object": "{}",
            "array": "Array<any>",
        }
        translated = []
        for type_name in json_schema_object["type"]:
            translated.append(ts_names[type_name])
        self.types = translated

    def to_typescript_style(self, indent: str = "") -> str:
        """Join the translated names with ``" | "``; ``indent`` is not used."""
        separator = " | "
        return separator.join(self.types)
285
-
286
-
287
class _ParameterTypeRef(_BaseType):
    """A ``$ref`` node, rendered as the bare name of the referenced type."""

    ref_name: str
    is_self_ref: bool = False

    def __init__(self, json_schema_object: dict[str, Any],
                 registry: _SchemaRegistry):
        """Resolve the reference through ``registry`` and derive its display name.

        A root reference is named ``parameters`` and flagged via
        ``is_self_ref``; any other reference is named after the last segment
        of the ``$ref`` path. Resolution errors from the registry propagate.
        """
        super().__init__(json_schema_object)

        pointer = json_schema_object["$ref"]
        target = registry.resolve_ref(pointer)

        if not target.get("$self_ref", False):
            self.ref_name = pointer.split("/")[-1]
        else:
            self.ref_name = "parameters"
            self.is_self_ref = True

    def to_typescript_style(self, indent: str = "") -> str:
        """Return the referenced type's name; ``indent`` is not used."""
        return self.ref_name
306
-
307
-
308
# Union of every concrete node class the schema parser can return. It is used
# as a type annotation and as the class argument of an isinstance() check.
_ParameterType = (
    _ParameterTypeScalar | _ParameterTypeObject | _ParameterTypeArray
    | _ParameterTypeEnum | _ParameterTypeAnyOf | _ParameterTypeUnion
    | _ParameterTypeRef
)
315
-
316
-
317
@dataclasses.dataclass
class _Parameter:
    """
    A named slot: either a function parameter or a field of an object.
    Pairs a parsed type with its name, optionality and default value.
    """

    type: _ParameterType
    name: str = "_"
    optional: bool = True
    default: Any | None = None

    @classmethod
    def parse_extended(cls, attributes: dict[str, Any]) -> "_Parameter":
        """Build a parameter from one mapping that holds schema and metadata.

        ``name``, ``optional`` and ``default`` are read from ``attributes``
        (defaulting to ``"_"``, ``False`` and ``None``); the same mapping is
        parsed as the type schema, without a registry.

        Raises:
            ValueError: if ``attributes`` is empty or otherwise falsy.
        """
        if not attributes:
            raise ValueError("attributes is empty")

        param_name = attributes.get("name", "_")
        param_type = _parse_parameter_type(attributes)
        is_optional = attributes.get("optional", False)
        default_value = attributes.get("default")
        return cls(
            name=param_name,
            type=param_type,
            optional=is_optional,
            default=default_value,
        )

    def to_typescript_style(self, indent: str = "") -> str:
        """Render ``name[?]: type`` preceded by its comment lines.

        The comments are the type's own docstring lines plus, when a default
        is set, a ``// Default: ...`` line. Numeric and boolean defaults use
        Python ``repr``; everything else is JSON-encoded without ASCII
        escaping.
        """
        header = self.type.format_docstring(indent)

        if self.default is not None:
            if isinstance(self.default, (int, float, bool)):
                shown_default = repr(self.default)
            else:
                shown_default = json.dumps(self.default, ensure_ascii=False)
            header += f"{indent}// Default: {shown_default}\n"

        marker = "?" if self.optional else ""
        type_ts = self.type.to_typescript_style(indent=indent)
        return header + f"{indent}{self.name}{marker}: {type_ts}"
354
-
355
-
356
def _parse_parameter_type(
        json_schema_object: dict[str, Any] | bool,
        registry: _SchemaRegistry | None = None) -> _ParameterType:
    """Pick the node class that matches a schema and instantiate it.

    Checks are made in this order: boolean schema, ``$ref`` (only when a
    registry is supplied), ``anyOf``, ``enum``, ``type`` (list, ``object``,
    ``array``, or any other name as a scalar), and finally the empty schema.

    Args:
        json_schema_object: a schema mapping, or a boolean schema. ``True``
            and ``{}`` become ``any``; ``False`` logs a warning and becomes
            ``null``.
        registry: definition registry used for ``$ref`` resolution and passed
            on to nested schemas.

    Raises:
        ValueError: if none of the recognised keys is present.
    """
    if isinstance(json_schema_object, bool):
        if not json_schema_object:
            logger.warning(
                f"Warning: Boolean value {json_schema_object} is not supported, use null instead."
            )
            return _ParameterTypeScalar(type="null")
        return _ParameterTypeScalar(type="any")

    if "$ref" in json_schema_object and registry:
        return _ParameterTypeRef(json_schema_object, registry)

    if "anyOf" in json_schema_object:
        return _ParameterTypeAnyOf(json_schema_object, registry)

    if "enum" in json_schema_object:
        return _ParameterTypeEnum(json_schema_object)

    if "type" in json_schema_object:
        declared = json_schema_object["type"]
        if isinstance(declared, list):
            return _ParameterTypeUnion(json_schema_object)
        if declared == "object":
            return _ParameterTypeObject(json_schema_object, registry)
        if declared == "array":
            return _ParameterTypeArray(json_schema_object, registry)
        return _ParameterTypeScalar(declared, json_schema_object)

    if json_schema_object == {}:
        return _ParameterTypeScalar(type="any")

    raise ValueError(f"Invalid JSON Schema object: {json_schema_object}")
389
-
390
-
391
def _openai_function_to_typescript_style(function: dict[str, Any], ) -> str:
    """Convert OpenAI function definition (dict) to TypeScript style string.

    The result consists of up to three newline-joined parts: the interface
    declarations (a root ``parameters`` interface when the schema refers to
    itself, followed by one interface per registered ``$defs`` entry), the
    function description as ``//`` comments, and a
    ``type <name> = (_: <params>) => any;`` line. Empty parts are omitted.

    Args:
        function: mapping with optional ``name`` (defaults to ``"function"``),
            ``description`` and ``parameters`` (an object schema).

    Returns:
        The TypeScript-style declaration text for this function.
    """
    registry = _SchemaRegistry()
    parameters = function.get("parameters") or {}
    parsed = _ParameterTypeObject(parameters, registry)

    # Render the $defs interfaces BEFORE looking at registry.has_self_ref.
    # A "$ref": "#" that occurs only inside a definition is first resolved
    # while that definition is parsed here; checking the flag earlier would
    # leave the emitted `parameters` reference without a matching interface.
    # Iterate over a copy because parsing may register further definitions.
    def_interfaces = []
    for def_name, def_schema in dict(registry.definitions).items():
        obj_type = _parse_parameter_type(def_schema, registry)
        params_str = obj_type.to_typescript_style()

        description_part = ""
        if obj_description := def_schema.get("description", ""):
            description_part = _format_description(obj_description) + "\n"

        def_interfaces.append(
            f"{description_part}interface {def_name} {params_str}")

    interfaces = []
    root_interface_name = None
    if registry.has_self_ref:
        # The root schema is referenced by name, so it needs its own interface.
        root_interface_name = "parameters"
        params_str = _TS_FIELD_DELIMITER.join([
            p.to_typescript_style(indent=_TS_INDENT) for p in parsed.properties
        ])
        params_str = f"\n{params_str}\n" if params_str else ""
        interfaces.append(
            f"interface {root_interface_name} {{{params_str}}}")

    # Keep the original ordering: root interface first, then the definitions.
    interfaces.extend(def_interfaces)

    interface_str = "\n".join(interfaces)
    function_name = function.get("name", "function")
    if root_interface_name:
        type_def = f"type {function_name} = (_: {root_interface_name}) => any;"
    else:
        params_str = parsed.to_typescript_style()
        type_def = f"type {function_name} = (_: {params_str}) => any;"

    description = function.get("description")
    return "\n".join(
        filter(
            bool,
            [
                interface_str,
                ((description and _format_description(description)) or ""),
                type_def,
            ],
        ))
438
-
439
-
440
def encode_tools_to_typescript_style(tools: list[dict[str, Any]], ) -> str:
    """
    Render a list of tool dicts as one TypeScript-style declaration block.

    Only entries in the OpenAI format ``{"type": "function", "function": {...}}``
    are rendered; entries with another ``type`` or with a missing/empty
    ``function`` payload are skipped.

    Args:
        tools: List of tool definitions in dict format

    Returns:
        A ``# Tools`` section wrapping the rendered functions in
        ``namespace functions { ... }``, or ``""`` when there is nothing
        to render.
    """
    if not tools:
        return ""

    rendered = []
    for entry in tools:
        if entry.get("type") != "function":
            # Skip unsupported tool types (like "_plugin")
            continue
        spec = entry.get("function", {})
        if spec:
            rendered.append(_openai_function_to_typescript_style(spec))

    if not rendered:
        return ""

    body = "\n".join(rendered)
    header = "# Tools\n\n"
    if not body:
        return header

    return header + "## functions\nnamespace functions {\n" + body + "\n" + "}\n"