smartguy0505 committed on
Commit
b39753c
·
verified ·
1 Parent(s): cf28519

Upload folder using huggingface_hub

Browse files
Files changed (45) hide show
  1. .gitattributes +5 -0
  2. intelligence_score_vs_output_tokens.png +3 -0
  3. model-00003-of-000163.safetensors +3 -0
  4. model-00005-of-000163.safetensors +3 -0
  5. model-00008-of-000163.safetensors +3 -0
  6. model-00015-of-000163.safetensors +3 -0
  7. model-00018-of-000163.safetensors +3 -0
  8. model-00025-of-000163.safetensors +3 -0
  9. model-00027-of-000163.safetensors +3 -0
  10. model-00036-of-000163.safetensors +3 -0
  11. model-00043-of-000163.safetensors +3 -0
  12. model-00045-of-000163.safetensors +3 -0
  13. model-00046-of-000163.safetensors +3 -0
  14. model-00049-of-000163.safetensors +3 -0
  15. model-00059-of-000163.safetensors +3 -0
  16. model-00076-of-000163.safetensors +3 -0
  17. model-00083-of-000163.safetensors +3 -0
  18. model-00084-of-000163.safetensors +3 -0
  19. model-00086-of-000163.safetensors +3 -0
  20. model-00088-of-000163.safetensors +3 -0
  21. model-00089-of-000163.safetensors +3 -0
  22. model-00090-of-000163.safetensors +3 -0
  23. model-00092-of-000163.safetensors +3 -0
  24. model-00094-of-000163.safetensors +3 -0
  25. model-00096-of-000163.safetensors +3 -0
  26. model-00105-of-000163.safetensors +3 -0
  27. model-00107-of-000163.safetensors +3 -0
  28. model-00108-of-000163.safetensors +3 -0
  29. model-00109-of-000163.safetensors +3 -0
  30. model-00110-of-000163.safetensors +3 -0
  31. model-00111-of-000163.safetensors +3 -0
  32. model-00112-of-000163.safetensors +3 -0
  33. model-00113-of-000163.safetensors +3 -0
  34. model-00115-of-000163.safetensors +3 -0
  35. model-00116-of-000163.safetensors +3 -0
  36. model-00119-of-000163.safetensors +3 -0
  37. model-00123-of-000163.safetensors +3 -0
  38. model-00125-of-000163.safetensors +3 -0
  39. model-00126-of-000163.safetensors +3 -0
  40. model-00133-of-000163.safetensors +3 -0
  41. model-00144-of-000163.safetensors +3 -0
  42. model-00154-of-000163.safetensors +3 -0
  43. model-00156-of-000163.safetensors +3 -0
  44. model-00161-of-000163.safetensors +3 -0
  45. tool_parser_vllm.py +583 -0
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ intelligence_score_vs_output_tokens.png filter=lfs diff=lfs merge=lfs -text
37
+ *.json filter=lfs diff=lfs merge=lfs -text
38
+ *.txt filter=lfs diff=lfs merge=lfs -text
39
+ tokenizer* filter=lfs diff=lfs merge=lfs -text
40
+ *.tokenizer filter=lfs diff=lfs merge=lfs -text
intelligence_score_vs_output_tokens.png ADDED

Git LFS Details

  • SHA256: ace1e8df27abccaf153f01b719117cbc024839c02cab6e2a300aa401ba196af7
  • Pointer size: 131 Bytes
  • Size of remote file: 197 kB
model-00003-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac592467a2391b507296d568a87c6f02d41a1a3473f3bc6705fdafe91a7e84d9
3
+ size 4302382136
model-00005-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4cd762a7f2575c8a8fb04a68556e2311350567c9cb015fd94af7d37e5ef5c7d
3
+ size 4302381912
model-00008-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8659a4124c5001e4f594fc53586c148c3210c8c080efcd0ebf420df1d18a3d9
3
+ size 4302382112
model-00015-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ded3c735c606e9f1bc067f7782b63d701577e59a338b08e3fc49c8fb603be3f8
3
+ size 4302347992
model-00018-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab55b759fa61688e5cdbfc0e76426e29eee60cb7dc4098a505268bd8508105c0
3
+ size 4302347800
model-00025-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c4f8ea8cfd59a26c06a4118e98f5519d25f9471c7f2dc09534ec0352cb4efcf
3
+ size 4302382720
model-00027-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9d5999b2943768dcc67cad1f8411d8f1e1aeb55c429e513d7789a5cad3152be
3
+ size 4302382448
model-00036-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80dfc11f732d16b60d7e150ec6bffe84cf6373f9963383fc1da6ec1491a04b72
3
+ size 4302382672
model-00043-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c31b7cb0d0e6761fec2ce693b99059802b055a51cac5d7e0485d214517af4cdf
3
+ size 4302348184
model-00045-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d205fd835afec706efad9b7349b92bc098e1e21d7c4c283ec7aad45423d381c8
3
+ size 4302348584
model-00046-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf167f9e7c7b2c5a91d6af1a2a4b3d837693c751045a767b7754459ae0bbb15c
3
+ size 4302382264
model-00049-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49f78faa1b3a5dd74244c87806000e8cc53a75c5d904f40a7e84eecfe0c43882
3
+ size 4302382448
model-00059-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82cb401b037eb3a5d1fa2ddff2f6aada163be92a5ce4263efc29ea1b0abd0dd9
3
+ size 4302348568
model-00076-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3733153525cf7775f6f1ecd5c4a7285f742a74ec9d9390baf06bb0ddc28fa17
3
+ size 4302382248
model-00083-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c25a777958d3a868ab6b57b6e5d2d9e040a0fa95d8a67c095a21e03a7e1a04b4
3
+ size 4302382720
model-00084-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0df8afe9fc329bd69bd0f0349240fe622c8f2de176e680699894e36f0f21c78f
3
+ size 4302348376
model-00086-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f63d1bb77a82c1f0c2f84f2fdb5c5a81ee8eead300feeb333c3bb52f2f007c9
3
+ size 4302382720
model-00088-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5740f8e77b3f86fd8c686d5eda884eea8c637a1a4937046f44f988a27543041e
3
+ size 4302382656
model-00089-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3137b2b82b3d50c80e48e187e6c743d735124bd55a9ecf2b6c5a4f008a9f73a1
3
+ size 4302348584
model-00090-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e7368bc72f3f331d5bb6ab9ca023b42a4423eb21266e21e8d30f1cd70b9cd4f
3
+ size 4302382264
model-00092-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c512f6f8a69f4528f503841f9982d39c044a5bd077939505e5b763de435502f9
3
+ size 4302348392
model-00094-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3bfb9aef946581c145c358c416e687a4271c49848daa7edae10ddd1575b325a
3
+ size 4302382720
model-00096-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5951c6ff635b0b2b1f4a3bce3158f71ce16b27f51a6e75f2b91296deacb52b9c
3
+ size 4302382640
model-00105-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbbcd10373f9a2dfc33a650a117cf000e7117fa863bbf0835d57ca47ef1c8770
3
+ size 4302382720
model-00107-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9acd49a549e874ae346c940dc939144670da931d6f3d3ca69f1de7ae588399d
3
+ size 4302382472
model-00108-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5378084e6769b6a496be211d5808858149956aca7893d5e928c2772f5f9e476
3
+ size 4302382720
model-00109-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:820b730ab4882f28492381fd40c0591ac68e276e69450d5e3ed553496dc2a349
3
+ size 4302348184
model-00110-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e37571416f241afcc413442a11ce8896a1ee63aab13b065227c0a04099cb2c15
3
+ size 4302382656
model-00111-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:605b39268636cae3bb925ef22c34c720840bc6b4ee33366fc5c98e8d8d4a5d83
3
+ size 4302348584
model-00112-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7152fa20f96fa500166c52abdebdfd00c19871d81f235166522d5169e7263ce
3
+ size 4302382264
model-00113-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ea651e865707be3259305153f4e7a06c6f4c6b4d9989bb6b97f92039d7714d0
3
+ size 4302382720
model-00115-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35f808fc0eb23a376220e074d4463ade7f685eb174a528aac37f56b90a9c241f
3
+ size 4302382448
model-00116-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef8f050ca1cc24dca24f789af6e9c81cb7da3e33163a9dd753bf9ac983799387
3
+ size 4302382720
model-00119-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94d7b0e6a2393122e0cf50556d05ed050b2016f71cf09a8265c89361feff9e9b
3
+ size 4302348600
model-00123-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:519568466224076a0751d3bfe99f1e1c2e63b063df7042412777f74bb78fc6e5
3
+ size 4302315568
model-00125-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4a0dd8ee862041575bd6747e29a31decc36ee61e06286ee9fde205009391bd1
3
+ size 4302348568
model-00126-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2123b22cd5f9deca6155053114f874e45d74033f66b0e98878d91abaec10526f
3
+ size 4302382280
model-00133-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f488efd2ab0864520342e8da6696b711a2fc67440e5093c9cb6856b0e24a1f0
3
+ size 4302348584
model-00144-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e1d10df8855972ca9c778f59a13389ffe22b617b80b443859aa232cd2bed0be
3
+ size 4302348568
model-00154-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5035c3fc7b727c6087ce3eee4de9a2979d821b7ccfbf61c793bb2ea13634f2dd
3
+ size 4302382720
model-00156-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7b340d7dfee6e91cc40a124255a95b459be3076d533812ce2bf2cb12a569871
3
+ size 4302382448
model-00161-of-000163.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8410fc54d2cca5a033964a922bc7f7aa954e7d549f5843606a4ab6ce1e57ccb
3
+ size 4302382080
tool_parser_vllm.py ADDED
@@ -0,0 +1,583 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
+ # ruff: noqa
4
+ import json
5
+ from collections.abc import Sequence
6
+ from enum import Enum
7
+ from typing import Any, Union
8
+
9
+ import partial_json_parser
10
+ import regex as re
11
+ from partial_json_parser.core.options import Allow
12
+
13
+ from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
14
+ DeltaFunctionCall, DeltaMessage,
15
+ DeltaToolCall,
16
+ ExtractedToolCallInformation,
17
+ FunctionCall, ToolCall)
18
+ from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import (
19
+ ToolParser, ToolParserManager)
20
+ from vllm.logger import init_logger
21
+ from vllm.transformers_utils.tokenizer import AnyTokenizer
22
+ from vllm.utils import random_uuid
23
+
24
+ logger = init_logger(__name__)
25
+
26
+
27
class ParsedStructure(Enum):
    """State of the incremental parser: which syntactic structure of the
    model output is currently being consumed (plain content, a reasoning
    trace, or one of the parts of a tool call span)."""

    CONTENT = 1
    REASONING_CONTENT = 2
    TOOL_CALL = 3
    TOOL_CALL_DELIMITER = 4
    TOOL_CALL_START_TAG = 5
    TOOL_CALL_END_TAG = 6
34
+
35
+
36
@ToolParserManager.register_module("tng_r1t2")
class TngR1T2ToolParser(ToolParser):
    """Tool parser for models such as tngtech/DeepSeek-TNG-R1T2-Chimera.

    Compatible with hermes-style tool call templates, but operates purely
    on the decoded string representation of the model output: <tool_call>
    and </tool_call> do not need to be single tokens in the vocabulary,
    which makes this parser robust and versatile.
    """

    def __init__(self, tokenizer: AnyTokenizer):
        super().__init__(tokenizer)

        # Kept for backward compatibility: the serving code inspects
        # these attributes directly.
        self.prev_tool_call_arr: list[dict] = []
        self.streamed_args_for_tool: list[str] = []

        # <think>...</think> reasoning traces are never scanned for tools.
        self.think_start_tag = "<think>"
        self.think_end_tag = "</think>"
        self.think_tag_pattern = r"(<think>[\s\S]*?</think>)"

        # Tags delimiting the JSON tool call payload.
        self.tool_call_start_tag = "<tool_call>"
        self.tool_call_end_tag = "</tool_call>"
        self.tool_call_tag_pattern = r"<tool_call>([\s\S]*?)</tool_call>"

        # Incremental state used by extract_tool_calls_streaming:
        # unconsumed text, current parser mode, and what was sent so far.
        self.streaming_state: dict[str, Any] = {
            "streamed_tool_calls": [],
            "buffer": "",
            "parsed_structure": ParsedStructure.CONTENT,
        }
66
+
67
def extract_tool_call_from_nonthink_output(
        self, raw_text: str) -> tuple[str, list[dict]]:
    """Split text (already known to be outside <think> traces) into plain
    content and tool call dicts found in <tool_call>...</tool_call> spans.

    A span whose payload is not valid JSON is demoted to text content.
    """
    segments = re.split(self.tool_call_tag_pattern, raw_text)
    text_parts: list[str] = []
    tool_calls: list[dict] = []
    for idx, segment in enumerate(segments):
        # re.split with one capture group alternates: even index ->
        # surrounding text, odd index -> captured tag payload.
        if idx % 2 == 0:
            text_parts.append(segment)
            continue
        try:
            parsed = json.loads(segment)
        except json.JSONDecodeError:
            logger.warning("Invalid tool call json "
                           "-> parse as text content")
            text_parts.append(segment)
            continue
        if isinstance(parsed, list):
            tool_calls.extend(parsed)
        else:
            tool_calls.append(parsed)
    return "".join(text_parts), tool_calls
89
+
90
def extract_tool_calls(
    self,
    model_output: str,
    request: ChatCompletionRequest,
) -> ExtractedToolCallInformation:
    """Extract tool calls from a complete (non-streaming) model output.

    <think>...</think> traces are kept verbatim as content and are not
    scanned for tool calls; everything outside them is handed to
    extract_tool_call_from_nonthink_output.
    """
    segments = re.split(self.think_tag_pattern, model_output)
    content = ""
    raw_tool_calls: list = []
    for idx, segment in enumerate(segments):
        if idx % 2 == 1:
            # odd segments are the captured <think>...</think> traces
            content += segment
            continue
        segment_content, segment_tool_calls = (
            self.extract_tool_call_from_nonthink_output(segment))
        content += segment_content
        raw_tool_calls += segment_tool_calls

    if not raw_tool_calls:
        return ExtractedToolCallInformation(
            tools_called=False,
            tool_calls=[],
            content=content,
        )

    tool_call_objs: list[ToolCall] = []
    for idx, call in enumerate(raw_tool_calls):
        # a usable tool call must be a dict with "name" and "arguments"
        if (not isinstance(call, dict) or "name" not in call
                or "arguments" not in call):
            logger.warning("Invalid tool call format, ignore.")
            continue
        arguments = call["arguments"]
        if isinstance(arguments, dict):
            # normalize dict arguments to their JSON string representation
            arguments = json.dumps(arguments)
        tool_call_objs.append(
            ToolCall(
                id=f"call_{idx}_{random_uuid()}",
                type="function",
                function=FunctionCall(name=call["name"],
                                      arguments=arguments),
            ))

    return ExtractedToolCallInformation(
        tools_called=len(tool_call_objs) > 0,
        tool_calls=tool_call_objs,
        content=content,
    )
143
+
144
def _parse_think_trace(self, raw_text: str) -> tuple[str, bool, str]:
    """Consume reasoning content up to (and including) the </think> tag.

    Returns: (unambiguous_text_content, found_think_end, rest_string)
    """
    end_tag = self.think_end_tag

    # Case 1: a complete </think> appears somewhere in raw_text.
    tag_start = raw_text.find(end_tag)
    if tag_start >= 0:
        # In contrast to tool_call_start_tags, </think> itself remains
        # part of the content.
        cut = tag_start + len(end_tag)
        return raw_text[:cut], True, raw_text[cut:]

    # Case 2: the tail of raw_text could still grow into a complete
    # </think> with the next chunk -> hold that suffix back.
    held_back = self._ends_with_partial_token(raw_text, end_tag)
    cut = len(raw_text) - held_back
    return raw_text[:cut], False, raw_text[cut:]
161
+
162
def _parse_unambiguous_text_content(
        self, raw_text: str) -> tuple[str, Union[str, None], str]:
    """Consume plain content up to the next <think> or <tool_call> tag.

    Returns: (unambiguous_text_content, interrupting_tag, rest_string)
    """
    candidate_tags = [self.think_start_tag, self.tool_call_start_tag]

    # A complete tag anywhere in raw_text interrupts the content; take
    # the earliest one.
    found = [(tag, pos) for tag in candidate_tags
             if (pos := raw_text.find(tag)) >= 0]
    if found:
        first_tag, tag_pos = min(found, key=lambda item: item[1])
        return raw_text[:tag_pos], first_tag, raw_text[tag_pos:]

    # Otherwise a suffix of raw_text might still grow into a complete
    # tag: hold back from the earliest such ambiguous position.
    cut = min(
        len(raw_text) - self._ends_with_partial_token(raw_text, tag)
        for tag in candidate_tags)
    if cut < len(raw_text):
        return raw_text[:cut], None, raw_text[cut:]
    return raw_text, None, ""
186
+
187
+ def _parse_tool_call_start_tag(self, raw_text: str) -> tuple[bool, str]:
188
+ """
189
+ Removes tool_call_start_tag from the beginning of raw_text,
190
+ and an optional "[", and leading whitespace.
191
+ Returns: (found_complete_tool_call_start_tag, rest_string)
192
+ """
193
+ if not raw_text.startswith(self.tool_call_start_tag):
194
+ return False, raw_text
195
+ rest = raw_text[len(self.tool_call_start_tag):].lstrip()
196
+ if rest.startswith("["):
197
+ rest = rest[1:].lstrip()
198
+ return True, rest
199
+
200
+ def _parse_tool_call_end_tag(
201
+ self, raw_text: str) -> tuple[Union[bool, None], str]:
202
+ """
203
+ Removes tool_call_end_tag from the beginning of raw_text,
204
+ and an optional "]" before it, and leading whitespace.
205
+ Returns: tuple
206
+ found_complete_tool_call_end_tag (or None if not decidable yet)
207
+ rest_string
208
+ """
209
+ # remove optional whitespace and closing ] bracket from json list notation
210
+ rest = raw_text.lstrip()
211
+ if rest.startswith("]"):
212
+ rest = rest[1:].lstrip()
213
+
214
+ if rest.startswith(self.tool_call_end_tag):
215
+ # found a complete tool call end tag
216
+ return True, rest[len(self.tool_call_end_tag):]
217
+ if (len(rest) >= len(self.tool_call_end_tag)
218
+ or rest != self.tool_call_end_tag[:len(rest)]):
219
+ # evidence that rest_string does not start with a tool call end tag
220
+ return False, raw_text
221
+ # incomplete tool call end tag, can not be decided yet
222
+ return None, raw_text
223
+
224
+ def _extract_arguments_from_partial_tool_call(
225
+ self, raw_text: str) -> Union[str, None]:
226
+ """
227
+ Extracts the raw text of the "arguments" field of a complete
228
+ or partial tool call.
229
+ Args:
230
+ raw_text: tool call raw text,
231
+ e.g `{"name": "my_tool", "arguments": {"firstarg": "some`
232
+
233
+ Returns:
234
+ raw text of the "arguments" field, which is not valid JSON
235
+ unless the tool call is complete,
236
+ e.g. `{"firstarg": "some` for the example raw_text above
237
+ """
238
+ # assumptions:
239
+ # - "arguments" is always an object
240
+ # - there is no other field of type object in the function call
241
+ # - `raw_text` contains first "name", then "arguments" (otherwise,
242
+ # we'd have to find the end of "arguments" before returning its
243
+ # raw text value)
244
+
245
+ # typically, at position 0, but there might be leading whitespace
246
+ tool_call_start_pos = raw_text.find("{")
247
+ assert raw_text[:tool_call_start_pos].strip() == ""
248
+ arguments_start_pos = raw_text.find("{", tool_call_start_pos + 1)
249
+ if arguments_start_pos < 0:
250
+ return None
251
+ arguments_raw_text = raw_text[arguments_start_pos:]
252
+ return arguments_raw_text
253
+
254
def _parse_complete_tool_call(
        self, raw_text: str) -> tuple[Union[dict, None], str]:
    """Try to decode one complete tool call JSON object from the start of
    raw_text (which must not begin with whitespace).

    Returns: tuple
        parsed tool call dict if complete, None otherwise
        rest_string that needs to be parsed again or may contain a
        partial tool call
    """
    obj, end_pos = self.extract_complete_json_dict(raw_text)
    if obj is None:
        return None, raw_text

    call_text = raw_text[:end_pos]
    # `call_text` looks like:
    #   '{"name": "tool-name", "arguments": {...xyz...} }'
    # We only want `{...xyz...}`, but
    # _extract_arguments_from_partial_tool_call returns everything after
    # the second '{' (i.e. '{...xyz...} }'), so strip the outer closing
    # brace first.
    args_text = self._extract_arguments_from_partial_tool_call(
        call_text.removesuffix("}").rstrip())
    tool_call = {
        "name": obj.get("name"),
        "arguments": obj.get("arguments"),
        "arguments_raw_text": args_text,
        "is_complete": True,
    }
    return tool_call, raw_text[end_pos:]
281
+
282
def _parse_partial_tool_call(self, raw_text: str) -> Union[dict, None]:
    """Parse an incomplete tool call with the partial-JSON parser.

    raw_text must not start with whitespace.  Propagates
    json.JSONDecodeError when raw_text is not even a valid JSON prefix.
    """
    partial_obj = partial_json_parser.loads(raw_text, Allow.ALL)
    return {
        "name": partial_obj.get("name"),
        "arguments": partial_obj.get("arguments"),
        "arguments_raw_text":
        self._extract_arguments_from_partial_tool_call(raw_text),
        "is_complete": False,
    }
294
+
295
def _parse_tool_call(
        self,
        raw_text: str) -> tuple[Union[bool, None], Union[dict, None], str]:
    """Parse a complete or partial tool call JSON object.

    Returns: (found_tool_call: True / False / None when undecidable,
              tool_call dict or None, rest_string)
    """
    # drop optional whitespace before the json object
    stripped = raw_text.lstrip()
    if not stripped:
        # no json received yet -> validity cannot be judged
        return None, None, raw_text
    if not stripped.startswith("{"):
        # cannot be a tool call json
        return False, None, raw_text

    complete_call, remainder = self._parse_complete_tool_call(stripped)
    if complete_call:
        return True, complete_call, remainder

    try:
        partial_call = self._parse_partial_tool_call(remainder)
    except json.JSONDecodeError:
        # invalid json -> neither a complete nor a partial tool call
        return False, None, remainder
    # a partial tool call must be re-parsed once more text arrives,
    # hence None rather than True
    return None, partial_call, remainder
321
+
322
+ def _parse_tool_call_delimiter(
323
+ self, raw_text: str) -> tuple[Union[bool, None], str]:
324
+ """
325
+ Returns: tuple
326
+ does raw_text start with tool call delimiter?
327
+ (None if undecidable/incomplete)
328
+ rest_string
329
+ """
330
+ rest = raw_text.lstrip()
331
+ if rest == "":
332
+ return None, raw_text
333
+ has_next_tool_call = rest.startswith(",")
334
+ if not has_next_tool_call:
335
+ return False, raw_text
336
+
337
+ rest = rest[1:].lstrip()
338
+ if rest == "":
339
+ return None, raw_text
340
+ has_next_tool_call = rest.startswith("{")
341
+ if not has_next_tool_call:
342
+ return False, raw_text
343
+ return True, rest
344
+
345
def _parse_all(
    self, raw_text: str, start_mode: ParsedStructure
) -> tuple[str, list[dict], str, ParsedStructure]:
    """Recursively parse raw_text beginning in `start_mode`.

    Returns: (content, tool_calls, unconsumed_rest, next_mode).  The
    caller buffers `unconsumed_rest` and resumes in `next_mode` once more
    text has been generated.
    """
    if start_mode == ParsedStructure.REASONING_CONTENT:
        think_content, closed, rest = self._parse_think_trace(raw_text)
        if not closed:
            return (think_content, [], rest,
                    ParsedStructure.REASONING_CONTENT)
        tail, tool_calls, rest, mode = self._parse_all(
            rest, start_mode=ParsedStructure.CONTENT)
        return think_content + tail, tool_calls, rest, mode

    if start_mode == ParsedStructure.CONTENT:
        content, interrupting_tag, rest = (
            self._parse_unambiguous_text_content(raw_text))
        # rest may start with a tool call start tag or a think start tag
        if interrupting_tag == self.tool_call_start_tag:
            next_mode = ParsedStructure.TOOL_CALL_START_TAG
        elif interrupting_tag == self.think_start_tag:
            next_mode = ParsedStructure.REASONING_CONTENT
        else:
            return content, [], rest, ParsedStructure.CONTENT
        tail, tool_calls, rest, mode = self._parse_all(
            rest, start_mode=next_mode)
        return content + tail, tool_calls, rest, mode

    if start_mode == ParsedStructure.TOOL_CALL_START_TAG:
        found_start, rest = self._parse_tool_call_start_tag(raw_text)
        if not found_start:
            return "", [], raw_text, ParsedStructure.CONTENT
        # complete start tag consumed, but the beginning of the tool
        # call json itself may not have arrived yet
        content, tool_calls, rest, mode = self._parse_all(
            rest, start_mode=ParsedStructure.TOOL_CALL)
        if not content and not tool_calls:
            # The opening "{" has not been reached; a "[" may still
            # precede it, so re-process the start tag with the next
            # chunk.
            return content, [], raw_text, ParsedStructure.CONTENT
        return content, tool_calls, rest, mode

    if start_mode == ParsedStructure.TOOL_CALL:
        found, tool_call, rest = self._parse_tool_call(raw_text)
        if found is True:
            head_calls = [tool_call] if tool_call else []
            content, tail_calls, rest, mode = self._parse_all(
                rest, start_mode=ParsedStructure.TOOL_CALL_DELIMITER)
            return content, head_calls + tail_calls, rest, mode
        if found is None:
            # partial tool call -> re-parse it with the next chunk
            partial = [tool_call] if tool_call is not None else []
            return "", partial, rest, ParsedStructure.TOOL_CALL
        logger.warning(
            "Invalid tool call -> continue with parsing model output as text content"
        )
        return self._parse_all(raw_text,
                               start_mode=ParsedStructure.CONTENT)

    if start_mode == ParsedStructure.TOOL_CALL_DELIMITER:
        found, rest = self._parse_tool_call_delimiter(raw_text)
        if found is True:
            return self._parse_all(rest,
                                   start_mode=ParsedStructure.TOOL_CALL)
        if found is None:
            # a delimiter can be neither confirmed nor ruled out yet
            return "", [], rest, ParsedStructure.TOOL_CALL_DELIMITER
        return self._parse_all(
            raw_text, start_mode=ParsedStructure.TOOL_CALL_END_TAG)

    if start_mode == ParsedStructure.TOOL_CALL_END_TAG:
        found, rest = self._parse_tool_call_end_tag(raw_text)
        if found is True:
            return self._parse_all(rest,
                                   start_mode=ParsedStructure.CONTENT)
        if found is None:
            return "", [], rest, ParsedStructure.TOOL_CALL_END_TAG
        return self._parse_all(raw_text,
                               start_mode=ParsedStructure.CONTENT)

    logger.warning(
        f"Unknown tool call parser start_mode '{start_mode}'. Falling back to text content."
    )
    return self._parse_all(raw_text, start_mode=ParsedStructure.CONTENT)
434
+
435
def extract_tool_calls_streaming(
    self,
    previous_text: str,
    current_text: str,
    delta_text: str,
    previous_token_ids: Sequence[int],
    current_token_ids: Sequence[int],
    delta_token_ids: Sequence[int],
    request: ChatCompletionRequest,
) -> Union[DeltaMessage, None]:
    """Extract tool calls for streaming mode.

    Only `delta_text` is consumed per call; any unparseable suffix is
    kept in `self.streaming_state["buffer"]` for the next invocation.
    """
    pending = self.streaming_state["buffer"] + delta_text
    mode = self.streaming_state["parsed_structure"]

    content, tool_calls, rest, next_mode = self._parse_all(
        pending, start_mode=mode)
    self.streaming_state["buffer"] = rest
    self.streaming_state["parsed_structure"] = next_mode

    streamed = self.streaming_state["streamed_tool_calls"]
    # completed calls are kept as the stable prefix of the call list;
    # the freshly parsed (possibly partial) calls follow them
    completed = [call for call in streamed if call["is_complete"]]
    all_tool_calls = completed + (tool_calls or [])
    delta_tool_calls = self._calculate_delta_tool_calls(
        all_tool_calls, streamed)

    if not content and not delta_tool_calls:
        return None
    self.update_state_vars(all_tool_calls)
    return DeltaMessage(content=content if content else None,
                        tool_calls=delta_tool_calls)
472
+
473
def _calculate_delta_tool_calls(
        self, current_tool_calls: Union[list[dict], None],
        already_streamed_tool_calls: list[dict]) -> list[DeltaToolCall]:
    """Diff the current (possibly partial) tool calls against what has
    been streamed so far and return only the new DeltaToolCall
    fragments.  Mutates already_streamed_tool_calls in place to record
    the updated streaming state."""
    if not current_tool_calls:
        return []

    deltas: list[DeltaToolCall] = []
    for idx, partial_call in enumerate(current_tool_calls):
        if (partial_call.get("name") is None
                or partial_call.get("arguments") is None):
            # Do not stream arguments for an unknown tool name; and
            # until "arguments" shows up in the partial json, "name"
            # itself may still be truncated (assuming a template such
            # as `{"name": "mytool", "arguments": ...}`).
            continue
        partial_call["tool_call_idx"] = idx
        partial_call["arguments_raw_text"] = (
            partial_call.get('arguments_raw_text') or "")

        if idx < len(already_streamed_tool_calls):
            # parts of this tool call index were streamed before
            streamed_call = already_streamed_tool_calls[idx]
            delta = self._delta_for_partial_tool_call(
                partial_call, streamed_call)
            if delta is not None:
                deltas.append(delta)
            already_streamed_tool_calls[idx] = (
                streamed_call | partial_call)
        else:
            # first fragment of this tool call index
            deltas.append(self._delta_for_new_tool_call(partial_call))
            already_streamed_tool_calls.append(partial_call)

    return deltas
509
+
510
def _delta_for_new_tool_call(self, tool_call_dict: dict) -> DeltaToolCall:
    """Build the first DeltaToolCall for a previously unseen tool call:
    tool_call_id, name, and all arguments text seen so far.  Also stores
    the generated tool_call_id back into tool_call_dict."""
    idx = tool_call_dict["tool_call_idx"]
    call_id = f"call_{idx}_{random_uuid()}"
    tool_call_dict["tool_call_id"] = call_id
    tool_call_dict["arguments_raw_text"] = tool_call_dict.get(
        'arguments_raw_text') or ""
    return DeltaToolCall(
        index=idx,
        type="function",
        id=call_id,
        function=DeltaFunctionCall(
            name=tool_call_dict.get("name"),
            arguments=tool_call_dict["arguments_raw_text"]))
528
+
529
def _delta_for_partial_tool_call(
        self, new_tool_call: dict,
        already_streamed_tool_call: dict) -> Union[DeltaToolCall, None]:
    """Delta for a tool call that was already partially streamed: only
    the not-yet-streamed tail of the raw arguments text (None when there
    is nothing new to send)."""
    assert new_tool_call["name"] == already_streamed_tool_call["name"]
    assert already_streamed_tool_call.get("tool_call_id")
    if already_streamed_tool_call.get("is_complete"):
        # everything for this call was sent already
        return None

    unsent = new_tool_call["arguments_raw_text"].removeprefix(
        already_streamed_tool_call["arguments_raw_text"])
    if not unsent:
        return None

    return DeltaToolCall(
        index=new_tool_call["tool_call_idx"],
        type="function",
        function=DeltaFunctionCall(arguments=unsent))
549
+
550
def update_state_vars(self, all_tools: list[dict]) -> None:
    """Mirror the parser state into the attributes that serving_chat.py
    inspects (`prev_tool_call_arr` and `streamed_args_for_tool`)."""
    # the relevant part per entry is {"arguments": {...}}
    self.prev_tool_call_arr = all_tools
    # raw (json-serialized) arguments text per tool call
    self.streamed_args_for_tool = [
        call.get("arguments_raw_text", "") for call in all_tools
    ]
561
+
562
+ @classmethod
563
+ def _ends_with_partial_token(cls, buffer: str, tag: str) -> int:
564
+ """
565
+ Check if buffer ends with a partial tag.
566
+ Return the length of the partial tag.
567
+ """
568
+ for i in range(1, min(len(buffer) + 1, len(tag))):
569
+ if tag.startswith(buffer[-i:]):
570
+ return i
571
+ return 0
572
+
573
@classmethod
def extract_complete_json_dict(cls, json_str: str):
    """Decode a complete JSON object from the start of `json_str`.

    Returns (obj, end_pos) when `json_str` begins with a complete JSON
    dict; any text after the object is ignored.  Returns (None, 0) when
    the json is incomplete/invalid or the leading value is not a dict.
    """
    decoder = json.JSONDecoder()
    try:
        obj, end_pos = decoder.raw_decode(json_str)
    except json.JSONDecodeError:
        return None, 0
    if isinstance(obj, dict):
        return obj, end_pos
    return None, 0