lthn committed on
Commit
06b064a
·
verified ·
1 Parent(s): e6e303f

Upload tokenizer_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +20 -42
tokenizer_config.json CHANGED
@@ -17,71 +17,50 @@
17
  "<|video|>"
18
  ],
19
  "image_token": "<|image|>",
20
- "is_local": true,
21
  "mask_token": "<mask>",
22
  "model_max_length": 1000000000000000019884624838656,
23
- "model_specific_special_tokens": {
24
- "audio_token": "<|audio|>",
25
- "boa_token": "<|audio>",
26
- "boi_token": "<|image>",
27
- "eoa_token": "<audio|>",
28
- "eoc_token": "<channel|>",
29
- "eoi_token": "<image|>",
30
- "eot_token": "<turn|>",
31
- "escape_token": "<|\"|>",
32
- "etc_token": "<tool_call|>",
33
- "etd_token": "<tool|>",
34
- "etr_token": "<tool_response|>",
35
- "image_token": "<|image|>",
36
- "soc_token": "<|channel>",
37
- "sot_token": "<|turn>",
38
- "stc_token": "<|tool_call>",
39
- "std_token": "<|tool>",
40
- "str_token": "<|tool_response>",
41
- "think_token": "<|think|>"
42
- },
43
  "pad_token": "<pad>",
44
  "padding_side": "left",
45
  "processor_class": "Gemma4Processor",
46
  "response_schema": {
 
47
  "properties": {
48
- "content": {
49
- "type": "string"
50
- },
51
  "role": {
52
  "const": "assistant"
53
  },
54
  "thinking": {
55
  "type": "string"
56
  },
 
 
 
57
  "tool_calls": {
 
 
58
  "items": {
 
59
  "properties": {
 
 
 
60
  "function": {
 
 
61
  "properties": {
62
- "arguments": {
63
- "additionalProperties": {},
64
- "type": "object",
65
- "x-parser": "gemma4-tool-call"
66
- },
67
  "name": {
68
  "type": "string"
 
 
 
 
 
69
  }
70
- },
71
- "type": "object",
72
- "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})"
73
- },
74
- "type": {
75
- "const": "function"
76
  }
77
- },
78
- "type": "object"
79
- },
80
- "type": "array",
81
- "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>"
82
  }
83
  },
84
- "type": "object",
85
  "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<content>(?:(?!\\<\\|tool_call\\>)(?!\\<turn\\|\\>).)+)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?:\\<turn\\|\\>)?"
86
  },
87
  "soc_token": "<|channel>",
@@ -91,6 +70,5 @@
91
  "str_token": "<|tool_response>",
92
  "think_token": "<|think|>",
93
  "tokenizer_class": "GemmaTokenizer",
94
- "tool_parser_type": "gemma4",
95
  "unk_token": "<unk>"
96
  }
 
17
  "<|video|>"
18
  ],
19
  "image_token": "<|image|>",
 
20
  "mask_token": "<mask>",
21
  "model_max_length": 1000000000000000019884624838656,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  "pad_token": "<pad>",
23
  "padding_side": "left",
24
  "processor_class": "Gemma4Processor",
25
  "response_schema": {
26
+ "type": "object",
27
  "properties": {
 
 
 
28
  "role": {
29
  "const": "assistant"
30
  },
31
  "thinking": {
32
  "type": "string"
33
  },
34
+ "content": {
35
+ "type": "string"
36
+ },
37
  "tool_calls": {
38
+ "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>",
39
+ "type": "array",
40
  "items": {
41
+ "type": "object",
42
  "properties": {
43
+ "type": {
44
+ "const": "function"
45
+ },
46
  "function": {
47
+ "type": "object",
48
+ "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})",
49
  "properties": {
 
 
 
 
 
50
  "name": {
51
  "type": "string"
52
+ },
53
+ "arguments": {
54
+ "type": "object",
55
+ "x-parser": "gemma4-tool-call",
56
+ "additionalProperties": {}
57
  }
58
+ }
 
 
 
 
 
59
  }
60
+ }
61
+ }
 
 
 
62
  }
63
  },
 
64
  "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<content>(?:(?!\\<\\|tool_call\\>)(?!\\<turn\\|\\>).)+)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?:\\<turn\\|\\>)?"
65
  },
66
  "soc_token": "<|channel>",
 
70
  "str_token": "<|tool_response>",
71
  "think_token": "<|think|>",
72
  "tokenizer_class": "GemmaTokenizer",
 
73
  "unk_token": "<unk>"
74
  }