Axottee committed on
Commit
e0d673a
·
verified ·
1 Parent(s): ac461f4

(Trained with Unsloth)

Browse files
added_tokens.json CHANGED
@@ -1,7 +1,10 @@
1
  {
 
2
  "</tool_call>": 151658,
 
 
3
  "<tool_call>": 151657,
4
- "<|PAD_TOKEN|>": 151665,
5
  "<|box_end|>": 151649,
6
  "<|box_start|>": 151648,
7
  "<|endoftext|>": 151643,
 
1
  {
2
+ "</think>": 151668,
3
  "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
  "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
  "<|box_end|>": 151649,
9
  "<|box_start|>": 151648,
10
  "<|endoftext|>": 151643,
config.json CHANGED
@@ -1,31 +1,32 @@
1
  {
2
  "architectures": [
3
- "Qwen2ForCausalLM"
4
  ],
 
5
  "attention_dropout": 0.0,
6
- "bos_token_id": 151643,
7
  "eos_token_id": 151643,
 
8
  "hidden_act": "silu",
9
- "hidden_size": 3584,
10
  "initializer_range": 0.02,
11
- "intermediate_size": 18944,
12
  "max_position_embeddings": 32768,
13
- "max_window_layers": 28,
14
- "model_type": "qwen2",
15
- "num_attention_heads": 28,
16
- "num_hidden_layers": 28,
17
- "num_key_value_heads": 4,
18
- "pad_token_id": 151665,
19
  "rms_norm_eps": 1e-06,
20
  "rope_scaling": null,
21
- "rope_theta": 1000000.0,
22
  "sliding_window": null,
23
- "tie_word_embeddings": false,
24
  "torch_dtype": "bfloat16",
25
  "transformers_version": "4.52.4",
26
  "unsloth_fixed": true,
27
  "unsloth_version": "2025.6.2",
28
  "use_cache": true,
29
  "use_sliding_window": false,
30
- "vocab_size": 152064
31
  }
 
1
  {
2
  "architectures": [
3
+ "Qwen3ForCausalLM"
4
  ],
5
+ "attention_bias": false,
6
  "attention_dropout": 0.0,
 
7
  "eos_token_id": 151643,
8
+ "head_dim": 128,
9
  "hidden_act": "silu",
10
+ "hidden_size": 2560,
11
  "initializer_range": 0.02,
12
+ "intermediate_size": 9728,
13
  "max_position_embeddings": 32768,
14
+ "max_window_layers": 36,
15
+ "model_type": "qwen3",
16
+ "num_attention_heads": 32,
17
+ "num_hidden_layers": 36,
18
+ "num_key_value_heads": 8,
19
+ "pad_token_id": 151654,
20
  "rms_norm_eps": 1e-06,
21
  "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
  "sliding_window": null,
24
+ "tie_word_embeddings": true,
25
  "torch_dtype": "bfloat16",
26
  "transformers_version": "4.52.4",
27
  "unsloth_fixed": true,
28
  "unsloth_version": "2025.6.2",
29
  "use_cache": true,
30
  "use_sliding_window": false,
31
+ "vocab_size": 151936
32
  }
generation_config.json CHANGED
@@ -3,6 +3,6 @@
3
  "eos_token_id": 151643,
4
  "max_length": 32768,
5
  "max_new_tokens": 2048,
6
- "pad_token_id": 151665,
7
  "transformers_version": "4.52.4"
8
  }
 
3
  "eos_token_id": 151643,
4
  "max_length": 32768,
5
  "max_new_tokens": 2048,
6
+ "pad_token_id": 151654,
7
  "transformers_version": "4.52.4"
8
  }
special_tokens_map.json CHANGED
@@ -22,7 +22,7 @@
22
  "single_word": false
23
  },
24
  "pad_token": {
25
- "content": "<|PAD_TOKEN|>",
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
 
22
  "single_word": false
23
  },
24
  "pad_token": {
25
+ "content": "<|vision_pad|>",
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fab42efe8d17406525a9154b728cf9e957629a8ed7ce997770efdd71128c6a1a
3
- size 11422086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
tokenizer_config.json CHANGED
@@ -179,12 +179,36 @@
179
  "special": false
180
  },
181
  "151665": {
182
- "content": "<|PAD_TOKEN|>",
183
  "lstrip": false,
184
  "normalized": false,
185
  "rstrip": false,
186
  "single_word": false,
187
- "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  }
189
  },
190
  "additional_special_tokens": [
@@ -208,7 +232,7 @@
208
  "errors": "replace",
209
  "extra_special_tokens": {},
210
  "model_max_length": 32768,
211
- "pad_token": "<|PAD_TOKEN|>",
212
  "padding_side": "right",
213
  "split_special_tokens": false,
214
  "tokenizer_class": "Qwen2Tokenizer",
 
179
  "special": false
180
  },
181
  "151665": {
182
+ "content": "<tool_response>",
183
  "lstrip": false,
184
  "normalized": false,
185
  "rstrip": false,
186
  "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
  }
213
  },
214
  "additional_special_tokens": [
 
232
  "errors": "replace",
233
  "extra_special_tokens": {},
234
  "model_max_length": 32768,
235
+ "pad_token": "<|vision_pad|>",
236
  "padding_side": "right",
237
  "split_special_tokens": false,
238
  "tokenizer_class": "Qwen2Tokenizer",