Snider Cladius Maximus committed on
Commit
33447f7
·
1 Parent(s): 17a41c7

verified: all 6 GGUF quants + 3 MLX quants confirmed working

Browse files

Tested via Ollama (GGUF) and mlx-lm (MLX):
- Q3_K_M, Q4_K_M, Q5_K_M, Q6_K, Q8_0, BF16 — all pass
- MLX Q4, Q8, BF16 — all pass

Co-Authored-By: Cladius Maximus <cladius@lethean.io>

Files changed (3) hide show
  1. config.json +44 -1
  2. processor_config.json +16 -49
  3. tokenizer_config.json +41 -20
config.json CHANGED
@@ -154,5 +154,48 @@
154
  "tie_word_embeddings": true,
155
  "transformers_version": "5.5.0.dev0",
156
  "video_token_id": 258884,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  "vision_soft_tokens_per_image": 280
158
- }
 
154
  "tie_word_embeddings": true,
155
  "transformers_version": "5.5.0.dev0",
156
  "video_token_id": 258884,
157
+ "vision_config": {
158
+ "_name_or_path": "",
159
+ "architectures": null,
160
+ "attention_bias": false,
161
+ "attention_dropout": 0.0,
162
+ "chunk_size_feed_forward": 0,
163
+ "default_output_length": 280,
164
+ "dtype": "bfloat16",
165
+ "global_head_dim": 64,
166
+ "head_dim": 64,
167
+ "hidden_activation": "gelu_pytorch_tanh",
168
+ "hidden_size": 768,
169
+ "id2label": {
170
+ "0": "LABEL_0",
171
+ "1": "LABEL_1"
172
+ },
173
+ "initializer_range": 0.02,
174
+ "intermediate_size": 3072,
175
+ "is_encoder_decoder": false,
176
+ "label2id": {
177
+ "LABEL_0": 0,
178
+ "LABEL_1": 1
179
+ },
180
+ "max_position_embeddings": 131072,
181
+ "model_type": "gemma4_vision",
182
+ "num_attention_heads": 12,
183
+ "num_hidden_layers": 16,
184
+ "num_key_value_heads": 12,
185
+ "output_attentions": false,
186
+ "output_hidden_states": false,
187
+ "patch_size": 16,
188
+ "pooling_kernel_size": 3,
189
+ "position_embedding_size": 10240,
190
+ "problem_type": null,
191
+ "return_dict": true,
192
+ "rms_norm_eps": 1e-06,
193
+ "rope_parameters": {
194
+ "rope_theta": 100.0,
195
+ "rope_type": "default"
196
+ },
197
+ "standardize": false,
198
+ "use_clipped_linears": true
199
+ },
200
  "vision_soft_tokens_per_image": 280
201
+ }
processor_config.json CHANGED
@@ -1,27 +1,5 @@
1
  {
2
- "audio_ms_per_token": 40,
3
  "audio_seq_length": 750,
4
- "feature_extractor": {
5
- "dither": 0.0,
6
- "feature_extractor_type": "Gemma4AudioFeatureExtractor",
7
- "feature_size": 128,
8
- "fft_length": 512,
9
- "fft_overdrive": false,
10
- "frame_length": 320,
11
- "hop_length": 160,
12
- "input_scale_factor": 1.0,
13
- "max_frequency": 8000.0,
14
- "mel_floor": 0.001,
15
- "min_frequency": 0.0,
16
- "padding_side": "right",
17
- "padding_value": 0.0,
18
- "per_bin_mean": null,
19
- "per_bin_stddev": null,
20
- "preemphasis": 0.0,
21
- "preemphasis_htk_flavor": true,
22
- "return_attention_mask": true,
23
- "sampling_rate": 16000
24
- },
25
  "image_processor": {
26
  "do_convert_rgb": true,
27
  "do_normalize": false,
@@ -43,33 +21,22 @@
43
  "patch_size": 16,
44
  "pooling_kernel_size": 3,
45
  "resample": 3,
46
- "rescale_factor": 0.00392156862745098
 
 
 
 
47
  },
48
  "image_seq_length": 280,
49
  "processor_class": "Gemma4Processor",
50
- "video_processor": {
51
- "do_convert_rgb": true,
52
- "do_normalize": true,
53
- "do_rescale": true,
54
- "do_resize": true,
55
- "do_sample_frames": true,
56
- "image_mean": [
57
- 0.0,
58
- 0.0,
59
- 0.0
60
- ],
61
- "image_std": [
62
- 1.0,
63
- 1.0,
64
- 1.0
65
- ],
66
- "max_soft_tokens": 70,
67
- "num_frames": 32,
68
- "patch_size": 16,
69
- "pooling_kernel_size": 3,
70
- "resample": 3,
71
- "rescale_factor": 0.00392156862745098,
72
- "return_metadata": false,
73
- "video_processor_type": "Gemma4VideoProcessor"
74
- }
75
- }
 
1
  {
 
2
  "audio_seq_length": 750,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "image_processor": {
4
  "do_convert_rgb": true,
5
  "do_normalize": false,
 
21
  "patch_size": 16,
22
  "pooling_kernel_size": 3,
23
  "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
  },
30
  "image_seq_length": 280,
31
  "processor_class": "Gemma4Processor",
32
+ "feature_extractor": {
33
+ "feature_extractor_type": "Gemma4AudioFeatureExtractor",
34
+ "sampling_rate": 16000,
35
+ "num_mel_filters": 128,
36
+ "fft_length": 512,
37
+ "hop_length": 160,
38
+ "chunk_duration": 8.0,
39
+ "overlap_duration": 1.0
40
+ },
41
+ "audio_ms_per_token": 40
42
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer_config.json CHANGED
@@ -17,50 +17,71 @@
17
  "<|video|>"
18
  ],
19
  "image_token": "<|image|>",
 
20
  "mask_token": "<mask>",
21
  "model_max_length": 1000000000000000019884624838656,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  "pad_token": "<pad>",
23
  "padding_side": "left",
24
  "processor_class": "Gemma4Processor",
25
  "response_schema": {
26
- "type": "object",
27
  "properties": {
 
 
 
28
  "role": {
29
  "const": "assistant"
30
  },
31
  "thinking": {
32
  "type": "string"
33
  },
34
- "content": {
35
- "type": "string"
36
- },
37
  "tool_calls": {
38
- "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>",
39
- "type": "array",
40
  "items": {
41
- "type": "object",
42
  "properties": {
43
- "type": {
44
- "const": "function"
45
- },
46
  "function": {
47
- "type": "object",
48
- "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})",
49
  "properties": {
50
- "name": {
51
- "type": "string"
52
- },
53
  "arguments": {
 
54
  "type": "object",
55
- "x-parser": "gemma4-tool-call",
56
- "additionalProperties": {}
 
 
57
  }
58
- }
 
 
 
 
 
59
  }
60
- }
61
- }
 
 
 
62
  }
63
  },
 
64
  "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<content>(?:(?!\\<\\|tool_call\\>)(?!\\<turn\\|\\>).)+)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?:\\<turn\\|\\>)?"
65
  },
66
  "soc_token": "<|channel>",
 
17
  "<|video|>"
18
  ],
19
  "image_token": "<|image|>",
20
+ "is_local": true,
21
  "mask_token": "<mask>",
22
  "model_max_length": 1000000000000000019884624838656,
23
+ "model_specific_special_tokens": {
24
+ "audio_token": "<|audio|>",
25
+ "boa_token": "<|audio>",
26
+ "boi_token": "<|image>",
27
+ "eoa_token": "<audio|>",
28
+ "eoc_token": "<channel|>",
29
+ "eoi_token": "<image|>",
30
+ "eot_token": "<turn|>",
31
+ "escape_token": "<|\"|>",
32
+ "etc_token": "<tool_call|>",
33
+ "etd_token": "<tool|>",
34
+ "etr_token": "<tool_response|>",
35
+ "image_token": "<|image|>",
36
+ "soc_token": "<|channel>",
37
+ "sot_token": "<|turn>",
38
+ "stc_token": "<|tool_call>",
39
+ "std_token": "<|tool>",
40
+ "str_token": "<|tool_response>",
41
+ "think_token": "<|think|>"
42
+ },
43
  "pad_token": "<pad>",
44
  "padding_side": "left",
45
  "processor_class": "Gemma4Processor",
46
  "response_schema": {
 
47
  "properties": {
48
+ "content": {
49
+ "type": "string"
50
+ },
51
  "role": {
52
  "const": "assistant"
53
  },
54
  "thinking": {
55
  "type": "string"
56
  },
 
 
 
57
  "tool_calls": {
 
 
58
  "items": {
 
59
  "properties": {
 
 
 
60
  "function": {
 
 
61
  "properties": {
 
 
 
62
  "arguments": {
63
+ "additionalProperties": {},
64
  "type": "object",
65
+ "x-parser": "gemma4-tool-call"
66
+ },
67
+ "name": {
68
+ "type": "string"
69
  }
70
+ },
71
+ "type": "object",
72
+ "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})"
73
+ },
74
+ "type": {
75
+ "const": "function"
76
  }
77
+ },
78
+ "type": "object"
79
+ },
80
+ "type": "array",
81
+ "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>"
82
  }
83
  },
84
+ "type": "object",
85
  "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<content>(?:(?!\\<\\|tool_call\\>)(?!\\<turn\\|\\>).)+)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?:\\<turn\\|\\>)?"
86
  },
87
  "soc_token": "<|channel>",