prince-canuma commited on
Commit
cad01d4
·
verified ·
1 Parent(s): 87bf6bd

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ base_model: ibm-granite/granite-vision-3.2-2b
6
+ library_name: mlx
7
+ new_version: ibm-granite/granite-vision-3.3-2b
8
+ tags:
9
+ - mlx
10
+ pipeline_tag: image-text-to-text
11
+ ---
12
+
13
+ # mlx-community/granite-vision-3.2-2b-6bit
14
+
15
+ This model was converted to MLX format from [`ibm-granite/granite-vision-3.2-2b`](https://huggingface.co/ibm-granite/granite-vision-3.2-2b)
16
+ using mlx-vlm version **0.4.3**.
17
+ Refer to the [original model card](https://huggingface.co/ibm-granite/granite-vision-3.2-2b) for more details on the model.
18
+
19
+ ## Use with mlx
20
+
21
+ ```bash
22
+ pip install -U mlx-vlm
23
+ ```
24
+
25
+ ```bash
26
+ python -m mlx_vlm.generate --model mlx-community/granite-vision-3.2-2b-6bit --max-tokens 100 --temperature 0.0 --prompt "Describe this image." --image <path_to_image>
27
+ ```
added_tokens.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "<|end_of_role|>": 49153,
3
+ "<|start_of_role|>": 49152,
4
+ "<|tool_call|>": 49154,
5
+ "<image>": 49155
6
+ }
chat_template.jinja ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|start_of_role|>available_tools<|end_of_role|>
3
+ ' }}
4
+ {%- for tool in tools %}
5
+ {{- tool | tojson(indent=4) }}
6
+ {%- if not loop.last %}
7
+ {{- '
8
+
9
+ ' }}
10
+ {%- endif %}
11
+ {%- endfor %}
12
+ {{- '<|end_of_text|>
13
+ ' }}
14
+ {%- endif %}
15
+ {%- for message in messages if message['role'] == 'system'%}{% else %}<|system|>
16
+ A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
17
+ {% endfor %}{%- for message in messages %}
18
+ {%- if message['role'] == 'system' %}
19
+ {{- '<|system|>
20
+ ' + message['content'][0]['text'] + '
21
+ ' }}
22
+ {%- elif message['role'] == 'user' %}<|user|>
23
+ {# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>
24
+ ' }}{% endfor %}{# Render all text next #}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + '
25
+ ' }}{% endfor %}
26
+ {%- elif message['role'] == 'assistant' %}
27
+ {{- '<|assistant|>
28
+ ' + message['content'][0]['text'] + '<|end_of_text|>' }}
29
+ {%- elif message['role'] == 'assistant_tool_call' %}
30
+ {{- '<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message['content'][0]['text'] + '<|end_of_text|>
31
+ ' }}
32
+ {%- elif message['role'] == 'tool_response' %}
33
+ {{- '<|start_of_role|>tool_response<|end_of_role|>' + message['content'][0]['text'] + '<|end_of_text|>
34
+ ' }}
35
+ {%- endif %}
36
+ {%- if loop.last and add_generation_prompt %}
37
+ {{- '<|assistant|>
38
+ ' }}
39
+ {%- endif %}
40
+ {%- endfor %}
chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "{%- if tools %}\n {{- '<|start_of_role|>available_tools<|end_of_role|>\n' }}\n {%- for tool in tools %}\n {{- tool | tojson(indent=4) }}\n {%- if not loop.last %}\n {{- '\n\n' }}\n {%- endif %}\n {%- endfor %}\n {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- for message in messages if message['role'] == 'system'%}{% else %}<|system|>\nA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n{% endfor %}{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n {{- '<|system|>\n' + message['content'][0]['text'] + '\n' }}\n {%- elif message['role'] == 'user' %}<|user|>\n {# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + '\n' }}{% endfor %}\n{%- elif message['role'] == 'assistant' %}\n {{- '<|assistant|>\n' + message['content'][0]['text'] + '<|end_of_text|>' }}\n {%- elif message['role'] == 'assistant_tool_call' %}\n {{- '<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message['content'][0]['text'] + '<|end_of_text|>\n' }}\n {%- elif message['role'] == 'tool_response' %}\n {{- '<|start_of_role|>tool_response<|end_of_role|>' + message['content'][0]['text'] + '<|end_of_text|>\n' }}\n {%- endif %}\n {%- if loop.last and add_generation_prompt %}\n {{- '<|assistant|>\n' }}\n {%- endif %}\n{%- endfor %}"
3
+ }
config.json ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlavaNextForConditionalGeneration"
4
+ ],
5
+ "image_grid_pinpoints": [
6
+ [
7
+ 384,
8
+ 384
9
+ ],
10
+ [
11
+ 384,
12
+ 768
13
+ ],
14
+ [
15
+ 384,
16
+ 1152
17
+ ],
18
+ [
19
+ 384,
20
+ 1536
21
+ ],
22
+ [
23
+ 384,
24
+ 1920
25
+ ],
26
+ [
27
+ 384,
28
+ 2304
29
+ ],
30
+ [
31
+ 384,
32
+ 2688
33
+ ],
34
+ [
35
+ 384,
36
+ 3072
37
+ ],
38
+ [
39
+ 384,
40
+ 3456
41
+ ],
42
+ [
43
+ 384,
44
+ 3840
45
+ ],
46
+ [
47
+ 768,
48
+ 384
49
+ ],
50
+ [
51
+ 768,
52
+ 768
53
+ ],
54
+ [
55
+ 768,
56
+ 1152
57
+ ],
58
+ [
59
+ 768,
60
+ 1536
61
+ ],
62
+ [
63
+ 768,
64
+ 1920
65
+ ],
66
+ [
67
+ 1152,
68
+ 384
69
+ ],
70
+ [
71
+ 1152,
72
+ 768
73
+ ],
74
+ [
75
+ 1152,
76
+ 1152
77
+ ],
78
+ [
79
+ 1536,
80
+ 384
81
+ ],
82
+ [
83
+ 1536,
84
+ 768
85
+ ],
86
+ [
87
+ 1920,
88
+ 384
89
+ ],
90
+ [
91
+ 1920,
92
+ 768
93
+ ],
94
+ [
95
+ 2304,
96
+ 384
97
+ ],
98
+ [
99
+ 2688,
100
+ 384
101
+ ],
102
+ [
103
+ 3072,
104
+ 384
105
+ ],
106
+ [
107
+ 3456,
108
+ 384
109
+ ],
110
+ [
111
+ 3840,
112
+ 384
113
+ ]
114
+ ],
115
+ "image_token_index": 49155,
116
+ "model_type": "granite_vision",
117
+ "quantization": {
118
+ "group_size": 64,
119
+ "bits": 6,
120
+ "mode": "affine"
121
+ },
122
+ "quantization_config": {
123
+ "group_size": 64,
124
+ "bits": 6,
125
+ "mode": "affine"
126
+ },
127
+ "text_config": {
128
+ "architectures": [
129
+ "GraniteForCausalLM"
130
+ ],
131
+ "attention_bias": false,
132
+ "attention_dropout": 0.1,
133
+ "attention_multiplier": 0.015625,
134
+ "bos_token_id": 0,
135
+ "embedding_multiplier": 12.0,
136
+ "eos_token_id": 0,
137
+ "hidden_act": "silu",
138
+ "hidden_size": 2048,
139
+ "initializer_range": 0.02,
140
+ "intermediate_size": 8192,
141
+ "logits_scaling": 8.0,
142
+ "max_position_embeddings": 131072,
143
+ "mlp_bias": false,
144
+ "model_type": "granite",
145
+ "num_attention_heads": 32,
146
+ "num_hidden_layers": 40,
147
+ "num_key_value_heads": 8,
148
+ "pad_token_id": 0,
149
+ "residual_multiplier": 0.22,
150
+ "rms_norm_eps": 1e-05,
151
+ "rope_scaling": null,
152
+ "rope_theta": 300000,
153
+ "tie_word_embeddings": true,
154
+ "torch_dtype": "bfloat16",
155
+ "transformers_version": "4.46.0.dev0",
156
+ "use_cache": true,
157
+ "vocab_size": 49156
158
+ },
159
+ "tie_word_embeddings": true,
160
+ "transformers_version": "4.45.0.dev0",
161
+ "use_image_newline_parameter": true,
162
+ "vision_config": {
163
+ "hidden_size": 1152,
164
+ "image_size": 384,
165
+ "intermediate_size": 4304,
166
+ "model_type": "siglip_vision_model",
167
+ "num_attention_heads": 16,
168
+ "num_hidden_layers": 27,
169
+ "patch_size": 14
170
+ },
171
+ "vision_feature_layer": [
172
+ -24,
173
+ -20,
174
+ -12,
175
+ -1
176
+ ],
177
+ "vision_feature_select_strategy": "full"
178
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 0,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.45.0.dev0"
7
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0cce0909c7561a3ecfa92af4782c4f2ae9b5e72c04b49b755a7f9744e817acb
3
+ size 3008209166
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
preprocessor_config.json ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 384,
4
+ "width": 384
5
+ },
6
+ "do_convert_rgb": null,
7
+ "do_normalize": true,
8
+ "do_rescale": true,
9
+ "do_resize": true,
10
+ "image_mean": [
11
+ 0.5,
12
+ 0.5,
13
+ 0.5
14
+ ],
15
+ "image_processor_type": "LlavaNextImageProcessor",
16
+ "image_std": [
17
+ 0.5,
18
+ 0.5,
19
+ 0.5
20
+ ],
21
+ "processor_class": "LlavaNextProcessor",
22
+ "resample": 3,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "height": 384,
26
+ "width": 384
27
+ },
28
+ "image_grid_pinpoints": [
29
+ [
30
+ 384,
31
+ 384
32
+ ],
33
+ [
34
+ 384,
35
+ 768
36
+ ],
37
+ [
38
+ 384,
39
+ 1152
40
+ ],
41
+ [
42
+ 384,
43
+ 1536
44
+ ],
45
+ [
46
+ 384,
47
+ 1920
48
+ ],
49
+ [
50
+ 384,
51
+ 2304
52
+ ],
53
+ [
54
+ 384,
55
+ 2688
56
+ ],
57
+ [
58
+ 384,
59
+ 3072
60
+ ],
61
+ [
62
+ 384,
63
+ 3456
64
+ ],
65
+ [
66
+ 384,
67
+ 3840
68
+ ],
69
+ [
70
+ 768,
71
+ 384
72
+ ],
73
+ [
74
+ 768,
75
+ 768
76
+ ],
77
+ [
78
+ 768,
79
+ 1152
80
+ ],
81
+ [
82
+ 768,
83
+ 1536
84
+ ],
85
+ [
86
+ 768,
87
+ 1920
88
+ ],
89
+ [
90
+ 1152,
91
+ 384
92
+ ],
93
+ [
94
+ 1152,
95
+ 768
96
+ ],
97
+ [
98
+ 1152,
99
+ 1152
100
+ ],
101
+ [
102
+ 1536,
103
+ 384
104
+ ],
105
+ [
106
+ 1536,
107
+ 768
108
+ ],
109
+ [
110
+ 1920,
111
+ 384
112
+ ],
113
+ [
114
+ 1920,
115
+ 768
116
+ ],
117
+ [
118
+ 2304,
119
+ 384
120
+ ],
121
+ [
122
+ 2688,
123
+ 384
124
+ ],
125
+ [
126
+ 3072,
127
+ 384
128
+ ],
129
+ [
130
+ 3456,
131
+ 384
132
+ ],
133
+ [
134
+ 3840,
135
+ 384
136
+ ]
137
+ ]
138
+ }
processor_config.json ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_processor": {
3
+ "crop_size": {
4
+ "height": 384,
5
+ "width": 384
6
+ },
7
+ "do_center_crop": true,
8
+ "do_convert_rgb": null,
9
+ "do_normalize": true,
10
+ "do_pad": true,
11
+ "do_rescale": true,
12
+ "do_resize": true,
13
+ "image_grid_pinpoints": [
14
+ [
15
+ 384,
16
+ 384
17
+ ],
18
+ [
19
+ 384,
20
+ 768
21
+ ],
22
+ [
23
+ 384,
24
+ 1152
25
+ ],
26
+ [
27
+ 384,
28
+ 1536
29
+ ],
30
+ [
31
+ 384,
32
+ 1920
33
+ ],
34
+ [
35
+ 384,
36
+ 2304
37
+ ],
38
+ [
39
+ 384,
40
+ 2688
41
+ ],
42
+ [
43
+ 384,
44
+ 3072
45
+ ],
46
+ [
47
+ 384,
48
+ 3456
49
+ ],
50
+ [
51
+ 384,
52
+ 3840
53
+ ],
54
+ [
55
+ 768,
56
+ 384
57
+ ],
58
+ [
59
+ 768,
60
+ 768
61
+ ],
62
+ [
63
+ 768,
64
+ 1152
65
+ ],
66
+ [
67
+ 768,
68
+ 1536
69
+ ],
70
+ [
71
+ 768,
72
+ 1920
73
+ ],
74
+ [
75
+ 1152,
76
+ 384
77
+ ],
78
+ [
79
+ 1152,
80
+ 768
81
+ ],
82
+ [
83
+ 1152,
84
+ 1152
85
+ ],
86
+ [
87
+ 1536,
88
+ 384
89
+ ],
90
+ [
91
+ 1536,
92
+ 768
93
+ ],
94
+ [
95
+ 1920,
96
+ 384
97
+ ],
98
+ [
99
+ 1920,
100
+ 768
101
+ ],
102
+ [
103
+ 2304,
104
+ 384
105
+ ],
106
+ [
107
+ 2688,
108
+ 384
109
+ ],
110
+ [
111
+ 3072,
112
+ 384
113
+ ],
114
+ [
115
+ 3456,
116
+ 384
117
+ ],
118
+ [
119
+ 3840,
120
+ 384
121
+ ]
122
+ ],
123
+ "image_mean": [
124
+ 0.5,
125
+ 0.5,
126
+ 0.5
127
+ ],
128
+ "image_processor_type": "LlavaNextImageProcessor",
129
+ "image_std": [
130
+ 0.5,
131
+ 0.5,
132
+ 0.5
133
+ ],
134
+ "resample": 3,
135
+ "rescale_factor": 0.00392156862745098,
136
+ "size": {
137
+ "height": 384,
138
+ "width": 384
139
+ }
140
+ },
141
+ "image_token": "<image>",
142
+ "num_additional_image_tokens": 0,
143
+ "patch_size": 14,
144
+ "processor_class": "GraniteVisionProcessor",
145
+ "vision_feature_select_strategy": "full"
146
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|start_of_role|>",
4
+ "<|end_of_role|>",
5
+ "<|tool_call|>"
6
+ ],
7
+ "bos_token": {
8
+ "content": "<|end_of_text|>",
9
+ "lstrip": false,
10
+ "normalized": false,
11
+ "rstrip": false,
12
+ "single_word": false
13
+ },
14
+ "eos_token": {
15
+ "content": "<|end_of_text|>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "pad_token": {
22
+ "content": "<|end_of_text|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ },
28
+ "unk_token": {
29
+ "content": "<|end_of_text|>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false
34
+ }
35
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<|end_of_text|>",
5
+ "clean_up_tokenization_spaces": true,
6
+ "eos_token": "<|end_of_text|>",
7
+ "errors": "replace",
8
+ "extra_special_tokens": [
9
+ "<|start_of_role|>",
10
+ "<|end_of_role|>",
11
+ "<|tool_call|>"
12
+ ],
13
+ "is_local": true,
14
+ "model_max_length": 131072,
15
+ "pad_token": "<|end_of_text|>",
16
+ "padding_side": "right",
17
+ "processor_class": "GraniteVisionProcessor",
18
+ "tokenizer_class": "TokenizersBackend",
19
+ "unk_token": "<|end_of_text|>",
20
+ "vocab_size": 49152
21
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff