Upload folder using huggingface_hub

Browse files

Files changed (14) hide show

README.md +27 -0
added_tokens.json +6 -0
chat_template.jinja +40 -0
chat_template.json +3 -0
config.json +178 -0
generation_config.json +7 -0
model.safetensors +3 -0
model.safetensors.index.json +0 -0
preprocessor_config.json +138 -0
processor_config.json +146 -0
special_tokens_map.json +35 -0
tokenizer.json +0 -0
tokenizer_config.json +21 -0
vocab.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,27 @@

+---
+license: apache-2.0
+language:
+- en
+base_model: ibm-granite/granite-vision-3.2-2b
+library_name: mlx
+new_version: ibm-granite/granite-vision-3.3-2b
+tags:
+- mlx
+pipeline_tag: image-text-to-text
+---
+# mlx-community/granite-vision-3.2-2b-6bit
+This model was converted to MLX format from [`ibm-granite/granite-vision-3.2-2b`](https://huggingface.co/ibm-granite/granite-vision-3.2-2b)
+using mlx-vlm version **0.4.3**.
+Refer to the [original model card](https://huggingface.co/ibm-granite/granite-vision-3.2-2b) for more details on the model.
+## Use with mlx
+```bash
+pip install -U mlx-vlm
+```
+```bash
+python -m mlx_vlm.generate --model mlx-community/granite-vision-3.2-2b-6bit --max-tokens 100 --temperature 0.0 --prompt "Describe this image." --image <path_to_image>
+```

added_tokens.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+    "<|end_of_role|>": 49153,
+    "<|start_of_role|>": 49152,
+    "<|tool_call|>": 49154,
+    "<image>": 49155
+}

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,40 @@

+{%- if tools %}
+    {{- '<|start_of_role|>available_tools<|end_of_role|>
+' }}
+    {%- for tool in tools %}
+    {{- tool | tojson(indent=4) }}
+    {%- if not loop.last %}
+        {{- '
+
+' }}
+    {%- endif %}
+    {%- endfor %}
+    {{- '<|end_of_text|>
+' }}
+{%- endif %}
+{%- for message in messages if message['role'] == 'system'%}{% else %}<|system|>
+A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
+{% endfor %}{%- for message in messages %}
+    {%- if message['role'] == 'system' %}
+    {{- '<|system|>
+' + message['content'][0]['text'] + '
+' }}
+    {%- elif message['role'] == 'user' %}<|user|>
+ {# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>
+' }}{% endfor %}{# Render all text next #}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + '
+' }}{% endfor %}
+{%- elif message['role'] == 'assistant' %}
+    {{- '<|assistant|>
+'  + message['content'][0]['text']  + '<|end_of_text|>' }}
+    {%- elif message['role'] == 'assistant_tool_call' %}
+    {{- '<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message['content'][0]['text']  + '<|end_of_text|>
+' }}
+    {%- elif message['role'] == 'tool_response' %}
+    {{- '<|start_of_role|>tool_response<|end_of_role|>' + message['content'][0]['text'] + '<|end_of_text|>
+' }}
+    {%- endif %}
+    {%- if loop.last and add_generation_prompt %}
+    {{- '<|assistant|>
+' }}
+    {%- endif %}
+{%- endfor %}

chat_template.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+    "chat_template": "{%- if tools %}\n    {{- '<|start_of_role|>available_tools<|end_of_role|>\n' }}\n    {%- for tool in tools %}\n    {{- tool | tojson(indent=4) }}\n    {%- if not loop.last %}\n        {{- '\n\n' }}\n    {%- endif %}\n    {%- endfor %}\n    {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- for message in messages if message['role'] == 'system'%}{% else %}<|system|>\nA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n{% endfor %}{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n    {{- '<|system|>\n' + message['content'][0]['text'] + '\n' }}\n    {%- elif message['role'] == 'user' %}<|user|>\n {# Render all images first #}{% for content in message['content'] | selectattr('type', 'equalto', 'image') %}{{ '<image>\n' }}{% endfor %}{# Render all text next #}{% for content in message['content'] | selectattr('type', 'equalto', 'text') %}{{ content['text'] + '\n' }}{% endfor %}\n{%- elif message['role'] == 'assistant' %}\n    {{- '<|assistant|>\n'  + message['content'][0]['text']  + '<|end_of_text|>' }}\n    {%- elif message['role'] == 'assistant_tool_call' %}\n    {{- '<|start_of_role|>assistant<|end_of_role|><|tool_call|>' + message['content'][0]['text']  + '<|end_of_text|>\n' }}\n    {%- elif message['role'] == 'tool_response' %}\n    {{- '<|start_of_role|>tool_response<|end_of_role|>' + message['content'][0]['text'] + '<|end_of_text|>\n' }}\n    {%- endif %}\n    {%- if loop.last and add_generation_prompt %}\n    {{- '<|assistant|>\n' }}\n    {%- endif %}\n{%- endfor %}"
+}

config.json ADDED Viewed

	@@ -0,0 +1,178 @@

+{
+    "architectures": [
+        "LlavaNextForConditionalGeneration"
+    ],
+    "image_grid_pinpoints": [
+        [
+            384,
+            384
+        ],
+        [
+            384,
+            768
+        ],
+        [
+            384,
+            1152
+        ],
+        [
+            384,
+            1536
+        ],
+        [
+            384,
+            1920
+        ],
+        [
+            384,
+            2304
+        ],
+        [
+            384,
+            2688
+        ],
+        [
+            384,
+            3072
+        ],
+        [
+            384,
+            3456
+        ],
+        [
+            384,
+            3840
+        ],
+        [
+            768,
+            384
+        ],
+        [
+            768,
+            768
+        ],
+        [
+            768,
+            1152
+        ],
+        [
+            768,
+            1536
+        ],
+        [
+            768,
+            1920
+        ],
+        [
+            1152,
+            384
+        ],
+        [
+            1152,
+            768
+        ],
+        [
+            1152,
+            1152
+        ],
+        [
+            1536,
+            384
+        ],
+        [
+            1536,
+            768
+        ],
+        [
+            1920,
+            384
+        ],
+        [
+            1920,
+            768
+        ],
+        [
+            2304,
+            384
+        ],
+        [
+            2688,
+            384
+        ],
+        [
+            3072,
+            384
+        ],
+        [
+            3456,
+            384
+        ],
+        [
+            3840,
+            384
+        ]
+    ],
+    "image_token_index": 49155,
+    "model_type": "granite_vision",
+    "quantization": {
+        "group_size": 64,
+        "bits": 6,
+        "mode": "affine"
+    },
+    "quantization_config": {
+        "group_size": 64,
+        "bits": 6,
+        "mode": "affine"
+    },
+    "text_config": {
+        "architectures": [
+            "GraniteForCausalLM"
+        ],
+        "attention_bias": false,
+        "attention_dropout": 0.1,
+        "attention_multiplier": 0.015625,
+        "bos_token_id": 0,
+        "embedding_multiplier": 12.0,
+        "eos_token_id": 0,
+        "hidden_act": "silu",
+        "hidden_size": 2048,
+        "initializer_range": 0.02,
+        "intermediate_size": 8192,
+        "logits_scaling": 8.0,
+        "max_position_embeddings": 131072,
+        "mlp_bias": false,
+        "model_type": "granite",
+        "num_attention_heads": 32,
+        "num_hidden_layers": 40,
+        "num_key_value_heads": 8,
+        "pad_token_id": 0,
+        "residual_multiplier": 0.22,
+        "rms_norm_eps": 1e-05,
+        "rope_scaling": null,
+        "rope_theta": 300000,
+        "tie_word_embeddings": true,
+        "torch_dtype": "bfloat16",
+        "transformers_version": "4.46.0.dev0",
+        "use_cache": true,
+        "vocab_size": 49156
+    },
+    "tie_word_embeddings": true,
+    "transformers_version": "4.45.0.dev0",
+    "use_image_newline_parameter": true,
+    "vision_config": {
+        "hidden_size": 1152,
+        "image_size": 384,
+        "intermediate_size": 4304,
+        "model_type": "siglip_vision_model",
+        "num_attention_heads": 16,
+        "num_hidden_layers": 27,
+        "patch_size": 14
+    },
+    "vision_feature_layer": [
+        -24,
+        -20,
+        -12,
+        -1
+    ],
+    "vision_feature_select_strategy": "full"
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "eos_token_id": 0,
+  "pad_token_id": 0,
+  "transformers_version": "4.45.0.dev0"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a0cce0909c7561a3ecfa92af4782c4f2ae9b5e72c04b49b755a7f9744e817acb
+size 3008209166

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,138 @@

+{
+    "crop_size": {
+        "height": 384,
+        "width": 384
+    },
+    "do_convert_rgb": null,
+    "do_normalize": true,
+    "do_rescale": true,
+    "do_resize": true,
+    "image_mean": [
+        0.5,
+        0.5,
+        0.5
+    ],
+    "image_processor_type": "LlavaNextImageProcessor",
+    "image_std": [
+        0.5,
+        0.5,
+        0.5
+    ],
+    "processor_class": "LlavaNextProcessor",
+    "resample": 3,
+    "rescale_factor": 0.00392156862745098,
+    "size": {
+        "height": 384,
+        "width": 384
+    },
+    "image_grid_pinpoints": [
+        [
+            384,
+            384
+        ],
+        [
+            384,
+            768
+        ],
+        [
+            384,
+            1152
+        ],
+        [
+            384,
+            1536
+        ],
+        [
+            384,
+            1920
+        ],
+        [
+            384,
+            2304
+        ],
+        [
+            384,
+            2688
+        ],
+        [
+            384,
+            3072
+        ],
+        [
+            384,
+            3456
+        ],
+        [
+            384,
+            3840
+        ],
+        [
+            768,
+            384
+        ],
+        [
+            768,
+            768
+        ],
+        [
+            768,
+            1152
+        ],
+        [
+            768,
+            1536
+        ],
+        [
+            768,
+            1920
+        ],
+        [
+            1152,
+            384
+        ],
+        [
+            1152,
+            768
+        ],
+        [
+            1152,
+            1152
+        ],
+        [
+            1536,
+            384
+        ],
+        [
+            1536,
+            768
+        ],
+        [
+            1920,
+            384
+        ],
+        [
+            1920,
+            768
+        ],
+        [
+            2304,
+            384
+        ],
+        [
+            2688,
+            384
+        ],
+        [
+            3072,
+            384
+        ],
+        [
+            3456,
+            384
+        ],
+        [
+            3840,
+            384
+        ]
+    ]
+}

processor_config.json ADDED Viewed

	@@ -0,0 +1,146 @@

+{
+  "image_processor": {
+    "crop_size": {
+      "height": 384,
+      "width": 384
+    },
+    "do_center_crop": true,
+    "do_convert_rgb": null,
+    "do_normalize": true,
+    "do_pad": true,
+    "do_rescale": true,
+    "do_resize": true,
+    "image_grid_pinpoints": [
+      [
+        384,
+        384
+      ],
+      [
+        384,
+        768
+      ],
+      [
+        384,
+        1152
+      ],
+      [
+        384,
+        1536
+      ],
+      [
+        384,
+        1920
+      ],
+      [
+        384,
+        2304
+      ],
+      [
+        384,
+        2688
+      ],
+      [
+        384,
+        3072
+      ],
+      [
+        384,
+        3456
+      ],
+      [
+        384,
+        3840
+      ],
+      [
+        768,
+        384
+      ],
+      [
+        768,
+        768
+      ],
+      [
+        768,
+        1152
+      ],
+      [
+        768,
+        1536
+      ],
+      [
+        768,
+        1920
+      ],
+      [
+        1152,
+        384
+      ],
+      [
+        1152,
+        768
+      ],
+      [
+        1152,
+        1152
+      ],
+      [
+        1536,
+        384
+      ],
+      [
+        1536,
+        768
+      ],
+      [
+        1920,
+        384
+      ],
+      [
+        1920,
+        768
+      ],
+      [
+        2304,
+        384
+      ],
+      [
+        2688,
+        384
+      ],
+      [
+        3072,
+        384
+      ],
+      [
+        3456,
+        384
+      ],
+      [
+        3840,
+        384
+      ]
+    ],
+    "image_mean": [
+      0.5,
+      0.5,
+      0.5
+    ],
+    "image_processor_type": "LlavaNextImageProcessor",
+    "image_std": [
+      0.5,
+      0.5,
+      0.5
+    ],
+    "resample": 3,
+    "rescale_factor": 0.00392156862745098,
+    "size": {
+      "height": 384,
+      "width": 384
+    }
+  },
+  "image_token": "<image>",
+  "num_additional_image_tokens": 0,
+  "patch_size": 14,
+  "processor_class": "GraniteVisionProcessor",
+  "vision_feature_select_strategy": "full"
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "additional_special_tokens": [
+    "<|start_of_role|>",
+    "<|end_of_role|>",
+    "<|tool_call|>"
+  ],
+  "bos_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": "<|end_of_text|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|end_of_text|>",
+  "errors": "replace",
+  "extra_special_tokens": [
+    "<|start_of_role|>",
+    "<|end_of_role|>",
+    "<|tool_call|>"
+  ],
+  "is_local": true,
+  "model_max_length": 131072,
+  "pad_token": "<|end_of_text|>",
+  "padding_side": "right",
+  "processor_class": "GraniteVisionProcessor",
+  "tokenizer_class": "TokenizersBackend",
+  "unk_token": "<|end_of_text|>",
+  "vocab_size": 49152
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff