Image-Text-to-Text
Transformers
PyTorch
multilingual
internvl_chat
feature-extraction
internvl
custom_code
conversational
Instructions to use OpenGVLab/InternVL2_5-4B-AWQ with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use OpenGVLab/InternVL2_5-4B-AWQ with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="OpenGVLab/InternVL2_5-4B-AWQ", trust_remote_code=True)
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("OpenGVLab/InternVL2_5-4B-AWQ", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use OpenGVLab/InternVL2_5-4B-AWQ with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "OpenGVLab/InternVL2_5-4B-AWQ"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "OpenGVLab/InternVL2_5-4B-AWQ",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'
Use Docker
docker model run hf.co/OpenGVLab/InternVL2_5-4B-AWQ
- SGLang
How to use OpenGVLab/InternVL2_5-4B-AWQ with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
  --model-path "OpenGVLab/InternVL2_5-4B-AWQ" \
  --host 0.0.0.0 \
  --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "OpenGVLab/InternVL2_5-4B-AWQ",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'
Use Docker images
docker run --gpus all \
  --shm-size 32g \
  -p 30000:30000 \
  -v ~/.cache/huggingface:/root/.cache/huggingface \
  --env "HF_TOKEN=<secret>" \
  --ipc=host \
  lmsysorg/sglang:latest \
  python3 -m sglang.launch_server \
    --model-path "OpenGVLab/InternVL2_5-4B-AWQ" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
  -H "Content-Type: application/json" \
  --data '{
    "model": "OpenGVLab/InternVL2_5-4B-AWQ",
    "messages": [
      {
        "role": "user",
        "content": [
          { "type": "text", "text": "Describe this image in one sentence." },
          { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
        ]
      }
    ]
  }'
- Docker Model Runner
How to use OpenGVLab/InternVL2_5-4B-AWQ with Docker Model Runner:
docker model run hf.co/OpenGVLab/InternVL2_5-4B-AWQ
Add supports_gradient_checkpointing
Browse files
- .cache/huggingface/.gitignore +1 -0
- .cache/huggingface/download/.gitattributes.lock +0 -0
- .cache/huggingface/download/.gitattributes.metadata +3 -0
- .cache/huggingface/download/README.md.lock +0 -0
- .cache/huggingface/download/README.md.metadata +3 -0
- .cache/huggingface/download/added_tokens.json.lock +0 -0
- .cache/huggingface/download/added_tokens.json.metadata +3 -0
- .cache/huggingface/download/config.json.lock +0 -0
- .cache/huggingface/download/config.json.metadata +3 -0
- .cache/huggingface/download/configuration_intern_vit.py.lock +0 -0
- .cache/huggingface/download/configuration_intern_vit.py.metadata +3 -0
- .cache/huggingface/download/configuration_internvl_chat.py.lock +0 -0
- .cache/huggingface/download/configuration_internvl_chat.py.metadata +3 -0
- .cache/huggingface/download/conversation.py.lock +0 -0
- .cache/huggingface/download/conversation.py.metadata +3 -0
- .cache/huggingface/download/generation_config.json.lock +0 -0
- .cache/huggingface/download/generation_config.json.metadata +3 -0
- .cache/huggingface/download/inputs_stats.pth.lock +0 -0
- .cache/huggingface/download/inputs_stats.pth.metadata +3 -0
- .cache/huggingface/download/merges.txt.lock +0 -0
- .cache/huggingface/download/merges.txt.metadata +3 -0
- .cache/huggingface/download/modeling_intern_vit.py.lock +0 -0
- .cache/huggingface/download/modeling_intern_vit.py.metadata +3 -0
- .cache/huggingface/download/modeling_internvl_chat.py.lock +0 -0
- .cache/huggingface/download/modeling_internvl_chat.py.metadata +3 -0
- .cache/huggingface/download/outputs_stats.pth.lock +0 -0
- .cache/huggingface/download/outputs_stats.pth.metadata +3 -0
- .cache/huggingface/download/preprocessor_config.json.lock +0 -0
- .cache/huggingface/download/preprocessor_config.json.metadata +3 -0
- .cache/huggingface/download/pytorch_model-00001-of-00002.bin.lock +0 -0
- .cache/huggingface/download/pytorch_model-00001-of-00002.bin.metadata +3 -0
- .cache/huggingface/download/pytorch_model-00002-of-00002.bin.lock +0 -0
- .cache/huggingface/download/pytorch_model-00002-of-00002.bin.metadata +3 -0
- .cache/huggingface/download/pytorch_model.bin.index.json.lock +0 -0
- .cache/huggingface/download/pytorch_model.bin.index.json.metadata +3 -0
- .cache/huggingface/download/special_tokens_map.json.lock +0 -0
- .cache/huggingface/download/special_tokens_map.json.metadata +3 -0
- .cache/huggingface/download/tokenizer.json.lock +0 -0
- .cache/huggingface/download/tokenizer.json.metadata +3 -0
- .cache/huggingface/download/tokenizer_config.json.lock +0 -0
- .cache/huggingface/download/tokenizer_config.json.metadata +3 -0
- .cache/huggingface/download/vocab.json.lock +0 -0
- .cache/huggingface/download/vocab.json.metadata +3 -0
- configuration_internvl_chat.py +2 -0
- modeling_intern_vit.py +1 -0
- modeling_internvl_chat.py +11 -0
.cache/huggingface/.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
*
|
.cache/huggingface/download/.gitattributes.lock
ADDED
|
File without changes
|
.cache/huggingface/download/.gitattributes.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
52373fe24473b1aa44333d318f578ae6bf04b49b
|
| 3 |
+
1734456791.0596795
|
.cache/huggingface/download/README.md.lock
ADDED
|
File without changes
|
.cache/huggingface/download/README.md.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
50c19d24e67d8074a25c9eea234d1cf6438c740b
|
| 3 |
+
1734456791.1753678
|
.cache/huggingface/download/added_tokens.json.lock
ADDED
|
File without changes
|
.cache/huggingface/download/added_tokens.json.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
dd972e4080e791eab591742c1168ee7fd6279146
|
| 3 |
+
1734456791.1994705
|
.cache/huggingface/download/config.json.lock
ADDED
|
File without changes
|
.cache/huggingface/download/config.json.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
855bd81ff3939cf16c978e81a32b6e56c672e04c
|
| 3 |
+
1734456791.1441224
|
.cache/huggingface/download/configuration_intern_vit.py.lock
ADDED
|
File without changes
|
.cache/huggingface/download/configuration_intern_vit.py.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
7e630c456eb9cf350e55bf850c3ff72f445a7e17
|
| 3 |
+
1734456791.089145
|
.cache/huggingface/download/configuration_internvl_chat.py.lock
ADDED
|
File without changes
|
.cache/huggingface/download/configuration_internvl_chat.py.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
799209432caf749e77de4c889ec03fe6a32fcdf9
|
| 3 |
+
1734456791.1727533
|
.cache/huggingface/download/conversation.py.lock
ADDED
|
File without changes
|
.cache/huggingface/download/conversation.py.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
5a771766f21ce3aeeb99b286fb8d188b0038a547
|
| 3 |
+
1734456791.044592
|
.cache/huggingface/download/generation_config.json.lock
ADDED
|
File without changes
|
.cache/huggingface/download/generation_config.json.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
ad56757950bfa85900de6f41f11ef4cca19c6c93
|
| 3 |
+
1734456791.1714978
|
.cache/huggingface/download/inputs_stats.pth.lock
ADDED
|
File without changes
|
.cache/huggingface/download/inputs_stats.pth.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
46db3d6cad81aacccef721ea2b5ab612e8db4fc985dad930e7264cd8ff21f3c2
|
| 3 |
+
1734456794.4679294
|
.cache/huggingface/download/merges.txt.lock
ADDED
|
File without changes
|
.cache/huggingface/download/merges.txt.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
31349551d90c7606f325fe0f11bbb8bd5fa0d7c7
|
| 3 |
+
1734456794.464309
|
.cache/huggingface/download/modeling_intern_vit.py.lock
ADDED
|
File without changes
|
.cache/huggingface/download/modeling_intern_vit.py.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
1c5c043a4b860720b3b6e55107e8e6ecf0c573de
|
| 3 |
+
1734456791.741343
|
.cache/huggingface/download/modeling_internvl_chat.py.lock
ADDED
|
File without changes
|
.cache/huggingface/download/modeling_internvl_chat.py.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
41f48cd5cb907e025725fc42ac819cf3f03c01b5
|
| 3 |
+
1734456791.6780381
|
.cache/huggingface/download/outputs_stats.pth.lock
ADDED
|
File without changes
|
.cache/huggingface/download/outputs_stats.pth.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
0a1e5ab3e5bc804a219212405819a71d42e5f5d74459e16fa6d566f8a85a13cc
|
| 3 |
+
1734456800.2738512
|
.cache/huggingface/download/preprocessor_config.json.lock
ADDED
|
File without changes
|
.cache/huggingface/download/preprocessor_config.json.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
dfd7e50d9d4e67cd679b16b337b419a0c6cfa849
|
| 3 |
+
1734456791.7072408
|
.cache/huggingface/download/pytorch_model-00001-of-00002.bin.lock
ADDED
|
File without changes
|
.cache/huggingface/download/pytorch_model-00001-of-00002.bin.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
483e318a80579169bef059579ff3dc6537a00d2687273d2e2ea930b9b256ab1d
|
| 3 |
+
1734465176.732157
|
.cache/huggingface/download/pytorch_model-00002-of-00002.bin.lock
ADDED
|
File without changes
|
.cache/huggingface/download/pytorch_model-00002-of-00002.bin.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
67802cbbb66929b00889f9f45e996cfbbf6c2a54e710d3b2edd248f3433ddb83
|
| 3 |
+
1734497137.5813448
|
.cache/huggingface/download/pytorch_model.bin.index.json.lock
ADDED
|
File without changes
|
.cache/huggingface/download/pytorch_model.bin.index.json.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
a541a2170f4d2ec99333725e52addf2920a53f9e
|
| 3 |
+
1734456793.2043414
|
.cache/huggingface/download/special_tokens_map.json.lock
ADDED
|
File without changes
|
.cache/huggingface/download/special_tokens_map.json.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
74ea6ef5bd52e11d45791fbab7cca3dc984942fa
|
| 3 |
+
1734456793.195179
|
.cache/huggingface/download/tokenizer.json.lock
ADDED
|
File without changes
|
.cache/huggingface/download/tokenizer.json.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
34a2790c1c37a3f4774fef44480b2b50e3c0f40f2122d26e057f249460b8735d
|
| 3 |
+
1734456811.4525309
|
.cache/huggingface/download/tokenizer_config.json.lock
ADDED
|
File without changes
|
.cache/huggingface/download/tokenizer_config.json.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
a8e0f5b3e9c57572779dbead982ccd39aa8e8960
|
| 3 |
+
1734456794.3733914
|
.cache/huggingface/download/vocab.json.lock
ADDED
|
File without changes
|
.cache/huggingface/download/vocab.json.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
6d7aff90716e34e40b62b1d20d52134a2a390211
|
| 2 |
+
4783fe10ac3adce15ac8f358ef5462739852c569
|
| 3 |
+
1734456796.272894
|
configuration_internvl_chat.py
CHANGED
|
@@ -63,6 +63,8 @@ class InternVLChatConfig(PretrainedConfig):
|
|
| 63 |
self.ps_version = ps_version # pixel shuffle version
|
| 64 |
self.min_dynamic_patch = min_dynamic_patch
|
| 65 |
self.max_dynamic_patch = max_dynamic_patch
|
|
|
|
|
|
|
| 66 |
|
| 67 |
logger.info(f'vision_select_layer: {self.select_layer}')
|
| 68 |
logger.info(f'ps_version: {self.ps_version}')
|
|
|
|
| 63 |
self.ps_version = ps_version # pixel shuffle version
|
| 64 |
self.min_dynamic_patch = min_dynamic_patch
|
| 65 |
self.max_dynamic_patch = max_dynamic_patch
|
| 66 |
+
# By default, we use tie_word_embeddings=False for models of all sizes.
|
| 67 |
+
self.tie_word_embeddings = self.llm_config.tie_word_embeddings
|
| 68 |
|
| 69 |
logger.info(f'vision_select_layer: {self.select_layer}')
|
| 70 |
logger.info(f'ps_version: {self.ps_version}')
|
modeling_intern_vit.py
CHANGED
|
@@ -364,6 +364,7 @@ class InternVisionEncoder(nn.Module):
|
|
| 364 |
class InternVisionModel(PreTrainedModel):
|
| 365 |
main_input_name = 'pixel_values'
|
| 366 |
_supports_flash_attn_2 = True
|
|
|
|
| 367 |
config_class = InternVisionConfig
|
| 368 |
_no_split_modules = ['InternVisionEncoderLayer']
|
| 369 |
|
|
|
|
| 364 |
class InternVisionModel(PreTrainedModel):
|
| 365 |
main_input_name = 'pixel_values'
|
| 366 |
_supports_flash_attn_2 = True
|
| 367 |
+
supports_gradient_checkpointing = True
|
| 368 |
config_class = InternVisionConfig
|
| 369 |
_no_split_modules = ['InternVisionEncoderLayer']
|
| 370 |
|
modeling_internvl_chat.py
CHANGED
|
@@ -37,6 +37,7 @@ class InternVLChatModel(PreTrainedModel):
|
|
| 37 |
main_input_name = 'pixel_values'
|
| 38 |
base_model_prefix = 'language_model'
|
| 39 |
_supports_flash_attn_2 = True
|
|
|
|
| 40 |
_no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer', 'Qwen2DecoderLayer']
|
| 41 |
|
| 42 |
def __init__(self, config: InternVLChatConfig, vision_model=None, language_model=None, use_flash_attn=True):
|
|
@@ -346,3 +347,13 @@ class InternVLChatModel(PreTrainedModel):
|
|
| 346 |
)
|
| 347 |
|
| 348 |
return outputs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
main_input_name = 'pixel_values'
|
| 38 |
base_model_prefix = 'language_model'
|
| 39 |
_supports_flash_attn_2 = True
|
| 40 |
+
supports_gradient_checkpointing = True
|
| 41 |
_no_split_modules = ['InternVisionModel', 'LlamaDecoderLayer', 'Qwen2DecoderLayer']
|
| 42 |
|
| 43 |
def __init__(self, config: InternVLChatConfig, vision_model=None, language_model=None, use_flash_attn=True):
|
|
|
|
| 347 |
)
|
| 348 |
|
| 349 |
return outputs
|
| 350 |
+
|
| 351 |
+
@property
|
| 352 |
+
def lm_head(self):
|
| 353 |
+
return self.language_model.get_output_embeddings()
|
| 354 |
+
|
| 355 |
+
def get_input_embeddings(self):
|
| 356 |
+
return self.language_model.get_input_embeddings()
|
| 357 |
+
|
| 358 |
+
def get_output_embeddings(self):
|
| 359 |
+
return self.language_model.get_output_embeddings()
|