Upload folder using huggingface_hub
Browse files- README.md +146 -1
- config.json +1 -1
- configuration_intern_vit.py +1 -1
- configuration_internvl_chat.py +1 -1
- modeling_intern_vit.py +1 -1
- modeling_phi3.py +9 -0
README.md
CHANGED
|
@@ -57,6 +57,8 @@ Limitations: Although we have made efforts to ensure the safety of the model dur
|
|
| 57 |
|
| 58 |
We provide an example code to run Mini-InternVL-Chat-4B-V1-5 using `transformers`.
|
| 59 |
|
|
|
|
|
|
|
| 60 |
> Please use transformers==4.37.2 to ensure the model works normally.
|
| 61 |
|
| 62 |
```python
|
|
@@ -301,7 +303,150 @@ print(f'Assistant: {response}')
|
|
| 301 |
|
| 302 |
### LMDeploy
|
| 303 |
|
| 304 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
## License
|
| 307 |
|
|
|
|
| 57 |
|
| 58 |
We provide an example code to run Mini-InternVL-Chat-4B-V1-5 using `transformers`.
|
| 59 |
|
| 60 |
+
We also welcome you to experience the InternVL2 series models in our [online demo](https://internvl.opengvlab.com/). Currently, due to the limited GPU resources with public IP addresses, we can only deploy models up to a maximum of 26B. We will expand soon and deploy larger models to the online demo.
|
| 61 |
+
|
| 62 |
> Please use transformers==4.37.2 to ensure the model works normally.
|
| 63 |
|
| 64 |
```python
|
|
|
|
| 303 |
|
| 304 |
### LMDeploy
|
| 305 |
|
| 306 |
+
LMDeploy is a toolkit for compressing, deploying, and serving LLM, developed by the MMRazor and MMDeploy teams.
|
| 307 |
+
|
| 308 |
+
```sh
|
| 309 |
+
pip install lmdeploy
|
| 310 |
+
```
|
| 311 |
+
|
| 312 |
+
LMDeploy abstracts the complex inference process of multi-modal Vision-Language Models (VLM) into an easy-to-use pipeline, similar to the Large Language Model (LLM) inference pipeline.
|
| 313 |
+
|
| 314 |
+
#### A 'Hello, world' example
|
| 315 |
+
|
| 316 |
+
```python
|
| 317 |
+
from lmdeploy import pipeline, PytorchEngineConfig, ChatTemplateConfig
|
| 318 |
+
from lmdeploy.vl import load_image
|
| 319 |
+
|
| 320 |
+
model = 'OpenGVLab/Mini-InternVL-Chat-4B-V1-5'
|
| 321 |
+
image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg')
|
| 322 |
+
chat_template_config = ChatTemplateConfig('internvl-phi3')
|
| 323 |
+
pipe = pipeline(model, chat_template_config=chat_template_config,
|
| 324 |
+
backend_config=PytorchEngineConfig(session_len=8192))
|
| 325 |
+
response = pipe(('describe this image', image))
|
| 326 |
+
print(response.text)
|
| 327 |
+
```
|
| 328 |
+
|
| 329 |
+
If an `ImportError` occurs while running this example, please install the required dependency packages as prompted.
|
| 330 |
+
|
| 331 |
+
#### Multi-images inference
|
| 332 |
+
|
| 333 |
+
When dealing with multiple images, you can put them all in one list. Keep in mind that multiple images will lead to a higher number of input tokens, and as a result, the size of the context window typically needs to be increased.
|
| 334 |
+
|
| 335 |
+
> Warning: Due to the scarcity of multi-image conversation data, the performance on multi-image tasks may be unstable, and it may require multiple attempts to achieve satisfactory results.
|
| 336 |
+
|
| 337 |
+
```python
|
| 338 |
+
from lmdeploy import pipeline, PytorchEngineConfig, ChatTemplateConfig
|
| 339 |
+
from lmdeploy.vl import load_image
|
| 340 |
+
from lmdeploy.vl.constants import IMAGE_TOKEN
|
| 341 |
+
|
| 342 |
+
model = 'OpenGVLab/Mini-InternVL-Chat-4B-V1-5'
|
| 343 |
+
chat_template_config = ChatTemplateConfig('internvl-phi3')
|
| 344 |
+
pipe = pipeline(model, chat_template_config=chat_template_config,
|
| 345 |
+
backend_config=PytorchEngineConfig(session_len=8192))
|
| 346 |
+
|
| 347 |
+
image_urls=[
|
| 348 |
+
'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg',
|
| 349 |
+
'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/det.jpg'
|
| 350 |
+
]
|
| 351 |
+
|
| 352 |
+
images = [load_image(img_url) for img_url in image_urls]
|
| 353 |
+
# Numbering images improves multi-image conversations
|
| 354 |
+
response = pipe((f'Image-1: {IMAGE_TOKEN}\nImage-2: {IMAGE_TOKEN}\ndescribe these two images', images))
|
| 355 |
+
print(response.text)
|
| 356 |
+
```
|
| 357 |
+
|
| 358 |
+
#### Batch prompts inference
|
| 359 |
+
|
| 360 |
+
Conducting inference with batch prompts is quite straightforward; just place them within a list structure:
|
| 361 |
+
|
| 362 |
+
```python
|
| 363 |
+
from lmdeploy import pipeline, PytorchEngineConfig, ChatTemplateConfig
|
| 364 |
+
from lmdeploy.vl import load_image
|
| 365 |
+
|
| 366 |
+
model = 'OpenGVLab/Mini-InternVL-Chat-4B-V1-5'
|
| 367 |
+
chat_template_config = ChatTemplateConfig('internvl-phi3')
|
| 368 |
+
pipe = pipeline(model, chat_template_config=chat_template_config,
|
| 369 |
+
backend_config=PytorchEngineConfig(session_len=8192))
|
| 370 |
+
|
| 371 |
+
image_urls=[
|
| 372 |
+
"https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg",
|
| 373 |
+
"https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/det.jpg"
|
| 374 |
+
]
|
| 375 |
+
prompts = [('describe this image', load_image(img_url)) for img_url in image_urls]
|
| 376 |
+
response = pipe(prompts)
|
| 377 |
+
print(response)
|
| 378 |
+
```
|
| 379 |
+
|
| 380 |
+
#### Multi-turn conversation
|
| 381 |
+
|
| 382 |
+
There are two ways to do multi-turn conversations with the pipeline. One is to construct messages according to the OpenAI format and use the method introduced above; the other is to use the `pipeline.chat` interface.
|
| 383 |
+
|
| 384 |
+
```python
|
| 385 |
+
from lmdeploy import pipeline, PytorchEngineConfig, ChatTemplateConfig, GenerationConfig
|
| 386 |
+
from lmdeploy.vl import load_image
|
| 387 |
+
|
| 388 |
+
model = 'OpenGVLab/Mini-InternVL-Chat-4B-V1-5'
|
| 389 |
+
chat_template_config = ChatTemplateConfig('internvl-phi3')
|
| 390 |
+
pipe = pipeline(model, chat_template_config=chat_template_config,
|
| 391 |
+
backend_config=PytorchEngineConfig(session_len=8192))
|
| 392 |
+
|
| 393 |
+
image = load_image('https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg')
|
| 394 |
+
gen_config = GenerationConfig(top_k=40, top_p=0.8, temperature=0.8)
|
| 395 |
+
sess = pipe.chat(('describe this image', image), gen_config=gen_config)
|
| 396 |
+
print(sess.response.text)
|
| 397 |
+
sess = pipe.chat('What is the woman doing?', session=sess, gen_config=gen_config)
|
| 398 |
+
print(sess.response.text)
|
| 399 |
+
```
|
| 400 |
+
|
| 401 |
+
#### Service
|
| 402 |
+
|
| 403 |
+
LMDeploy's `api_server` enables models to be easily packed into services with a single command. The provided RESTful APIs are compatible with OpenAI's interfaces. Below is an example of service startup:
|
| 404 |
+
|
| 405 |
+
```shell
|
| 406 |
+
lmdeploy serve api_server OpenGVLab/Mini-InternVL-Chat-4B-V1-5 --model-name Mini-InternVL-Chat-4B-V1-5 --backend pytorch --server-port 23333
|
| 407 |
+
```
|
| 408 |
+
|
| 409 |
+
To use the OpenAI-style interface, you need to install OpenAI:
|
| 410 |
+
|
| 411 |
+
```shell
|
| 412 |
+
pip install openai
|
| 413 |
+
```
|
| 414 |
+
|
| 415 |
+
Then, use the code below to make the API call:
|
| 416 |
+
|
| 417 |
+
```python
|
| 418 |
+
from openai import OpenAI
|
| 419 |
+
|
| 420 |
+
client = OpenAI(api_key='YOUR_API_KEY', base_url='http://0.0.0.0:23333/v1')
|
| 421 |
+
model_name = client.models.list().data[0].id
|
| 422 |
+
response = client.chat.completions.create(
|
| 423 |
+
model="Mini-InternVL-Chat-4B-V1-5",
|
| 424 |
+
messages=[{
|
| 425 |
+
'role':
|
| 426 |
+
'user',
|
| 427 |
+
'content': [{
|
| 428 |
+
'type': 'text',
|
| 429 |
+
'text': 'describe this image',
|
| 430 |
+
}, {
|
| 431 |
+
'type': 'image_url',
|
| 432 |
+
'image_url': {
|
| 433 |
+
'url':
|
| 434 |
+
'https://modelscope.oss-cn-beijing.aliyuncs.com/resource/tiger.jpeg',
|
| 435 |
+
},
|
| 436 |
+
}],
|
| 437 |
+
}],
|
| 438 |
+
temperature=0.8,
|
| 439 |
+
top_p=0.8)
|
| 440 |
+
print(response)
|
| 441 |
+
```
|
| 442 |
+
|
| 443 |
+
### vLLM
|
| 444 |
+
|
| 445 |
+
TODO
|
| 446 |
+
|
| 447 |
+
### Ollama
|
| 448 |
+
|
| 449 |
+
TODO
|
| 450 |
|
| 451 |
## License
|
| 452 |
|
config.json
CHANGED
|
@@ -193,7 +193,7 @@
|
|
| 193 |
"tie_word_embeddings": false,
|
| 194 |
"tokenizer_class": null,
|
| 195 |
"top_k": 50,
|
| 196 |
-
"top_p":
|
| 197 |
"torch_dtype": "bfloat16",
|
| 198 |
"torchscript": false,
|
| 199 |
"transformers_version": "4.37.2",
|
|
|
|
| 193 |
"tie_word_embeddings": false,
|
| 194 |
"tokenizer_class": null,
|
| 195 |
"top_k": 50,
|
| 196 |
+
"top_p": 1.0,
|
| 197 |
"torch_dtype": "bfloat16",
|
| 198 |
"torchscript": false,
|
| 199 |
"transformers_version": "4.37.2",
|
configuration_intern_vit.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# --------------------------------------------------------
|
| 2 |
# InternVL
|
| 3 |
-
# Copyright (c)
|
| 4 |
# Licensed under The MIT License [see LICENSE for details]
|
| 5 |
# --------------------------------------------------------
|
| 6 |
import os
|
|
|
|
| 1 |
# --------------------------------------------------------
|
| 2 |
# InternVL
|
| 3 |
+
# Copyright (c) 2024 OpenGVLab
|
| 4 |
# Licensed under The MIT License [see LICENSE for details]
|
| 5 |
# --------------------------------------------------------
|
| 6 |
import os
|
configuration_internvl_chat.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# --------------------------------------------------------
|
| 2 |
# InternVL
|
| 3 |
-
# Copyright (c)
|
| 4 |
# Licensed under The MIT License [see LICENSE for details]
|
| 5 |
# --------------------------------------------------------
|
| 6 |
|
|
|
|
| 1 |
# --------------------------------------------------------
|
| 2 |
# InternVL
|
| 3 |
+
# Copyright (c) 2024 OpenGVLab
|
| 4 |
# Licensed under The MIT License [see LICENSE for details]
|
| 5 |
# --------------------------------------------------------
|
| 6 |
|
modeling_intern_vit.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# --------------------------------------------------------
|
| 2 |
# InternVL
|
| 3 |
-
# Copyright (c)
|
| 4 |
# Licensed under The MIT License [see LICENSE for details]
|
| 5 |
# --------------------------------------------------------
|
| 6 |
from typing import Optional, Tuple, Union
|
|
|
|
| 1 |
# --------------------------------------------------------
|
| 2 |
# InternVL
|
| 3 |
+
# Copyright (c) 2024 OpenGVLab
|
| 4 |
# Licensed under The MIT License [see LICENSE for details]
|
| 5 |
# --------------------------------------------------------
|
| 6 |
from typing import Optional, Tuple, Union
|
modeling_phi3.py
CHANGED
|
@@ -53,6 +53,7 @@ try:
|
|
| 53 |
unpad_input)
|
| 54 |
|
| 55 |
_flash_supports_window_size = 'window_size' in list(inspect.signature(flash_attn_func).parameters)
|
|
|
|
| 56 |
except ImportError as error:
|
| 57 |
logger.warning(
|
| 58 |
f'`flash-attention` package not found, consider installing for better performance: {error}.'
|
|
@@ -61,6 +62,7 @@ except ImportError as error:
|
|
| 61 |
logger.warning(
|
| 62 |
"Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`."
|
| 63 |
)
|
|
|
|
| 64 |
|
| 65 |
_CHECKPOINT_FOR_DOC = 'microsoft/Phi-3-mini-4k-instruct'
|
| 66 |
_CONFIG_FOR_DOC = 'Phi3Config'
|
|
@@ -937,6 +939,12 @@ class Phi3PreTrainedModel(PreTrainedModel):
|
|
| 937 |
|
| 938 |
_version = '0.0.5'
|
| 939 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 940 |
def _init_weights(self, module):
|
| 941 |
std = self.config.initializer_range
|
| 942 |
if isinstance(module, nn.Linear):
|
|
@@ -1042,6 +1050,7 @@ class Phi3Model(Phi3PreTrainedModel):
|
|
| 1042 |
[Phi3DecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
|
| 1043 |
)
|
| 1044 |
self._attn_implementation = config._attn_implementation
|
|
|
|
| 1045 |
self.norm = Phi3RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
|
| 1046 |
|
| 1047 |
self.gradient_checkpointing = False
|
|
|
|
| 53 |
unpad_input)
|
| 54 |
|
| 55 |
_flash_supports_window_size = 'window_size' in list(inspect.signature(flash_attn_func).parameters)
|
| 56 |
+
has_flash_attn = True
|
| 57 |
except ImportError as error:
|
| 58 |
logger.warning(
|
| 59 |
f'`flash-attention` package not found, consider installing for better performance: {error}.'
|
|
|
|
| 62 |
logger.warning(
|
| 63 |
"Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`."
|
| 64 |
)
|
| 65 |
+
has_flash_attn = False
|
| 66 |
|
| 67 |
_CHECKPOINT_FOR_DOC = 'microsoft/Phi-3-mini-4k-instruct'
|
| 68 |
_CONFIG_FOR_DOC = 'Phi3Config'
|
|
|
|
| 939 |
|
| 940 |
_version = '0.0.5'
|
| 941 |
|
| 942 |
+
def __init__(self, config: Phi3Config):
|
| 943 |
+
if not has_flash_attn:
|
| 944 |
+
config._attn_implementation = 'eager'
|
| 945 |
+
print('Warning: Flash attention is not available, using eager attention instead.')
|
| 946 |
+
super().__init__(config)
|
| 947 |
+
|
| 948 |
def _init_weights(self, module):
|
| 949 |
std = self.config.initializer_range
|
| 950 |
if isinstance(module, nn.Linear):
|
|
|
|
| 1050 |
[Phi3DecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)]
|
| 1051 |
)
|
| 1052 |
self._attn_implementation = config._attn_implementation
|
| 1053 |
+
|
| 1054 |
self.norm = Phi3RMSNorm(config.hidden_size, eps=config.rms_norm_eps)
|
| 1055 |
|
| 1056 |
self.gradient_checkpointing = False
|