Update README.md
Browse files
README.md
CHANGED
|
@@ -16,13 +16,15 @@ pipeline_tag: image-feature-extraction
|
|
| 16 |
<img src="https://cdn-uploads.huggingface.co/production/uploads/64119264f0f81eb569e0d569/2yzk5wUY-obL6H4rKiHlU.webp" alt="Image Description" width="300" height="300">
|
| 17 |
</p>
|
| 18 |
|
| 19 |
-
\[
|
|
|
|
|
|
|
| 20 |
|
| 21 |
| Model | Date | Download | Note |
|
| 22 |
| ----------------------- | ---------- | ---------------------------------------------------------------------- | -------------------------------- |
|
| 23 |
-
| InternViT-6B-448px-V1
|
| 24 |
-
| InternViT-6B-448px-V1
|
| 25 |
-
| InternViT-6B-448px-V1
|
| 26 |
| InternViT-6B-224px | 2023.12.22 | 🤗 [HF link](https://huggingface.co/OpenGVLab/InternViT-6B-224px) | vision foundation model |
|
| 27 |
| InternVL-14B-224px | 2023.12.22 | 🤗 [HF link](https://huggingface.co/OpenGVLab/InternVL-14B-224px) | vision-language foundation model |
|
| 28 |
|
|
@@ -132,6 +134,12 @@ If you find this project useful in your research, please consider citing:
|
|
| 132 |
journal={arXiv preprint arXiv:2312.14238},
|
| 133 |
year={2023}
|
| 134 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
```
|
| 136 |
|
| 137 |
|
|
|
|
| 16 |
<img src="https://cdn-uploads.huggingface.co/production/uploads/64119264f0f81eb569e0d569/2yzk5wUY-obL6H4rKiHlU.webp" alt="Image Description" width="300" height="300">
|
| 17 |
</p>
|
| 18 |
|
| 19 |
+
[\[🆕 Blog\]](https://internvl.github.io/blog/) [\[📜 InternVL 1.0 Paper\]](https://arxiv.org/abs/2312.14238) [\[📜 InternVL 1.5 Report\]](https://arxiv.org/abs/2404.16821) [\[🗨️ Chat Demo\]](https://internvl.opengvlab.com/)
|
| 20 |
+
|
| 21 |
+
[\[🤗 HF Demo\]](https://huggingface.co/spaces/OpenGVLab/InternVL) [\[🚀 Quick Start\]](#model-usage) [\[🌐 Community-hosted API\]](https://rapidapi.com/adushar1320/api/internvl-chat) [\[📖 中文解读\]](https://zhuanlan.zhihu.com/p/675877376)
|
| 22 |
|
| 23 |
| Model | Date | Download | Note |
|
| 24 |
| ----------------------- | ---------- | ---------------------------------------------------------------------- | -------------------------------- |
|
| 25 |
+
| InternViT-6B-448px-V1-5 | 2024.04.20 | 🤗 [HF link](https://huggingface.co/OpenGVLab/InternViT-6B-448px-V1-5) | support dynamic resolution, super strong OCR (🔥new) |
|
| 26 |
+
| InternViT-6B-448px-V1-2 | 2024.02.11 | 🤗 [HF link](https://huggingface.co/OpenGVLab/InternViT-6B-448px-V1-2) | 448 resolution |
|
| 27 |
+
| InternViT-6B-448px-V1-0 | 2024.01.30 | 🤗 [HF link](https://huggingface.co/OpenGVLab/InternViT-6B-448px-V1-0) | 448 resolution |
|
| 28 |
| InternViT-6B-224px | 2023.12.22 | 🤗 [HF link](https://huggingface.co/OpenGVLab/InternViT-6B-224px) | vision foundation model |
|
| 29 |
| InternVL-14B-224px | 2023.12.22 | 🤗 [HF link](https://huggingface.co/OpenGVLab/InternVL-14B-224px) | vision-language foundation model |
|
| 30 |
|
|
|
|
| 134 |
journal={arXiv preprint arXiv:2312.14238},
|
| 135 |
year={2023}
|
| 136 |
}
|
| 137 |
+
@article{chen2024far,
|
| 138 |
+
title={How Far Are We to GPT-4V? Closing the Gap to Commercial Multimodal Models with Open-Source Suites},
|
| 139 |
+
author={Chen, Zhe and Wang, Weiyun and Tian, Hao and Ye, Shenglong and Gao, Zhangwei and Cui, Erfei and Tong, Wenwen and Hu, Kongzhi and Luo, Jiapeng and Ma, Zheng and others},
|
| 140 |
+
journal={arXiv preprint arXiv:2404.16821},
|
| 141 |
+
year={2024}
|
| 142 |
+
}
|
| 143 |
```
|
| 144 |
|
| 145 |
|