Update README.md
Browse files
README.md
CHANGED
|
@@ -137,12 +137,12 @@ question = 'Hello, who are you?'
|
|
| 137 |
response, history = model.chat(tokenizer, None, question, generation_config, history=None, return_history=True)
|
| 138 |
print(f'User: {question} Assistant: {response}')
|
| 139 |
|
| 140 |
-
# text-image conversation
|
| 141 |
question = '<image> Please describe the image.'
|
| 142 |
response, history = model.chat(tokenizer, pixel_values, question, generation_config, history=None, return_history=True)
|
| 143 |
print(f'User: {question} Assistant: {response}')
|
| 144 |
|
| 145 |
-
|
| 146 |
question = 'What is best title for the image?'
|
| 147 |
response, history = model.chat(tokenizer, pixel_values, question, generation_config, history=history, return_history=True)
|
| 148 |
print(f'User: {question} Assistant: {response}')
|
|
@@ -153,15 +153,15 @@ print(f'User: {question} Assistant: {response}')
|
|
| 153 |
|
| 154 |
| Benchmark | Qwen2.5-VL-3B | InternVL2.5-4B | Ristretto-3B |
|
| 155 |
| :-------: | :----------: | :-------------: | :----: |
|
| 156 |
-
| MMBench-TEST-avg | 76.8 | 78.2 |
|
| 157 |
| MMStar | 56.3 | 58.7 | 62.6 |
|
| 158 |
| MMMU-VAL | 51.2 | 51.8 | 49.1 |
|
| 159 |
-
| MathVista-
|
| 160 |
| HallucinationBench | 46.6 | 46.6 | 50.2 |
|
| 161 |
| AI2D | 81.4 | 81.4 | 84.3 |
|
| 162 |
| OCRBench | 82.8 | 82.0 | 84.0 |
|
| 163 |
| MMVet | 60.0 | 61.5 | 61.8 |
|
| 164 |
-
| Average | 64.5 | 65.1 |
|
| 165 |
|
| 166 |
We use [VLMEvalKit](https://github.com/open-compass/VLMEvalKit) to evaluate Ristretto-3B. Other results are taken from [OpenCompass](https://rank.opencompass.org.cn/leaderboard-multimodal)
|
| 167 |
|
|
|
|
| 137 |
response, history = model.chat(tokenizer, None, question, generation_config, history=None, return_history=True)
|
| 138 |
print(f'User: {question} Assistant: {response}')
|
| 139 |
|
| 140 |
+
# text-image conversation and multi-round conversation
|
| 141 |
question = '<image> Please describe the image.'
|
| 142 |
response, history = model.chat(tokenizer, pixel_values, question, generation_config, history=None, return_history=True)
|
| 143 |
print(f'User: {question} Assistant: {response}')
|
| 144 |
|
| 145 |
+
|
| 146 |
question = 'What is the best title for the image?'
|
| 147 |
response, history = model.chat(tokenizer, pixel_values, question, generation_config, history=history, return_history=True)
|
| 148 |
print(f'User: {question} Assistant: {response}')
|
|
|
|
| 153 |
|
| 154 |
| Benchmark | Qwen2.5-VL-3B | InternVL2.5-4B | Ristretto-3B |
|
| 155 |
| :-------: | :----------: | :-------------: | :----: |
|
| 156 |
+
| MMBench-TEST-avg | 76.8 | 78.2 | 80.1 |
|
| 157 |
| MMStar | 56.3 | 58.7 | 62.6 |
|
| 158 |
| MMMU-VAL | 51.2 | 51.8 | 49.1 |
|
| 159 |
+
| MathVista-MINI-test | 61.2 | 60.8 | 67.9 |
|
| 160 |
| HallucinationBench | 46.6 | 46.6 | 50.2 |
|
| 161 |
| AI2D | 81.4 | 81.4 | 84.3 |
|
| 162 |
| OCRBench | 82.8 | 82.0 | 84.0 |
|
| 163 |
| MMVet | 60.0 | 61.5 | 61.8 |
|
| 164 |
+
| Average | 64.5 | 65.1 | 67.6 |
|
| 165 |
|
| 166 |
We use [VLMEvalKit](https://github.com/open-compass/VLMEvalKit) to evaluate Ristretto-3B. Other results are taken from [OpenCompass](https://rank.opencompass.org.cn/leaderboard-multimodal).
|
| 167 |
|