Text Ranking
Transformers
Safetensors
sentence-transformers
qwen3_vl
image-text-to-text
multimodal rerank
text rerank
Instructions to use Qwen/Qwen3-VL-Reranker-8B with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Qwen/Qwen3-VL-Reranker-8B with Transformers:
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

processor = AutoProcessor.from_pretrained("Qwen/Qwen3-VL-Reranker-8B")
model = AutoModelForImageTextToText.from_pretrained("Qwen/Qwen3-VL-Reranker-8B")
- sentence-transformers
How to use Qwen/Qwen3-VL-Reranker-8B with sentence-transformers:
from sentence_transformers import CrossEncoder

model = CrossEncoder("Qwen/Qwen3-VL-Reranker-8B")

query = "Which planet is known as the Red Planet?"
passages = [
    "Venus is often called Earth's twin because of its similar size and proximity.",
    "Mars, known for its reddish appearance, is often referred to as the Red Planet.",
    "Jupiter, the largest planet in our solar system, has a prominent red spot.",
    "Saturn, famous for its rings, is sometimes mistaken for the Red Planet."
]

scores = model.predict([(query, passage) for passage in passages])
print(scores)
- Notebooks
- Google Colab
- Kaggle
Integrate with Sentence Transformers v5.4
#9
by tomaarsen HF Staff - opened
- 1_LogitScore/config.json +4 -0
- README.md +47 -4
- additional_chat_templates/reranker.jinja +52 -0
- config_sentence_transformers.json +12 -0
- modules.json +14 -0
- sentence_bert_config.json +30 -0
1_LogitScore/config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"true_token_id": 9693,
|
| 3 |
+
"false_token_id": 2152
|
| 4 |
+
}
|
README.md
CHANGED
|
@@ -2,10 +2,10 @@
|
|
| 2 |
license: apache-2.0
|
| 3 |
library_name: transformers
|
| 4 |
pipeline_tag: text-ranking
|
| 5 |
-
|
| 6 |
base_model:
|
| 7 |
- Qwen/Qwen3-VL-8B-Instruct
|
| 8 |
tags:
|
|
|
|
| 9 |
- transformers
|
| 10 |
- multimodal rerank
|
| 11 |
- text rerank
|
|
@@ -68,6 +68,51 @@ We utilize retrieval task datasets from various subtasks of [MMEB-v2](https://hu
|
|
| 68 |
|
| 69 |
## Usage
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
- **requirements**
|
| 72 |
```text
|
| 73 |
transformers>=4.57.0
|
|
@@ -75,8 +120,6 @@ qwen-vl-utils>=0.0.14
|
|
| 75 |
torch==2.8.0
|
| 76 |
```
|
| 77 |
|
| 78 |
-
### Basic Usage Example
|
| 79 |
-
|
| 80 |
```python
|
| 81 |
from scripts.qwen3_vl_reranker import Qwen3VLReranker
|
| 82 |
|
|
@@ -105,7 +148,7 @@ print(scores)
|
|
| 105 |
# [0.7838293313980103, 0.585621178150177, 0.6147719025611877]
|
| 106 |
```
|
| 107 |
|
| 108 |
-
###
|
| 109 |
```python
|
| 110 |
import argparse
|
| 111 |
import os
|
|
|
|
| 2 |
license: apache-2.0
|
| 3 |
library_name: transformers
|
| 4 |
pipeline_tag: text-ranking
|
|
|
|
| 5 |
base_model:
|
| 6 |
- Qwen/Qwen3-VL-8B-Instruct
|
| 7 |
tags:
|
| 8 |
+
- sentence-transformers
|
| 9 |
- transformers
|
| 10 |
- multimodal rerank
|
| 11 |
- text rerank
|
|
|
|
| 68 |
|
| 69 |
## Usage
|
| 70 |
|
| 71 |
+
### Using Sentence Transformers
|
| 72 |
+
|
| 73 |
+
Install Sentence Transformers:
|
| 74 |
+
```bash
|
| 75 |
+
pip install "sentence-transformers>=5.4.0"
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
```python
|
| 79 |
+
from sentence_transformers import CrossEncoder
|
| 80 |
+
|
| 81 |
+
model = CrossEncoder("Qwen/Qwen3-VL-Reranker-8B")
|
| 82 |
+
|
| 83 |
+
query = "A woman playing with her dog on a beach at sunset."
|
| 84 |
+
documents = [
|
| 85 |
+
"A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, as the dog offers its paw in a heartwarming display of companionship and trust.",
|
| 86 |
+
"https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
|
| 87 |
+
{
|
| 88 |
+
"text": "A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, as the dog offers its paw in a heartwarming display of companionship and trust.",
|
| 89 |
+
"image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
|
| 90 |
+
},
|
| 91 |
+
]
|
| 92 |
+
|
| 93 |
+
prompt = "Retrieve images or text relevant to the user's query."
|
| 94 |
+
pairs = [(query, doc) for doc in documents]
|
| 95 |
+
scores = model.predict(pairs, prompt=prompt)
|
| 96 |
+
print(scores)
|
| 97 |
+
# [1.3125, 0.25, 0.4375]
|
| 98 |
+
|
| 99 |
+
rankings = model.rank(query, documents, prompt=prompt)
|
| 100 |
+
print(rankings)
|
| 101 |
+
# [{'corpus_id': 0, 'score': 1.3125}, {'corpus_id': 2, 'score': 0.4375}, {'corpus_id': 1, 'score': 0.25}]
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
You can map scores to the 0–1 range with a sigmoid activation (this requires `import torch`):
|
| 105 |
+
|
| 106 |
+
```python
|
| 107 |
+
scores = model.predict(pairs, activation_fn=torch.nn.Sigmoid(), prompt=prompt)
|
| 108 |
+
print(scores)
|
| 109 |
+
# [0.7891, 0.5625, 0.6094]
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
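The default prompt name is `"query"`, which maps to the instruction `"Retrieve text relevant to the user's query."`. If you do not pass `prompt`, this instruction is used; you can override it for your use case (for example, to cover images as well as text) via the `prompt` parameter as shown above.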
|
| 113 |
+
|
| 114 |
+
### Using Transformers
|
| 115 |
+
|
| 116 |
- **requirements**
|
| 117 |
```text
|
| 118 |
transformers>=4.57.0
|
|
|
|
| 120 |
torch==2.8.0
|
| 121 |
```
|
| 122 |
|
|
|
|
|
|
|
| 123 |
```python
|
| 124 |
from scripts.qwen3_vl_reranker import Qwen3VLReranker
|
| 125 |
|
|
|
|
| 148 |
# [0.7838293313980103, 0.585621178150177, 0.6147719025611877]
|
| 149 |
```
|
| 150 |
|
| 151 |
+
### Using vLLM
|
| 152 |
```python
|
| 153 |
import argparse
|
| 154 |
import os
|
additional_chat_templates/reranker.jinja
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- set default_instruction = "Given a search query, retrieve relevant candidates that answer the query." -%}
|
| 2 |
+
{%- set ns = namespace(instruction="", found_instruction=false) -%}
|
| 3 |
+
{%- for message in messages -%}
|
| 4 |
+
{%- if message.role == "system" -%}
|
| 5 |
+
{%- if message.content is string -%}
|
| 6 |
+
{%- set ns.instruction = message.content -%}
|
| 7 |
+
{%- else -%}
|
| 8 |
+
{%- for content in message.content -%}
|
| 9 |
+
{%- if 'text' in content -%}
|
| 10 |
+
{%- set ns.instruction = ns.instruction + content.text -%}
|
| 11 |
+
{%- endif -%}
|
| 12 |
+
{%- endfor -%}
|
| 13 |
+
{%- endif -%}
|
| 14 |
+
{%- set ns.found_instruction = true -%}
|
| 15 |
+
{%- endif -%}
|
| 16 |
+
{%- endfor -%}
|
| 17 |
+
{%- if not ns.found_instruction -%}
|
| 18 |
+
{%- set ns.instruction = default_instruction -%}
|
| 19 |
+
{%- endif -%}
|
| 20 |
+
{%- set image_count = namespace(value=0) -%}
|
| 21 |
+
{%- set video_count = namespace(value=0) -%}
|
| 22 |
+
{%- macro render_multimodal(message) -%}
|
| 23 |
+
{%- if message.content is string -%}
|
| 24 |
+
{{- message.content -}}
|
| 25 |
+
{%- else -%}
|
| 26 |
+
{%- for content in message.content -%}
|
| 27 |
+
{%- if content.type == 'image' or 'image' in content or 'image_url' in content -%}
|
| 28 |
+
{%- set image_count.value = image_count.value + 1 -%}
|
| 29 |
+
{%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
|
| 30 |
+
<|vision_start|><|image_pad|><|vision_end|>
|
| 31 |
+
{%- elif content.type == 'video' or 'video' in content -%}
|
| 32 |
+
{%- set video_count.value = video_count.value + 1 -%}
|
| 33 |
+
{%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
|
| 34 |
+
<|vision_start|><|video_pad|><|vision_end|>
|
| 35 |
+
{%- elif 'text' in content -%}
|
| 36 |
+
{{- content.text -}}
|
| 37 |
+
{%- endif -%}
|
| 38 |
+
{%- endfor -%}
|
| 39 |
+
{%- endif -%}
|
| 40 |
+
{%- endmacro -%}
|
| 41 |
+
{{- '<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be "yes" or "no".<|im_end|>\n<|im_start|>user\n<Instruct>: ' + ns.instruction + '<Query>:' -}}
|
| 42 |
+
{%- for message in messages if message.role == "query" -%}
|
| 43 |
+
{{- render_multimodal(message) -}}
|
| 44 |
+
{%- endfor -%}
|
| 45 |
+
{{- '\n<Document>:' -}}
|
| 46 |
+
{%- for message in messages if message.role == "document" -%}
|
| 47 |
+
{{- render_multimodal(message) -}}
|
| 48 |
+
{%- endfor -%}
|
| 49 |
+
{{- '<|im_end|>\n' -}}
|
| 50 |
+
{%- if add_generation_prompt -%}
|
| 51 |
+
{{- '<|im_start|>assistant\n' -}}
|
| 52 |
+
{%- endif -%}
|
config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"pytorch": "2.10.0+cu128",
|
| 4 |
+
"sentence_transformers": "5.4.0"
|
| 5 |
+
},
|
| 6 |
+
"activation_fn": "torch.nn.modules.linear.Identity",
|
| 7 |
+
"default_prompt_name": "query",
|
| 8 |
+
"model_type": "CrossEncoder",
|
| 9 |
+
"prompts": {
|
| 10 |
+
"query": "Retrieve text relevant to the user's query."
|
| 11 |
+
}
|
| 12 |
+
}
|
modules.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.base.modules.transformer.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_LogitScore",
|
| 12 |
+
"type": "sentence_transformers.cross_encoder.modules.logit_score.LogitScore"
|
| 13 |
+
}
|
| 14 |
+
]
|
sentence_bert_config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"transformer_task": "any-to-any",
|
| 3 |
+
"modality_config": {
|
| 4 |
+
"text": {
|
| 5 |
+
"method": "forward",
|
| 6 |
+
"method_output_name": "logits"
|
| 7 |
+
},
|
| 8 |
+
"image": {
|
| 9 |
+
"method": "forward",
|
| 10 |
+
"method_output_name": "logits"
|
| 11 |
+
},
|
| 12 |
+
"video": {
|
| 13 |
+
"method": "forward",
|
| 14 |
+
"method_output_name": "logits"
|
| 15 |
+
},
|
| 16 |
+
"message": {
|
| 17 |
+
"method": "forward",
|
| 18 |
+
"method_output_name": "logits",
|
| 19 |
+
"format": "structured"
|
| 20 |
+
}
|
| 21 |
+
},
|
| 22 |
+
"module_output_name": "causal_logits",
|
| 23 |
+
"unpad_inputs": false,
|
| 24 |
+
"processing_kwargs": {
|
| 25 |
+
"chat_template": {
|
| 26 |
+
"chat_template": "reranker",
|
| 27 |
+
"add_generation_prompt": true
|
| 28 |
+
}
|
| 29 |
+
}
|
| 30 |
+
}
|