Integrate with Sentence Transformers v5.4

#9
by tomaarsen HF Staff - opened
1_LogitScore/config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "true_token_id": 9693,
3
+ "false_token_id": 2152
4
+ }
README.md CHANGED
@@ -2,10 +2,10 @@
2
  license: apache-2.0
3
  library_name: transformers
4
  pipeline_tag: text-ranking
5
-
6
  base_model:
7
  - Qwen/Qwen3-VL-8B-Instruct
8
  tags:
 
9
  - transformers
10
  - multimodal rerank
11
  - text rerank
@@ -68,6 +68,51 @@ We utilize retrieval task datasets from various subtasks of [MMEB-v2](https://hu
68
 
69
  ## Usage
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  - **requirements**
72
  ```text
73
  transformers>=4.57.0
@@ -75,8 +120,6 @@ qwen-vl-utils>=0.0.14
75
  torch==2.8.0
76
  ```
77
 
78
- ### Basic Usage Example
79
-
80
  ```python
81
  from scripts.qwen3_vl_reranker import Qwen3VLReranker
82
 
@@ -105,7 +148,7 @@ print(scores)
105
  # [0.7838293313980103, 0.585621178150177, 0.6147719025611877]
106
  ```
107
 
108
- ### vLLM Basic Usage Example
109
  ```python
110
  import argparse
111
  import os
 
2
  license: apache-2.0
3
  library_name: transformers
4
  pipeline_tag: text-ranking
 
5
  base_model:
6
  - Qwen/Qwen3-VL-8B-Instruct
7
  tags:
8
+ - sentence-transformers
9
  - transformers
10
  - multimodal rerank
11
  - text rerank
 
68
 
69
  ## Usage
70
 
71
+ ### Using Sentence Transformers
72
+
73
+ Install Sentence Transformers (this integration targets v5.4):
74
+ ```bash
75
+ pip install sentence_transformers
76
+ ```
77
+
78
+ ```python
79
+ from sentence_transformers import CrossEncoder
80
+
81
+ model = CrossEncoder("Qwen/Qwen3-VL-Reranker-8B")
82
+
83
+ query = "A woman playing with her dog on a beach at sunset."
84
+ documents = [
85
+ "A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, as the dog offers its paw in a heartwarming display of companionship and trust.",
86
+ "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
87
+ {
88
+ "text": "A woman shares a joyful moment with her golden retriever on a sun-drenched beach at sunset, as the dog offers its paw in a heartwarming display of companionship and trust.",
89
+ "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
90
+ },
91
+ ]
92
+
93
+ prompt = "Retrieve images or text relevant to the user's query."
94
+ pairs = [(query, doc) for doc in documents]
95
+ scores = model.predict(pairs, prompt=prompt)
96
+ print(scores)
97
+ # [1.3125, 0.25, 0.4375]
98
+
99
+ rankings = model.rank(query, documents, prompt=prompt)
100
+ print(rankings)
101
+ # [{'corpus_id': 0, 'score': 1.3125}, {'corpus_id': 2, 'score': 0.4375}, {'corpus_id': 1, 'score': 0.25}]
102
+ ```
103
+
104
+ You can map scores to the 0–1 range with a sigmoid activation (this snippet additionally requires `import torch`):
105
+
106
+ ```python
107
+ scores = model.predict(pairs, activation_fn=torch.nn.Sigmoid(), prompt=prompt)
108
+ print(scores)
109
+ # [0.7891, 0.5625, 0.6094]
110
+ ```
111
+
112
+ The default prompt name is `"query"`, which maps to the instruction `"Retrieve text relevant to the user's query."`. You can customize the instruction for your use case via the `prompt` parameter, as shown above.
113
+
114
+ ### Using Transformers
115
+
116
  - **requirements**
117
  ```text
118
  transformers>=4.57.0
 
120
  torch==2.8.0
121
  ```
122
 
 
 
123
  ```python
124
  from scripts.qwen3_vl_reranker import Qwen3VLReranker
125
 
 
148
  # [0.7838293313980103, 0.585621178150177, 0.6147719025611877]
149
  ```
150
 
151
+ ### Using vLLM
152
  ```python
153
  import argparse
154
  import os
additional_chat_templates/reranker.jinja ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- set default_instruction = "Given a search query, retrieve relevant candidates that answer the query." -%}
2
+ {%- set ns = namespace(instruction="", found_instruction=false) -%}
3
+ {%- for message in messages -%}
4
+ {%- if message.role == "system" -%}
5
+ {%- if message.content is string -%}
6
+ {%- set ns.instruction = message.content -%}
7
+ {%- else -%}
8
+ {%- for content in message.content -%}
9
+ {%- if 'text' in content -%}
10
+ {%- set ns.instruction = ns.instruction + content.text -%}
11
+ {%- endif -%}
12
+ {%- endfor -%}
13
+ {%- endif -%}
14
+ {%- set ns.found_instruction = true -%}
15
+ {%- endif -%}
16
+ {%- endfor -%}
17
+ {%- if not ns.found_instruction -%}
18
+ {%- set ns.instruction = default_instruction -%}
19
+ {%- endif -%}
20
+ {%- set image_count = namespace(value=0) -%}
21
+ {%- set video_count = namespace(value=0) -%}
22
+ {%- macro render_multimodal(message) -%}
23
+ {%- if message.content is string -%}
24
+ {{- message.content -}}
25
+ {%- else -%}
26
+ {%- for content in message.content -%}
27
+ {%- if content.type == 'image' or 'image' in content or 'image_url' in content -%}
28
+ {%- set image_count.value = image_count.value + 1 -%}
29
+ {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
30
+ <|vision_start|><|image_pad|><|vision_end|>
31
+ {%- elif content.type == 'video' or 'video' in content -%}
32
+ {%- set video_count.value = video_count.value + 1 -%}
33
+ {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
34
+ <|vision_start|><|video_pad|><|vision_end|>
35
+ {%- elif 'text' in content -%}
36
+ {{- content.text -}}
37
+ {%- endif -%}
38
+ {%- endfor -%}
39
+ {%- endif -%}
40
+ {%- endmacro -%}
41
+ {{- '<|im_start|>system\nJudge whether the Document meets the requirements based on the Query and the Instruct provided. Note that the answer can only be "yes" or "no".<|im_end|>\n<|im_start|>user\n<Instruct>: ' + ns.instruction + '<Query>:' -}}
42
+ {%- for message in messages if message.role == "query" -%}
43
+ {{- render_multimodal(message) -}}
44
+ {%- endfor -%}
45
+ {{- '\n<Document>:' -}}
46
+ {%- for message in messages if message.role == "document" -%}
47
+ {{- render_multimodal(message) -}}
48
+ {%- endfor -%}
49
+ {{- '<|im_end|>\n' -}}
50
+ {%- if add_generation_prompt -%}
51
+ {{- '<|im_start|>assistant\n' -}}
52
+ {%- endif -%}
config_sentence_transformers.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "pytorch": "2.10.0+cu128",
4
+ "sentence_transformers": "5.4.0"
5
+ },
6
+ "activation_fn": "torch.nn.modules.linear.Identity",
7
+ "default_prompt_name": "query",
8
+ "model_type": "CrossEncoder",
9
+ "prompts": {
10
+ "query": "Retrieve text relevant to the user's query."
11
+ }
12
+ }
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.base.modules.transformer.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_LogitScore",
12
+ "type": "sentence_transformers.cross_encoder.modules.logit_score.LogitScore"
13
+ }
14
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "transformer_task": "any-to-any",
3
+ "modality_config": {
4
+ "text": {
5
+ "method": "forward",
6
+ "method_output_name": "logits"
7
+ },
8
+ "image": {
9
+ "method": "forward",
10
+ "method_output_name": "logits"
11
+ },
12
+ "video": {
13
+ "method": "forward",
14
+ "method_output_name": "logits"
15
+ },
16
+ "message": {
17
+ "method": "forward",
18
+ "method_output_name": "logits",
19
+ "format": "structured"
20
+ }
21
+ },
22
+ "module_output_name": "causal_logits",
23
+ "unpad_inputs": false,
24
+ "processing_kwargs": {
25
+ "chat_template": {
26
+ "chat_template": "reranker",
27
+ "add_generation_prompt": true
28
+ }
29
+ }
30
+ }